diff --git a/environment/knowledge.go b/environment/knowledge.go index cf2cb6a..e03dc5b 100644 --- a/environment/knowledge.go +++ b/environment/knowledge.go @@ -5,6 +5,7 @@ import ( "encoding/base64" "encoding/json" "fmt" + "io/fs" "log/slog" "os" "path/filepath" @@ -28,14 +29,17 @@ const ( // validateKnowledgeRelPath enforces the path rules for knowledge file operations. // -// Layout: every staged file lives under `knowledge//` where -// scope is `account` or `team_`. The leading `knowledge/` segment -// matches the runner-relative form of the read path Safari uses -// (`/knowledge//`), so a freshly staged file lands at +// Layout: every staged file lives under `knowledge//` where +// scope is `account` or `team_` and the leaf may itself span one or +// more subdirectory segments (`runbooks/lead.md`). The leading `knowledge/` +// segment matches the runner-relative form of the read path Safari uses +// (`/knowledge//`), so a freshly staged file lands at // exactly the location a follow-up read will probe — no contract drift. // -// Bare-leaf paths (`DUTY.md`), deeper trees (`knowledge/account/sub/foo.md`), -// missing prefix (`account/foo.md`), unknown scope segments, and the +// Every leaf segment is validated so a nested path can never escape the scope +// directory when joined onto the workspace root: no traversal (`.`/`..`), no +// empty segment, and no dotfile. Bare-leaf paths (`DUTY.md`), a missing +// `knowledge/` prefix (`account/foo.md`), unknown scope segments, and the // sentinel filename are all rejected. Backslashes are blocked to cover the // Windows-style traversal vector defensively even though the runner only // ships on unix. @@ -47,23 +51,25 @@ func validateKnowledgeRelPath(relPath string) error { return fmt.Errorf("rel_path must not contain backslash: %q", relPath) } parts := strings.Split(relPath, "/") - if len(parts) != 3 { - return fmt.Errorf("rel_path must be knowledge//, got %q", relPath) + if len(parts) < 3 { + return fmt.Errorf("rel_path must be knowledge//, got %q", relPath) } - root, scope, leaf := parts[0], parts[1], parts[2] + root, scope, leafSegs := parts[0], parts[1], parts[2:] if root != "knowledge" { return fmt.Errorf("rel_path must start with 'knowledge/', got %q", relPath) } if scope != "account" && !teamScopeRe.MatchString(scope) { return fmt.Errorf("rel_path scope must be 'account' or 'team_', got %q", scope) } - if leaf == "" || leaf == "." || leaf == ".." { - return fmt.Errorf("rel_path leaf must be a real filename, got %q", leaf) - } - if strings.HasPrefix(leaf, ".") { - return fmt.Errorf("rel_path leaf must not start with '.': %q", leaf) + for _, seg := range leafSegs { + if seg == "" || seg == "." || seg == ".." { + return fmt.Errorf("rel_path leaf segment must be a real filename, got %q", relPath) + } + if strings.HasPrefix(seg, ".") { + return fmt.Errorf("rel_path leaf segment must not start with '.': %q", seg) + } } - if leaf == sentinelName { + if leafSegs[len(leafSegs)-1] == sentinelName { return fmt.Errorf("rel_path leaf must not be the sentinel filename") } return nil @@ -326,43 +332,40 @@ func (e *Environment) ReconcileKnowledgeManifest(ctx context.Context, args *prot return result, nil } -// walkKnowledgeTree returns every leaf file under // -// as `knowledge//` rel-paths. Hidden files (the sentinel) and -// anything failing validation are skipped; nested directories beneath a scope -// are ignored because the layout forbids them. +// walkKnowledgeTree returns every leaf file anywhere under +// //… as `knowledge//` rel-paths, +// recursing into subdirectories so nested files are reconciled too. Hidden +// files (the sentinel), files outside a valid scope, and anything failing +// validation are skipped. The sentinel lives one level above knowledgeRoot +// (at the workspace root), so it is never visited here. func walkKnowledgeTree(knowledgeRoot string) ([]string, error) { - entries, err := os.ReadDir(knowledgeRoot) - if err != nil { - if os.IsNotExist(err) { - return nil, nil - } - return nil, fmt.Errorf("read knowledge root: %w", err) - } - var paths []string - for _, scope := range entries { - if !scope.IsDir() { - continue + walkErr := filepath.WalkDir(knowledgeRoot, func(p string, d fs.DirEntry, err error) error { + if err != nil { + if os.IsNotExist(err) { + return nil + } + return err } - scopeName := scope.Name() - if scopeName != "account" && !teamScopeRe.MatchString(scopeName) { - continue + if d.IsDir() { + return nil } - leaves, err := os.ReadDir(filepath.Join(knowledgeRoot, scopeName)) - if err != nil { - slog.Warn("failed to read scope directory", "scope", scopeName, "error", err) - continue + sub, relErr := filepath.Rel(knowledgeRoot, p) + if relErr != nil { + return nil } - for _, leaf := range leaves { - if leaf.IsDir() { - continue - } - rel := "knowledge/" + scopeName + "/" + leaf.Name() - if err := validateKnowledgeRelPath(rel); err != nil { - continue - } - paths = append(paths, rel) + rel := "knowledge/" + filepath.ToSlash(sub) + if err := validateKnowledgeRelPath(rel); err != nil { + return nil + } + paths = append(paths, rel) + return nil + }) + if walkErr != nil { + if os.IsNotExist(walkErr) { + return nil, nil } + return nil, fmt.Errorf("walk knowledge root: %w", walkErr) } return paths, nil } diff --git a/environment/knowledge_test.go b/environment/knowledge_test.go index 2c05795..e9546b5 100644 --- a/environment/knowledge_test.go +++ b/environment/knowledge_test.go @@ -83,9 +83,11 @@ func TestStageKnowledgeFiles_AccountAndTeam(t *testing.T) { assert.Equal(t, "tsum", sentinel["knowledge/team_42/escalation.md"]) } -// Case 2: mixed batch — one valid file + one with an extra '/' segment. +// Case 2: mixed batch — one valid file + one with a `..` traversal segment. // The valid file must land; the invalid one must return an error in the ack; -// the sentinel must contain only the valid entry. +// the sentinel must contain only the valid entry. (A plain nested path is now +// valid — see TestStageKnowledgeFiles_NestedSubdir — so the rejected case here +// is a genuine escape attempt, not merely a deeper tree.) func TestStageKnowledgeFiles_MixedValidity(t *testing.T) { ws := newTestEnvironment(t) ctx := context.Background() @@ -93,7 +95,7 @@ func TestStageKnowledgeFiles_MixedValidity(t *testing.T) { result, err := ws.StageKnowledgeFiles(ctx, &protocol.StageKnowledgeFilesArgs{ Files: []protocol.KnowledgeFile{ {RelPath: "knowledge/account/DUTY.md", Checksum: "goodsum", ContentB64: b64("content")}, - {RelPath: "knowledge/account/sub/evil.md", Checksum: "badsum", ContentB64: b64("evil")}, + {RelPath: "knowledge/account/../evil.md", Checksum: "badsum", ContentB64: b64("evil")}, }, }) require.NoError(t, err) @@ -103,21 +105,47 @@ func TestStageKnowledgeFiles_MixedValidity(t *testing.T) { assert.True(t, result.Files[0].Success) assert.Empty(t, result.Files[0].Error) - // Second entry (deeper than knowledge//) should fail. + // Second entry (traversal out of the scope dir) should fail. assert.False(t, result.Files[1].Success) assert.NotEmpty(t, result.Files[1].Error) - // Only valid file exists on disk. + // Only valid file exists on disk; nothing escaped to knowledge/evil.md. assert.FileExists(t, filepath.Join(ws.Root(), "knowledge/account/DUTY.md")) - assert.NoFileExists(t, filepath.Join(ws.Root(), "knowledge/account/sub/evil.md")) + assert.NoFileExists(t, filepath.Join(ws.Root(), "knowledge/evil.md")) // Sentinel has only the good entry. sentinel := readSentinelMap(t, ws.Root()) assert.Equal(t, "goodsum", sentinel["knowledge/account/DUTY.md"]) - _, hasBad := sentinel["knowledge/account/sub/evil.md"] + _, hasBad := sentinel["knowledge/account/../evil.md"] assert.False(t, hasBad) } +// A plain nested path (`knowledge///`) is staged into the +// matching on-disk subdirectory, with the sentinel keyed by the full rel_path. +func TestStageKnowledgeFiles_NestedSubdir(t *testing.T) { + ws := newTestEnvironment(t) + ctx := context.Background() + + result, err := ws.StageKnowledgeFiles(ctx, &protocol.StageKnowledgeFilesArgs{ + Files: []protocol.KnowledgeFile{ + {RelPath: "knowledge/account/runbooks/lead-processing.md", Checksum: "nsum", ContentB64: b64("nested")}, + {RelPath: "knowledge/team_42/a/b/c.md", Checksum: "dsum", ContentB64: b64("deep")}, + }, + }) + require.NoError(t, err) + require.Len(t, result.Files, 2) + for i, st := range result.Files { + assert.True(t, st.Success, "file %d (%s) failed: %s", i, st.RelPath, st.Error) + } + + assert.Equal(t, "nested", fileContent(t, ws.Root(), "knowledge/account/runbooks/lead-processing.md")) + assert.Equal(t, "deep", fileContent(t, ws.Root(), "knowledge/team_42/a/b/c.md")) + + sentinel := readSentinelMap(t, ws.Root()) + assert.Equal(t, "nsum", sentinel["knowledge/account/runbooks/lead-processing.md"]) + assert.Equal(t, "dsum", sentinel["knowledge/team_42/a/b/c.md"]) +} + // Case 3: stage the same file twice with different checksums → second stage // overwrites content and updates the sentinel checksum. func TestStageKnowledgeFiles_Overwrite(t *testing.T) { @@ -219,8 +247,10 @@ func TestStageKnowledgeFiles_RejectedPaths(t *testing.T) { desc string }{ {"../etc/passwd", "path traversal with .."}, - {"knowledge/account/sub/x.md", "deeper than knowledge//"}, + {"knowledge/account/../evil.md", "parent traversal in leaf"}, + {"knowledge/account/sub/../../evil.md", "traversal escaping nested leaf"}, {"knowledge/account/.hidden.md", "leading-dot leaf"}, + {"knowledge/account/sub/.hidden.md", "leading-dot nested leaf"}, {"knowledge/account/" + sentinelName, "sentinel filename leaf"}, {"account/DUTY.md", "missing knowledge/ prefix"}, {"DUTY.md", "bare leaf without scope"}, @@ -255,6 +285,9 @@ func TestValidateKnowledgeRelPath(t *testing.T) { "knowledge/team_1/x.md", "knowledge/team_42/escalation.md", "knowledge/team_999/foo.md", // Double-dot in the middle of a filename is a legal flat name. "knowledge/account/foo..bar", "knowledge/account/v2..md", + // Nested subdirectories are now legal. + "knowledge/account/runbooks/lead.md", "knowledge/account/a/b/c.md", + "knowledge/team_42/runbooks/escalation.md", } for _, p := range valid { assert.NoError(t, validateKnowledgeRelPath(p), "should be valid: %q", p) @@ -264,8 +297,10 @@ func TestValidateKnowledgeRelPath(t *testing.T) { "", "..", "DUTY.md", "a", "account/x.md", `knowledge\account\x.md`, "knowledge/account/", "knowledge/account/.hidden", "knowledge/account/" + sentinelName, - "knowledge/account/sub/x.md", + // Traversal anywhere in the leaf, empty segments, and nested dotfiles. "knowledge/account/../etc/passwd", "knowledge/account/..", + "knowledge/account/sub/../../evil.md", "knowledge/account/sub/..", + "knowledge/account//x.md", "knowledge/account/sub/.hidden.md", "knowledge/foo/x.md", "knowledge/team_/x.md", "knowledge/team_42a/x.md", "knowledge/Team_42/x.md", "skill/account/x.md", "knowledge", "knowledge/", "knowledge/account", @@ -345,6 +380,40 @@ func TestReconcileKnowledgeManifest_AllMatch(t *testing.T) { assert.Equal(t, "sum-b", sentinel["knowledge/team_42/escalation.md"]) } +// Nested files are discovered by the recursive tree walk: a staged +// subdirectory file matching the manifest is kept (not re-listed in +// NeedsStage), and the same file becomes a prunable orphan once the manifest +// no longer declares it. +func TestReconcileKnowledgeManifest_Nested(t *testing.T) { + ws := newTestEnvironment(t) + ctx := context.Background() + + stageOne(t, ws, "knowledge/account/runbooks/lead.md", "sum-n", "N") + + // In-manifest → kept, nothing needs staging (proves the walk recurses). + res, err := ws.ReconcileKnowledgeManifest(ctx, &protocol.ReconcileKnowledgeManifestArgs{ + Files: []protocol.KnowledgeManifestEntry{ + {RelPath: "knowledge/account/runbooks/lead.md", Checksum: "sum-n"}, + }, + }) + require.NoError(t, err) + assert.Equal(t, 1, res.KeptCount) + assert.Empty(t, res.NeedsStage) + assert.Empty(t, res.Pruned) + assert.Equal(t, "N", fileContent(t, ws.Root(), "knowledge/account/runbooks/lead.md")) + + // Out of manifest → the nested file is pruned as an orphan. + res2, err := ws.ReconcileKnowledgeManifest(ctx, &protocol.ReconcileKnowledgeManifestArgs{ + Files: []protocol.KnowledgeManifestEntry{}, + }) + require.NoError(t, err) + assert.Equal(t, []string{"knowledge/account/runbooks/lead.md"}, res2.Pruned) + assert.NoFileExists(t, filepath.Join(ws.Root(), "knowledge/account/runbooks/lead.md")) + sentinelAfter := readSentinelMap(t, ws.Root()) + _, still := sentinelAfter["knowledge/account/runbooks/lead.md"] + assert.False(t, still) +} + // Reconcile case 2: a staged file is missing from the manifest entirely (the // pack-level deletion case). The runner deletes the file and drops it from // the sentinel; the surviving file is left alone.