Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
95 changes: 49 additions & 46 deletions environment/knowledge.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"encoding/base64"
"encoding/json"
"fmt"
"io/fs"
"log/slog"
"os"
"path/filepath"
Expand All @@ -28,14 +29,17 @@ const (

// validateKnowledgeRelPath enforces the path rules for knowledge file operations.
//
// Layout: every staged file lives under `knowledge/<scope>/<leaf>` where
// scope is `account` or `team_<digits>`. The leading `knowledge/` segment
// matches the runner-relative form of the read path Safari uses
// (`<root>/knowledge/<scope>/<leaf>`), so a freshly staged file lands at
// Layout: every staged file lives under `knowledge/<scope>/<leaf…>` where
// scope is `account` or `team_<digits>` and the leaf may itself span one or
// more subdirectory segments (`runbooks/lead.md`). The leading `knowledge/`
// segment matches the runner-relative form of the read path Safari uses
// (`<root>/knowledge/<scope>/<leaf…>`), so a freshly staged file lands at
// exactly the location a follow-up read will probe — no contract drift.
//
// Bare-leaf paths (`DUTY.md`), deeper trees (`knowledge/account/sub/foo.md`),
// missing prefix (`account/foo.md`), unknown scope segments, and the
// Every leaf segment is validated so a nested path can never escape the scope
// directory when joined onto the workspace root: no traversal (`.`/`..`), no
// empty segment, and no dotfile. Bare-leaf paths (`DUTY.md`), a missing
// `knowledge/` prefix (`account/foo.md`), unknown scope segments, and the
// sentinel filename are all rejected. Backslashes are blocked to cover the
// Windows-style traversal vector defensively even though the runner only
// ships on unix.
Expand All @@ -47,23 +51,25 @@ func validateKnowledgeRelPath(relPath string) error {
return fmt.Errorf("rel_path must not contain backslash: %q", relPath)
}
parts := strings.Split(relPath, "/")
if len(parts) != 3 {
return fmt.Errorf("rel_path must be knowledge/<scope>/<leaf>, got %q", relPath)
if len(parts) < 3 {
return fmt.Errorf("rel_path must be knowledge/<scope>/<leaf>, got %q", relPath)
}
root, scope, leaf := parts[0], parts[1], parts[2]
root, scope, leafSegs := parts[0], parts[1], parts[2:]
if root != "knowledge" {
return fmt.Errorf("rel_path must start with 'knowledge/', got %q", relPath)
}
if scope != "account" && !teamScopeRe.MatchString(scope) {
return fmt.Errorf("rel_path scope must be 'account' or 'team_<digits>', got %q", scope)
}
if leaf == "" || leaf == "." || leaf == ".." {
return fmt.Errorf("rel_path leaf must be a real filename, got %q", leaf)
}
if strings.HasPrefix(leaf, ".") {
return fmt.Errorf("rel_path leaf must not start with '.': %q", leaf)
for _, seg := range leafSegs {
if seg == "" || seg == "." || seg == ".." {
return fmt.Errorf("rel_path leaf segment must be a real filename, got %q", relPath)
}
if strings.HasPrefix(seg, ".") {
return fmt.Errorf("rel_path leaf segment must not start with '.': %q", seg)
}
}
if leaf == sentinelName {
if leafSegs[len(leafSegs)-1] == sentinelName {
return fmt.Errorf("rel_path leaf must not be the sentinel filename")
}
return nil
Expand Down Expand Up @@ -326,43 +332,40 @@ func (e *Environment) ReconcileKnowledgeManifest(ctx context.Context, args *prot
return result, nil
}

// walkKnowledgeTree returns every leaf file under <knowledgeRoot>/<scope>/
// as `knowledge/<scope>/<leaf>` rel-paths. Hidden files (the sentinel) and
// anything failing validation are skipped; nested directories beneath a scope
// are ignored because the layout forbids them.
// walkKnowledgeTree returns every leaf file anywhere under
// <knowledgeRoot>/<scope>/… as `knowledge/<scope>/<leaf…>` rel-paths,
// recursing into subdirectories so nested files are reconciled too. Hidden
// files (the sentinel), files outside a valid scope, and anything failing
// validation are skipped. The sentinel lives one level above knowledgeRoot
// (at the workspace root), so it is never visited here.
func walkKnowledgeTree(knowledgeRoot string) ([]string, error) {
entries, err := os.ReadDir(knowledgeRoot)
if err != nil {
if os.IsNotExist(err) {
return nil, nil
}
return nil, fmt.Errorf("read knowledge root: %w", err)
}

var paths []string
for _, scope := range entries {
if !scope.IsDir() {
continue
walkErr := filepath.WalkDir(knowledgeRoot, func(p string, d fs.DirEntry, err error) error {
if err != nil {
if os.IsNotExist(err) {
return nil
}
return err
}
scopeName := scope.Name()
if scopeName != "account" && !teamScopeRe.MatchString(scopeName) {
continue
if d.IsDir() {
return nil
}
leaves, err := os.ReadDir(filepath.Join(knowledgeRoot, scopeName))
if err != nil {
slog.Warn("failed to read scope directory", "scope", scopeName, "error", err)
continue
sub, relErr := filepath.Rel(knowledgeRoot, p)
if relErr != nil {
return nil
}
for _, leaf := range leaves {
if leaf.IsDir() {
continue
}
rel := "knowledge/" + scopeName + "/" + leaf.Name()
if err := validateKnowledgeRelPath(rel); err != nil {
continue
}
paths = append(paths, rel)
rel := "knowledge/" + filepath.ToSlash(sub)
if err := validateKnowledgeRelPath(rel); err != nil {
return nil
}
paths = append(paths, rel)
return nil
})
if walkErr != nil {
if os.IsNotExist(walkErr) {
return nil, nil
}
return nil, fmt.Errorf("walk knowledge root: %w", walkErr)
}
return paths, nil
}
Expand Down
87 changes: 78 additions & 9 deletions environment/knowledge_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -83,17 +83,19 @@ func TestStageKnowledgeFiles_AccountAndTeam(t *testing.T) {
assert.Equal(t, "tsum", sentinel["knowledge/team_42/escalation.md"])
}

// Case 2: mixed batch — one valid file + one with an extra '/' segment.
// Case 2: mixed batch — one valid file + one with a `..` traversal segment.
// The valid file must land; the invalid one must return an error in the ack;
// the sentinel must contain only the valid entry.
// the sentinel must contain only the valid entry. (A plain nested path is now
// valid — see TestStageKnowledgeFiles_NestedSubdir — so the rejected case here
// is a genuine escape attempt, not merely a deeper tree.)
func TestStageKnowledgeFiles_MixedValidity(t *testing.T) {
ws := newTestEnvironment(t)
ctx := context.Background()

result, err := ws.StageKnowledgeFiles(ctx, &protocol.StageKnowledgeFilesArgs{
Files: []protocol.KnowledgeFile{
{RelPath: "knowledge/account/DUTY.md", Checksum: "goodsum", ContentB64: b64("content")},
{RelPath: "knowledge/account/sub/evil.md", Checksum: "badsum", ContentB64: b64("evil")},
{RelPath: "knowledge/account/../evil.md", Checksum: "badsum", ContentB64: b64("evil")},
},
})
require.NoError(t, err)
Expand All @@ -103,21 +105,47 @@ func TestStageKnowledgeFiles_MixedValidity(t *testing.T) {
assert.True(t, result.Files[0].Success)
assert.Empty(t, result.Files[0].Error)

// Second entry (deeper than knowledge/<scope>/<leaf>) should fail.
// Second entry (traversal out of the scope dir) should fail.
assert.False(t, result.Files[1].Success)
assert.NotEmpty(t, result.Files[1].Error)

// Only valid file exists on disk.
// Only valid file exists on disk; nothing escaped to knowledge/evil.md.
assert.FileExists(t, filepath.Join(ws.Root(), "knowledge/account/DUTY.md"))
assert.NoFileExists(t, filepath.Join(ws.Root(), "knowledge/account/sub/evil.md"))
assert.NoFileExists(t, filepath.Join(ws.Root(), "knowledge/evil.md"))

// Sentinel has only the good entry.
sentinel := readSentinelMap(t, ws.Root())
assert.Equal(t, "goodsum", sentinel["knowledge/account/DUTY.md"])
_, hasBad := sentinel["knowledge/account/sub/evil.md"]
_, hasBad := sentinel["knowledge/account/../evil.md"]
assert.False(t, hasBad)
}

// A plain nested path (`knowledge/<scope>/<sub>/<file>`) is staged into the
// matching on-disk subdirectory, with the sentinel keyed by the full rel_path.
func TestStageKnowledgeFiles_NestedSubdir(t *testing.T) {
ws := newTestEnvironment(t)
ctx := context.Background()

result, err := ws.StageKnowledgeFiles(ctx, &protocol.StageKnowledgeFilesArgs{
Files: []protocol.KnowledgeFile{
{RelPath: "knowledge/account/runbooks/lead-processing.md", Checksum: "nsum", ContentB64: b64("nested")},
{RelPath: "knowledge/team_42/a/b/c.md", Checksum: "dsum", ContentB64: b64("deep")},
},
})
require.NoError(t, err)
require.Len(t, result.Files, 2)
for i, st := range result.Files {
assert.True(t, st.Success, "file %d (%s) failed: %s", i, st.RelPath, st.Error)
}

assert.Equal(t, "nested", fileContent(t, ws.Root(), "knowledge/account/runbooks/lead-processing.md"))
assert.Equal(t, "deep", fileContent(t, ws.Root(), "knowledge/team_42/a/b/c.md"))

sentinel := readSentinelMap(t, ws.Root())
assert.Equal(t, "nsum", sentinel["knowledge/account/runbooks/lead-processing.md"])
assert.Equal(t, "dsum", sentinel["knowledge/team_42/a/b/c.md"])
}

// Case 3: stage the same file twice with different checksums → second stage
// overwrites content and updates the sentinel checksum.
func TestStageKnowledgeFiles_Overwrite(t *testing.T) {
Expand Down Expand Up @@ -219,8 +247,10 @@ func TestStageKnowledgeFiles_RejectedPaths(t *testing.T) {
desc string
}{
{"../etc/passwd", "path traversal with .."},
{"knowledge/account/sub/x.md", "deeper than knowledge/<scope>/<leaf>"},
{"knowledge/account/../evil.md", "parent traversal in leaf"},
{"knowledge/account/sub/../../evil.md", "traversal escaping nested leaf"},
{"knowledge/account/.hidden.md", "leading-dot leaf"},
{"knowledge/account/sub/.hidden.md", "leading-dot nested leaf"},
{"knowledge/account/" + sentinelName, "sentinel filename leaf"},
{"account/DUTY.md", "missing knowledge/ prefix"},
{"DUTY.md", "bare leaf without scope"},
Expand Down Expand Up @@ -255,6 +285,9 @@ func TestValidateKnowledgeRelPath(t *testing.T) {
"knowledge/team_1/x.md", "knowledge/team_42/escalation.md", "knowledge/team_999/foo.md",
// Double-dot in the middle of a filename is a legal flat name.
"knowledge/account/foo..bar", "knowledge/account/v2..md",
// Nested subdirectories are now legal.
"knowledge/account/runbooks/lead.md", "knowledge/account/a/b/c.md",
"knowledge/team_42/runbooks/escalation.md",
}
for _, p := range valid {
assert.NoError(t, validateKnowledgeRelPath(p), "should be valid: %q", p)
Expand All @@ -264,8 +297,10 @@ func TestValidateKnowledgeRelPath(t *testing.T) {
"", "..", "DUTY.md", "a", "account/x.md", `knowledge\account\x.md`,
"knowledge/account/", "knowledge/account/.hidden",
"knowledge/account/" + sentinelName,
"knowledge/account/sub/x.md",
// Traversal anywhere in the leaf, empty segments, and nested dotfiles.
"knowledge/account/../etc/passwd", "knowledge/account/..",
"knowledge/account/sub/../../evil.md", "knowledge/account/sub/..",
"knowledge/account//x.md", "knowledge/account/sub/.hidden.md",
"knowledge/foo/x.md", "knowledge/team_/x.md",
"knowledge/team_42a/x.md", "knowledge/Team_42/x.md",
"skill/account/x.md", "knowledge", "knowledge/", "knowledge/account",
Expand Down Expand Up @@ -345,6 +380,40 @@ func TestReconcileKnowledgeManifest_AllMatch(t *testing.T) {
assert.Equal(t, "sum-b", sentinel["knowledge/team_42/escalation.md"])
}

// Nested files are discovered by the recursive tree walk: a staged
// subdirectory file matching the manifest is kept (not re-listed in
// NeedsStage), and the same file becomes a prunable orphan once the manifest
// no longer declares it.
func TestReconcileKnowledgeManifest_Nested(t *testing.T) {
ws := newTestEnvironment(t)
ctx := context.Background()

stageOne(t, ws, "knowledge/account/runbooks/lead.md", "sum-n", "N")

// In-manifest → kept, nothing needs staging (proves the walk recurses).
res, err := ws.ReconcileKnowledgeManifest(ctx, &protocol.ReconcileKnowledgeManifestArgs{
Files: []protocol.KnowledgeManifestEntry{
{RelPath: "knowledge/account/runbooks/lead.md", Checksum: "sum-n"},
},
})
require.NoError(t, err)
assert.Equal(t, 1, res.KeptCount)
assert.Empty(t, res.NeedsStage)
assert.Empty(t, res.Pruned)
assert.Equal(t, "N", fileContent(t, ws.Root(), "knowledge/account/runbooks/lead.md"))

// Out of manifest → the nested file is pruned as an orphan.
res2, err := ws.ReconcileKnowledgeManifest(ctx, &protocol.ReconcileKnowledgeManifestArgs{
Files: []protocol.KnowledgeManifestEntry{},
})
require.NoError(t, err)
assert.Equal(t, []string{"knowledge/account/runbooks/lead.md"}, res2.Pruned)
assert.NoFileExists(t, filepath.Join(ws.Root(), "knowledge/account/runbooks/lead.md"))
sentinelAfter := readSentinelMap(t, ws.Root())
_, still := sentinelAfter["knowledge/account/runbooks/lead.md"]
assert.False(t, still)
}

// Reconcile case 2: a staged file is missing from the manifest entirely (the
// pack-level deletion case). The runner deletes the file and drops it from
// the sentinel; the surviving file is left alone.
Expand Down
Loading