diff --git a/internal/catalogue/catalogue.go b/internal/catalogue/catalogue.go
index 61858b09..aad5df3a 100644
--- a/internal/catalogue/catalogue.go
+++ b/internal/catalogue/catalogue.go
@@ -146,27 +146,69 @@ type Provider struct {
 
 	mu   sync.RWMutex
 	pins map[string]string
+
+	// refresh-on-miss: an installed app with no pin is likely one that was
+	// pinned in the catalogue after our last periodic refresh. Rather than make
+	// it wait for the 10-minute tick (and be refused meanwhile), a pin miss
+	// triggers a rate-limited background refresh so the next supervisor scan
+	// can spawn it.
+	refreshMu       sync.Mutex
+	lastRefresh     time.Time
+	refreshInFlight bool
+	minRefreshGap   time.Duration
 }
 
+// missRefreshGap bounds how often a pin miss may trigger a catalogue refetch, so
+// a genuinely unpinned app can't cause a refresh storm.
+const missRefreshGap = 30 * time.Second
+
 // NewProvider builds a Provider for the catalogue at url, caching the last
 // verified pin set at cachePath (empty disables the cache).
 func NewProvider(url, cachePath string) *Provider {
-	return &Provider{url: url, cachePath: cachePath, pins: map[string]string{}}
+	return &Provider{url: url, cachePath: cachePath, pins: map[string]string{}, minRefreshGap: missRefreshGap}
 }
 
 // Publisher implements appstore.Config.CataloguePublisher: it returns the
-// catalogue-pinned publisher for appID and whether appID is pinned.
+// catalogue-pinned publisher for appID and whether appID is pinned. On a miss it
+// kicks off a rate-limited background refresh (see refreshOnMiss), so an app
+// that was pinned since the last periodic refresh becomes spawnable within a
+// scan cycle instead of after the next 10-minute tick.
 func (p *Provider) Publisher(appID string) (string, bool) {
 	p.mu.RLock()
-	defer p.mu.RUnlock()
 	pub, ok := p.pins[appID]
+	p.mu.RUnlock()
+	if !ok {
+		p.refreshOnMiss()
+	}
 	return pub, ok
 }
 
+// refreshOnMiss launches a single background Refresh if one isn't already
+// running and the last refresh is older than minRefreshGap. Non-blocking: the
+// caller (a supervisor scan) never waits on the network.
+func (p *Provider) refreshOnMiss() {
+	p.refreshMu.Lock()
+	if p.refreshInFlight || time.Since(p.lastRefresh) < p.minRefreshGap {
+		p.refreshMu.Unlock()
+		return
+	}
+	p.refreshInFlight = true
+	p.refreshMu.Unlock()
+	go func() {
+		_ = p.Refresh() // best-effort: on error the previous pins are kept and a later miss retries after minRefreshGap
+		p.refreshMu.Lock()
+		p.refreshInFlight = false
+		p.refreshMu.Unlock()
+	}()
+}
+
 // Refresh fetches + verifies the catalogue and atomically swaps in the new pin
 // set. On success it also writes the disk cache. On failure the previous pins
 // are kept (so a transient outage doesn't suddenly fail-close running apps).
 func (p *Provider) Refresh() error {
+	p.refreshMu.Lock()
+	p.lastRefresh = time.Now()
+	p.refreshMu.Unlock()
 	pins, err := LoadPublishers(p.url)
 	if err != nil {
 		return err
diff --git a/internal/catalogue/catalogue_test.go b/internal/catalogue/catalogue_test.go
index d60eec69..c0100a02 100644
--- a/internal/catalogue/catalogue_test.go
+++ b/internal/catalogue/catalogue_test.go
@@ -5,6 +5,7 @@ import (
 	"os"
 	"path/filepath"
 	"testing"
+	"time"
 
 	"github.com/pilot-protocol/pilotprotocol/internal/catalogtrust"
 )
@@ -113,6 +114,82 @@ func TestProvider_RefreshPublisherAndCache(t *testing.T) {
 	}
 }
 
+func TestProvider_RefreshOnMissPicksUpNewlyPinnedApp(t *testing.T) {
+	dir := t.TempDir()
+	cat := filepath.Join(dir, "catalogue.json")
+	url := "file://" + cat
+
+	// writeSigned writes the catalogue body + a fresh valid .sig and returns a
+	// restore for the ephemeral key swap.
+	writeSigned := func(body string) func() {
+		if err := os.WriteFile(cat, []byte(body), 0o600); err != nil {
+			t.Fatal(err)
+		}
+		sig, restore := catalogtrust.SignWithEphemeralKey([]byte(body))
+		if err := os.WriteFile(cat+".sig", []byte(base64.StdEncoding.EncodeToString(sig)), 0o600); err != nil {
+			t.Fatal(err)
+		}
+		return restore
+	}
+
+	const onlyA = `{"version":2,"apps":[{"id":"io.test.a","publisher":"ed25519:3QJm6H6OdjtfrF+Es1lrRjfFmdtq2tGvVSWxia63vcI="}]}`
+	const aAndB = `{"version":2,"apps":[
+		{"id":"io.test.a","publisher":"ed25519:3QJm6H6OdjtfrF+Es1lrRjfFmdtq2tGvVSWxia63vcI="},
+		{"id":"io.test.b","publisher":"ed25519:VF8fdEP/Oe2aWN3ozQ7Ar22137tHb7dkSw0hlzlk/os="}]}`
+
+	restore1 := writeSigned(onlyA)
+
+	p := NewProvider(url, "")
+	p.minRefreshGap = 0 // allow a miss to refresh immediately (no cooldown in the test)
+	if err := p.Refresh(); err != nil {
+		t.Fatalf("initial Refresh: %v", err)
+	}
+	// Assert B is not pinned yet by reading the map directly — using Publisher here
+	// would kick off a background refresh that races the re-sign below (the test's
+	// signing key is process-global; production never swaps it at runtime).
+	p.mu.RLock()
+	_, hasB := p.pins["io.test.b"]
+	p.mu.RUnlock()
+	if hasB {
+		t.Fatal("io.test.b must not be pinned before it is added to the catalogue")
+	}
+	restore1() // no refresh in flight here; safe to restore the key
+
+	// Pin B in the catalogue (re-signed). A running daemon would not see it until
+	// the next periodic refresh — but a Publisher miss should refetch. From here on
+	// this is the only signing key live, so background refreshes don't race it.
+	restore2 := writeSigned(aAndB)
+	defer restore2()
+
+	p.Publisher("io.test.b") // miss → triggers background refresh
+	got := false
+	for i := 0; i < 200; i++ {
+		if _, ok := p.Publisher("io.test.b"); ok {
+			got = true
+			break
+		}
+		time.Sleep(5 * time.Millisecond)
+	}
+	if !got {
+		t.Fatal("refresh-on-miss did not pick up the newly-pinned app within 1s")
+	}
+
+	// Drain: stop new refreshes and wait for any in-flight one to finish before the
+	// deferred restore swaps the global signing key (else the swap races the read).
+	p.refreshMu.Lock()
+	p.minRefreshGap = time.Hour
+	p.refreshMu.Unlock()
+	for i := 0; i < 200; i++ {
+		p.refreshMu.Lock()
+		inFlight := p.refreshInFlight
+		p.refreshMu.Unlock()
+		if !inFlight {
+			break
+		}
+		time.Sleep(5 * time.Millisecond)
+	}
+}
+
 func TestProvider_NilCacheAndFailClosed(t *testing.T) {
 	// A provider that has never loaded anything reports nothing pinned.
 	p := NewProvider("file:///nonexistent", "")