From 4e0a00840389fca614b115c1b77d7c69b12ce200 Mon Sep 17 00:00:00 2001
From: Alex Godoroja
Date: Tue, 23 Jun 2026 21:00:23 -0700
Subject: [PATCH] catalogue: refresh pins on a miss so newly-pinned apps spawn
 without a restart
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The daemon refreshes its catalogue publisher pins on a 10-minute timer.
If an app is pinned in the catalogue after the last refresh and the user
installs it, the supervisor finds no pin and refuses it ("not pinned by
the signed catalogue") for up to 10 minutes — exactly what happened with
io.telepat.ideon-free after its catalogue entry (#326) landed while
daemons were already running.

Fix: a pin miss in Provider.Publisher kicks off a rate-limited (30s)
background catalogue refresh. The next supervisor scan (seconds later)
then finds the pin and spawns the app — no restart, no 10-minute wait.
Non-blocking and single-flight so a genuinely unpinned app can't cause a
refresh storm.

Tested: unit test for refresh-on-miss (-race); e2e reproduced the
refusal, then the daemon self-healed ~24s after the pin landed, with no
restart.
---
 internal/catalogue/catalogue.go      | 48 +++++++++++++++--
 internal/catalogue/catalogue_test.go | 77 ++++++++++++++++++++++++++++
 2 files changed, 122 insertions(+), 3 deletions(-)

diff --git a/internal/catalogue/catalogue.go b/internal/catalogue/catalogue.go
index 61858b09..aad5df3a 100644
--- a/internal/catalogue/catalogue.go
+++ b/internal/catalogue/catalogue.go
@@ -146,27 +146,69 @@ type Provider struct {
 
 	mu   sync.RWMutex
 	pins map[string]string
+
+	// refresh-on-miss: an installed app with no pin is likely one that was
+	// pinned in the catalogue after our last periodic refresh. Rather than make
+	// it wait for the 10-minute tick (and be refused meanwhile), a pin miss
+	// triggers a rate-limited background refresh so the next supervisor scan
+	// can spawn it.
+	refreshMu       sync.Mutex
+	lastRefresh     time.Time
+	refreshInFlight bool
+	minRefreshGap   time.Duration
 }
 
+// missRefreshGap bounds how often a pin miss may trigger a catalogue refetch, so
+// a genuinely unpinned app can't cause a refresh storm.
+const missRefreshGap = 30 * time.Second
+
 // NewProvider builds a Provider for the catalogue at url, caching the last
 // verified pin set at cachePath (empty disables the cache).
 func NewProvider(url, cachePath string) *Provider {
-	return &Provider{url: url, cachePath: cachePath, pins: map[string]string{}}
+	return &Provider{url: url, cachePath: cachePath, pins: map[string]string{}, minRefreshGap: missRefreshGap}
 }
 
 // Publisher implements appstore.Config.CataloguePublisher: it returns the
-// catalogue-pinned publisher for appID and whether appID is pinned.
+// catalogue-pinned publisher for appID and whether appID is pinned. On a miss it
+// kicks off a rate-limited background refresh (see refreshOnMiss), so an app
+// that was pinned since the last periodic refresh becomes spawnable within a
+// scan cycle instead of after the next 10-minute tick.
 func (p *Provider) Publisher(appID string) (string, bool) {
 	p.mu.RLock()
-	defer p.mu.RUnlock()
 	pub, ok := p.pins[appID]
+	p.mu.RUnlock()
+	if !ok {
+		p.refreshOnMiss()
+	}
 	return pub, ok
 }
 
+// refreshOnMiss launches a single background Refresh if one isn't already
+// running and the last refresh is older than minRefreshGap. Non-blocking: the
+// caller (a supervisor scan) never waits on the network.
+func (p *Provider) refreshOnMiss() {
+	p.refreshMu.Lock()
+	if p.refreshInFlight || time.Since(p.lastRefresh) < p.minRefreshGap {
+		p.refreshMu.Unlock()
+		return
+	}
+	p.refreshInFlight = true
+	p.refreshMu.Unlock()
+	go func() {
+		_ = p.Refresh()
+		p.refreshMu.Lock()
+		p.refreshInFlight = false
+		p.refreshMu.Unlock()
+	}()
+}
+
 // Refresh fetches + verifies the catalogue and atomically swaps in the new pin
 // set. On success it also writes the disk cache. On failure the previous pins
 // are kept (so a transient outage doesn't suddenly fail-close running apps).
 func (p *Provider) Refresh() error {
+	p.refreshMu.Lock()
+	p.lastRefresh = time.Now()
+	p.refreshMu.Unlock()
 	pins, err := LoadPublishers(p.url)
 	if err != nil {
 		return err
diff --git a/internal/catalogue/catalogue_test.go b/internal/catalogue/catalogue_test.go
index d60eec69..c0100a02 100644
--- a/internal/catalogue/catalogue_test.go
+++ b/internal/catalogue/catalogue_test.go
@@ -5,6 +5,7 @@ import (
 	"os"
 	"path/filepath"
 	"testing"
+	"time"
 
 	"github.com/pilot-protocol/pilotprotocol/internal/catalogtrust"
 )
@@ -113,6 +114,82 @@ func TestProvider_RefreshPublisherAndCache(t *testing.T) {
 	}
 }
 
+func TestProvider_RefreshOnMissPicksUpNewlyPinnedApp(t *testing.T) {
+	dir := t.TempDir()
+	cat := filepath.Join(dir, "catalogue.json")
+	url := "file://" + cat
+
+	// writeSigned writes the catalogue body + a fresh valid .sig and returns a
+	// restore for the ephemeral key swap.
+	writeSigned := func(body string) func() {
+		if err := os.WriteFile(cat, []byte(body), 0o600); err != nil {
+			t.Fatal(err)
+		}
+		sig, restore := catalogtrust.SignWithEphemeralKey([]byte(body))
+		if err := os.WriteFile(cat+".sig", []byte(base64.StdEncoding.EncodeToString(sig)), 0o600); err != nil {
+			t.Fatal(err)
+		}
+		return restore
+	}
+
+	const onlyA = `{"version":2,"apps":[{"id":"io.test.a","publisher":"ed25519:3QJm6H6OdjtfrF+Es1lrRjfFmdtq2tGvVSWxia63vcI="}]}`
+	const aAndB = `{"version":2,"apps":[
+		{"id":"io.test.a","publisher":"ed25519:3QJm6H6OdjtfrF+Es1lrRjfFmdtq2tGvVSWxia63vcI="},
+		{"id":"io.test.b","publisher":"ed25519:VF8fdEP/Oe2aWN3ozQ7Ar22137tHb7dkSw0hlzlk/os="}]}`
+
+	restore1 := writeSigned(onlyA)
+
+	p := NewProvider(url, "")
+	p.minRefreshGap = 0 // allow a miss to refresh immediately (no cooldown in the test)
+	if err := p.Refresh(); err != nil {
+		t.Fatalf("initial Refresh: %v", err)
+	}
+	// Assert B is not pinned yet by reading the map directly — using Publisher here
+	// would kick off a background refresh that races the re-sign below (the test's
+	// signing key is process-global; production never swaps it at runtime).
+	p.mu.RLock()
+	_, hasB := p.pins["io.test.b"]
+	p.mu.RUnlock()
+	if hasB {
+		t.Fatal("io.test.b must not be pinned before it is added to the catalogue")
+	}
+	restore1() // no refresh in flight here; safe to restore the key
+
+	// Pin B in the catalogue (re-signed). A running daemon would not see it until
+	// the next periodic refresh — but a Publisher miss should refetch. From here on
+	// this is the only signing key live, so background refreshes don't race it.
+	restore2 := writeSigned(aAndB)
+	defer restore2()
+
+	p.Publisher("io.test.b") // miss → triggers background refresh
+	got := false
+	for i := 0; i < 200; i++ {
+		if _, ok := p.Publisher("io.test.b"); ok {
+			got = true
+			break
+		}
+		time.Sleep(5 * time.Millisecond)
+	}
+	if !got {
+		t.Fatal("refresh-on-miss did not pick up the newly-pinned app within 1s")
+	}
+
+	// Drain: stop new refreshes and wait for any in-flight one to finish before the
+	// deferred restore swaps the global signing key (else the swap races the read).
+	p.refreshMu.Lock()
+	p.minRefreshGap = time.Hour
+	p.refreshMu.Unlock()
+	for i := 0; i < 200; i++ {
+		p.refreshMu.Lock()
+		inFlight := p.refreshInFlight
+		p.refreshMu.Unlock()
+		if !inFlight {
+			break
+		}
+		time.Sleep(5 * time.Millisecond)
+	}
+}
+
 func TestProvider_NilCacheAndFailClosed(t *testing.T) {
 	// A provider that has never loaded anything reports nothing pinned.
 	p := NewProvider("file:///nonexistent", "")