From 0a82dd6696baba39ab22896a6e0b3600b96019dc Mon Sep 17 00:00:00 2001 From: Bobby Powers Date: Wed, 1 Jul 2026 11:42:24 -0700 Subject: [PATCH 1/4] server: add unauthenticated /healthz route and exit on failed boot GAE serves / as a static file, so production can be fully down (every Express instance crash-looping) while / stays green. /healthz is an Express-routed, unauthenticated uptime-check target that reflects the WASM engine preload via isReady(). It mounts ahead of the request logger and session middleware so constant polling stays cheap and out of the logs. Because initializeServerDependencies() throws before the route mounts, a preload failure surfaces as a non-responding instance, not a 503; the 503 branch is defense-in-depth. Also make a rejected main() log and exit(1) instead of falling into the unhandledRejection logger, which left a zombie process that never bound the port; on GAE that hung instances until the port-bind timeout instead of recycling them with a clear error. This is the in-repo half of issue 693 (the Cloud Monitoring channel/uptime-check/alert setup remains ops-side). --- docs/dev/deploy.md | 3 +- src/server/CLAUDE.md | 1 + src/server/app.ts | 13 ++++ src/server/healthz.ts | 41 +++++++++++++ src/server/index.ts | 13 +++- src/server/tests/healthz.test.ts | 93 +++++++++++++++++++++++++++++ src/server/tests/index-boot.test.ts | 48 +++++++++++++++ 7 files changed, 210 insertions(+), 2 deletions(-) create mode 100644 src/server/healthz.ts create mode 100644 src/server/tests/healthz.test.ts create mode 100644 src/server/tests/index-boot.test.ts diff --git a/docs/dev/deploy.md b/docs/dev/deploy.md index 439ee399f..c034d970d 100644 --- a/docs/dev/deploy.md +++ b/docs/dev/deploy.md @@ -133,6 +133,7 @@ The server loads `config/default.json`, then `config/production.json` when `NODE Against the `--no-promote` version URL, then again on production: - `curl -sI https://<host>/` -> 200 HTML. 
View source: it links a hashed `/static/js/index.<hash>.js` (literal `<%= PUBLIC_URL %>` means the build was skipped) and `/static/css/index.<hash>.css`. +- `curl -s https://<host>/healthz` -> 200 `ok`. This is the only check that exercises the Node server: `/` is a GAE static handler and stays green even when every Express instance is crash-looping (e.g. `ServerInitError`). A WASM preload failure aborts boot before the route mounts, so it shows up as a non-responding instance (connection failure / GAE 5xx), not a 503 -- treat any non-200 here as down. (The route's 503 branch is defense-in-depth, not the expected failure signal.) - `curl -sI https://<host>/static/js/sd-component.js` -> 200 -- the embeddable web component; external sites `` from a different origin (e.g. `python3 -m http.server` on localhost) and confirm the diagram renders and simulates with no console errors. - `curl -sI` on `/robots.txt`, `/manifest.json`, `/favicon.ico`, `/legal/`, `/privacy/` -> 200; `curl -I http://<host>/` -> 301 to https. - Browser: log in with Google, land on Home, no console errors. - New-user flow: sign in with a fresh account, claim a username, confirm the example projects appear and one opens and simulates. 
diff --git a/scripts/tests/validate-app-prod-config.test.mjs b/scripts/tests/validate-app-prod-config.test.mjs index a06ca26ed..d86375c6d 100644 --- a/scripts/tests/validate-app-prod-config.test.mjs +++ b/scripts/tests/validate-app-prod-config.test.mjs @@ -6,6 +6,9 @@ import { validateAppProdConfig } from '../validate-app-prod-config.mjs'; const MAX_INSTANCES_MESSAGE = 'automatic_scaling.max_instances must be set to a positive integer (cost cap; mirror the committed app.yaml)'; +const STATIC_CORS_MESSAGE = + 'handlers must include a /static handler with http_headers.Access-Control-Allow-Origin set to "*" (cross-origin embeds, issue #688; mirror the committed app.yaml)'; + // Every fixture that isn't specifically exercising the max_instances check // carries this block so its expected messages stay focused on one concern. const scalingBlock = ` @@ -13,6 +16,16 @@ automatic_scaling: max_instances: 8 `; +// Likewise for fixtures not exercising the /static CORS check. +const staticHandlerBlock = ` +handlers: +- url: /static + static_dir: public/static + secure: always + http_headers: + Access-Control-Allow-Origin: "*" +`; + const validConfig = ` runtime: nodejs24 @@ -22,7 +35,7 @@ build_env_variables: env_variables: NODE_ENV: production authentication__seshcookie__key: production-secret -${scalingBlock}`; +${scalingBlock}${staticHandlerBlock}`; function messagesFor(source) { return validateAppProdConfig(source, '.app.prod.yaml').map((error) => error.message); @@ -46,6 +59,7 @@ runtime: nodejs24 'env_variables.NODE_ENV must be set to production', 'env_variables.authentication__seshcookie__key must be set to the existing production session key', MAX_INSTANCES_MESSAGE, + STATIC_CORS_MESSAGE, ]); }); @@ -55,7 +69,7 @@ env_variables: NODE_ENV: production GOOGLE_NODE_RUN_SCRIPTS: '' authentication__seshcookie__key: production-secret -${scalingBlock}`); +${scalingBlock}${staticHandlerBlock}`); assert.deepEqual(messages, ['build_env_variables.GOOGLE_NODE_RUN_SCRIPTS must be 
set to an empty string']); }); @@ -67,7 +81,7 @@ build_env_variables: env_variables: NODE_ENV: production authentication__seshcookie__key: production-secret -${scalingBlock}`); +${scalingBlock}${staticHandlerBlock}`); assert.deepEqual(messages, ['build_env_variables.GOOGLE_NODE_RUN_SCRIPTS must be set to an empty string']); }); @@ -79,7 +93,7 @@ build_env_variables: authentication__seshcookie__key: production-secret env_variables: NODE_ENV: production -${scalingBlock}`); +${scalingBlock}${staticHandlerBlock}`); assert.deepEqual(messages, [ 'env_variables.authentication__seshcookie__key must be set to the existing production session key', @@ -95,7 +109,7 @@ build_env_variables: env_variables: NODE_ENV: production authentication__seshcookie__key: ${value} -${scalingBlock}`), +${scalingBlock}${staticHandlerBlock}`), ['env_variables.authentication__seshcookie__key must be set to the existing production session key'], ); } @@ -107,7 +121,7 @@ build_env_variables: GOOGLE_NODE_RUN_SCRIPTS: '' env_variables: authentication__seshcookie__key: production-secret -${scalingBlock}`); +${scalingBlock}${staticHandlerBlock}`); assert.deepEqual(messages, ['env_variables.NODE_ENV must be set to production']); }); @@ -120,14 +134,14 @@ build_env_variables: NODE_ENV: production env_variables: authentication__seshcookie__key: production-secret -${scalingBlock}`, +${scalingBlock}${staticHandlerBlock}`, ` build_env_variables: GOOGLE_NODE_RUN_SCRIPTS: '' env_variables: NODE_ENV: development authentication__seshcookie__key: production-secret -${scalingBlock}`, +${scalingBlock}${staticHandlerBlock}`, ]) { assert.deepEqual(messagesFor(source), ['env_variables.NODE_ENV must be set to production']); } @@ -140,7 +154,7 @@ build_env_variables: env_variables: NODE_ENV: production authentication__seshcookie__key: production-secret -`); +${staticHandlerBlock}`); assert.deepEqual(messages, [MAX_INSTANCES_MESSAGE]); }); @@ -156,13 +170,83 @@ env_variables: authentication__seshcookie__key: 
production-secret automatic_scaling: max_instances: ${value} -`), +${staticHandlerBlock}`), [MAX_INSTANCES_MESSAGE], `max_instances: ${value} should be rejected`, ); } }); + it('rejects a config with no handlers list', () => { + const messages = messagesFor(` +build_env_variables: + GOOGLE_NODE_RUN_SCRIPTS: '' +env_variables: + NODE_ENV: production + authentication__seshcookie__key: production-secret +${scalingBlock}`); + + assert.deepEqual(messages, [STATIC_CORS_MESSAGE]); + }); + + it('rejects a /static handler without the CORS header', () => { + const messages = messagesFor(` +build_env_variables: + GOOGLE_NODE_RUN_SCRIPTS: '' +env_variables: + NODE_ENV: production + authentication__seshcookie__key: production-secret +${scalingBlock} +handlers: +- url: /static + static_dir: public/static + secure: always +`); + + assert.deepEqual(messages, [STATIC_CORS_MESSAGE]); + }); + + it('rejects a /static handler whose CORS header is not the wildcard', () => { + const messages = messagesFor(` +build_env_variables: + GOOGLE_NODE_RUN_SCRIPTS: '' +env_variables: + NODE_ENV: production + authentication__seshcookie__key: production-secret +${scalingBlock} +handlers: +- url: /static + static_dir: public/static + secure: always + http_headers: + Access-Control-Allow-Origin: https://app.simlin.com +`); + + assert.deepEqual(messages, [STATIC_CORS_MESSAGE]); + }); + + it('finds the /static handler anywhere in the handlers list', () => { + const messages = messagesFor(` +build_env_variables: + GOOGLE_NODE_RUN_SCRIPTS: '' +env_variables: + NODE_ENV: production + authentication__seshcookie__key: production-secret +${scalingBlock} +handlers: +- url: /$ + static_files: public/index.html + upload: public/index.html +- url: /static + static_dir: public/static + secure: always + http_headers: + Access-Control-Allow-Origin: '*' +`); + + assert.deepEqual(messages, []); + }); + it('rejects malformed YAML', () => { const messages = messagesFor(` build_env_variables: diff --git 
a/scripts/validate-app-prod-config.mjs b/scripts/validate-app-prod-config.mjs index 42a74f870..da056e90e 100644 --- a/scripts/validate-app-prod-config.mjs +++ b/scripts/validate-app-prod-config.mjs @@ -72,6 +72,22 @@ export function validateAppProdConfig(source, filename = '.app.prod.yaml') { }); } + // Cross-origin embed contract (issue #688): third-party pages hotlink + // sd-component.js, and its engine worker/WASM loads are cross-origin + // requests against /static. Without the wildcard ACAO header the embed + // silently fails to initialize the engine -- a regression no same-origin + // smoke check can catch, so enforce the committed app.yaml's header here. + const handlers = Array.isArray(config.handlers) ? config.handlers : []; + const staticHandler = handlers.find((handler) => isRecord(handler) && handler.url === '/static'); + const headers = isRecord(staticHandler) ? staticHandler.http_headers : undefined; + const allowOrigin = isRecord(headers) ? headers['Access-Control-Allow-Origin'] : undefined; + if (allowOrigin !== '*') { + errors.push({ + message: + 'handlers must include a /static handler with http_headers.Access-Control-Allow-Origin set to "*" (cross-origin embeds, issue #688; mirror the committed app.yaml)', + }); + } + return errors; } diff --git a/src/engine/CLAUDE.md b/src/engine/CLAUDE.md index 8fac063fc..20e9ba4c0 100644 --- a/src/engine/CLAUDE.md +++ b/src/engine/CLAUDE.md @@ -29,6 +29,7 @@ For build/test/lint commands, see [docs/dev/commands.md](/docs/dev/commands.md). 
- `src/patch.ts` -- Model patching logic - `src/worker-protocol.ts` -- Worker message protocol - `src/backend-factory.ts` / `.browser.ts` / `.node.ts` -- Platform-specific backend factories +- `src/worker-trampoline.ts` -- Cross-origin embed support (issue #688): pure decision/construction functions plus an injectable spawn shell that boots the engine worker through a same-origin blob: trampoline when the resolved chunk URL is cross-origin (third-party pages hotlinking sd-component.js). The bundler-facing constraints (inline `new Worker(new URL(...))` pattern, classic-worker downgrade under UMD, `publicPath: 'auto'` deriving from `self.location` in classic worker chunks) are documented in the module header; `backend-factory.browser.ts` and `engine-worker.ts` are the two consumers - `src/internal/` -- Internal modules (project, model, memory, error, import-export) - `src/internal/wasmgen.ts` -- `simlin_model_compile_to_wasm` FFI wrapper + the pure `parseWasmLayout` / `readStridedSeries` decoders for the per-model wasm blob (re-exported via `@simlin/engine/internal`) - `src/internal/canonicalize.ts` -- pure `canonicalizeIdent`, a faithful port of the Rust canonicalizer (used to resolve caller names to wasm-layout slots); not re-exported from the `internal` barrel @@ -54,6 +55,7 @@ For build/test/lint commands, see [docs/dev/commands.md](/docs/dev/commands.md). 
- `tests/race.test.ts` -- Concurrency tests - `tests/cleanup.test.ts` -- Resource cleanup tests - `tests/wasmgen.test.ts`, `tests/canonicalize.test.ts` -- Unit tests for the pure layout decoders and `canonicalizeIdent` +- `tests/worker-trampoline.test.ts` -- Unit tests for the cross-origin worker trampoline (origin decision, trampoline source, spawn interception with fake Worker/URL) - `tests/wasm-backend.test.ts`, `tests/wasm-model.test.ts`, `tests/worker-wasm.test.ts` -- wasm-vs-VM parity through `DirectBackend`, the `Model`/`Sim` facade, and the Web Worker - `tests/wasm-ltm.test.ts` -- LTM-on-wasm parity through the TypeScript surface: drives `Model.simulate({ engine: 'wasm', enableLtm: true })` end-to-end and asserts the resulting `Run.links` match the VM (link set, polarities, per-step scores). Includes a `WorkerBackend` twin and an Unsupported-LTM case that surfaces as a rejection without falling back to the VM - `tests/ltm-test-helpers.ts` -- shared helpers for the LTM tests (`linksByKey`, `expectScoresClose`); kept separate from the test files so the wasm and worker LTM suites compare links the same way diff --git a/src/engine/src/backend-factory.browser.ts b/src/engine/src/backend-factory.browser.ts index 2cae9716e..453aa2a35 100644 --- a/src/engine/src/backend-factory.browser.ts +++ b/src/engine/src/backend-factory.browser.ts @@ -9,21 +9,71 @@ * keeping the main thread free for UI interaction. The Worker is created * lazily on first access and reused for all subsequent operations. * + * When the resolved worker chunk URL is cross-origin -- the embeddable web + * component hotlinked from a third-party page (issue #688) -- the worker is + * created through a same-origin blob trampoline instead, because the Worker + * constructor enforces the same-origin policy regardless of CORS. See + * worker-trampoline.ts for the mechanism. + * * This is selected at build time via tsconfig path mapping for browser builds. 
*/ import { EngineBackend } from './backend'; import { WorkerBackend } from './worker-backend'; +import { spawnWithTrampoline } from './worker-trampoline'; import type { WorkerRequest, WorkerResponse } from './worker-protocol'; +// Bundlers that implement webpack's module variables (rspack, webpack) +// rewrite this free identifier to their runtime publicPath; with +// assetPrefix 'auto' that value is derived from the embedding