diff --git a/.gitattributes b/.gitattributes index 935693872..9a3556920 100644 --- a/.gitattributes +++ b/.gitattributes @@ -7,6 +7,11 @@ *.ps1 text eol=lf pnpm-lock.yaml text eol=lf +# Force LF for agent-skills artifacts so sha256 digests are byte-stable across +# Windows and Linux checkouts (referenced by /.well-known/agent-skills/index.json). +src/frontend/public/.well-known/agent-skills/**/*.md text eol=lf +src/frontend/public/.well-known/agent-skills/index.json text eol=lf + # Explicitly mark binary files to avoid corruption *.png binary *.jpg binary diff --git a/Aspire.Dev.slnx b/Aspire.Dev.slnx index 8aa19d99a..5aa406d83 100644 --- a/Aspire.Dev.slnx +++ b/Aspire.Dev.slnx @@ -8,6 +8,7 @@ + diff --git a/src/frontend/package.json b/src/frontend/package.json index 419b58916..6ca4e6e1c 100644 --- a/src/frontend/package.json +++ b/src/frontend/package.json @@ -16,14 +16,16 @@ "scripts": { "git-env": "node ./scripts/write-git-env.mjs", "check-data": "node ./scripts/check-data-files.mjs", + "compute-skill-digests": "node ./scripts/compute-skill-digests.mjs", + "verify-skill-digests": "node ./scripts/compute-skill-digests.mjs --check", "twoslash-types": "tsx ./scripts/generate-twoslash-types.ts", - "dev": "pnpm git-env && pnpm check-data && astro dev", - "dev:host": "pnpm git-env && pnpm check-data && astro dev --host", - "start": "pnpm git-env && pnpm check-data && astro dev", - "start:host": "pnpm git-env && pnpm check-data && astro dev --host", - "build": "pnpm git-env && astro build", - "build:skip-search": "pnpm git-env && astro build --mode skip-search", - "build:production": "pnpm git-env && astro build --mode production", + "dev": "pnpm git-env && pnpm check-data && pnpm compute-skill-digests && astro dev", + "dev:host": "pnpm git-env && pnpm check-data && pnpm compute-skill-digests && astro dev --host", + "start": "pnpm git-env && pnpm check-data && pnpm compute-skill-digests && astro dev", + "start:host": "pnpm git-env && pnpm check-data && pnpm 
compute-skill-digests && astro dev --host", + "build": "pnpm git-env && pnpm compute-skill-digests && astro build", + "build:skip-search": "pnpm git-env && pnpm compute-skill-digests && astro build --mode skip-search", + "build:production": "pnpm git-env && pnpm compute-skill-digests && astro build --mode production", "preview": "astro preview", "preview:host": "astro preview --host", "astro": "pnpm git-env && astro", @@ -42,7 +44,7 @@ "test:e2e": "playwright test", "test:e2e:install": "playwright install chromium", "test:e2e:serve": "pnpm git-env && pnpm check-data && astro dev --host 127.0.0.1 --port 4321", - "lint": "pnpm git-env && pnpm exec astro sync && eslint . --max-warnings 0", + "lint": "pnpm git-env && pnpm exec astro sync && pnpm verify-skill-digests && eslint . --max-warnings 0", "format": "prettier -w --cache --plugin prettier-plugin-astro .", "update:all": "pnpm update:integrations && pnpm update:github-stats && pnpm update:samples", "update:schemas": "tsx ./scripts/update-schemas.ts", diff --git a/src/frontend/public/.well-known/agent-skills/getting-started-with-aspire/SKILL.md b/src/frontend/public/.well-known/agent-skills/getting-started-with-aspire/SKILL.md new file mode 100644 index 000000000..3500923fd --- /dev/null +++ b/src/frontend/public/.well-known/agent-skills/getting-started-with-aspire/SKILL.md @@ -0,0 +1,58 @@ +--- +name: getting-started-with-aspire +description: "Use this skill when a developer asks how to begin using Aspire — installing the Aspire CLI, creating a new Aspire app, running it locally, or finding the authoritative docs, integration catalog, or API reference on aspire.dev. Use it for questions like \"how do I install aspire?\", \"how do I create a new aspire app?\", \"how do I run my aspire app?\", or \"where is the documentation for aspire?\". 
Do not use it for operating an existing Aspire AppHost (use the official `aspire` skill at github.com/microsoft/aspire/tree/main/.agents/skills/aspire), authoring AppHost code, or adding integrations to an existing app." +--- + +# Getting started with Aspire + +[Aspire](https://aspire.dev) is a polyglot stack for building, running, debugging, and deploying distributed applications. The AppHost can be authored in C# or TypeScript today, with additional languages (Java, Go, Python, Rust, …) on the roadmap. The Aspire CLI (`aspire`) is the entry point for everything: scaffolding apps, running them locally, inspecting state, and deploying. + +## When to use this skill + +- The user is new to Aspire and wants to install the CLI. +- The user wants to scaffold a brand-new Aspire app. +- The user wants to know how to run the app they just created. +- The user is looking for the authoritative docs, integration catalog, or API reference. + +## Don't use this skill for + +- Operating an existing Aspire AppHost (resources, logs, traces, dashboard commands). That's the [official `aspire` skill](https://github.com/microsoft/aspire/tree/main/.agents/skills/aspire). +- Editing AppHost source code (C# or TypeScript) — consult the API reference on aspire.dev. + +## Install the Aspire CLI + +The official cross-platform installers are hosted on aspire.dev: + +- **Windows (PowerShell):** `iex (irm https://aspire.dev/install.ps1)` +- **macOS / Linux (bash):** `curl -fsSL https://aspire.dev/install.sh | bash` + +After install, verify with `aspire --version`. Do not install Aspire from NuGet/npm directly when the user wants the CLI — the install script is the supported path. + +## Create a new Aspire app + +```sh +aspire new +``` + +`aspire new` is **interactive**. It prompts for the template (for example `aspire-starter`, `apphost`, `apphost-ts`), the project name, the output location, and the language. 
It creates a subfolder for the new project, so run it from the parent directory where you want the project folder to live. Do not pass fabricated template flags; let the CLI prompt the user. + +## Run the app + +```sh +cd +aspire start +``` + +`aspire start` launches the AppHost and the Aspire dashboard. Prefer `aspire start` over `dotnet run` for AppHosts — `aspire start` is the agent-friendly path; `aspire run` blocks the terminal. + +## Where to learn more + +- **Documentation hub:** +- **CLI reference:** +- **Integration catalog:** +- **C# API reference:** +- **TypeScript API reference:** +- **LLM-friendly corpus:** , +- **Per-page markdown:** every page on aspire.dev is also available as `.md` (or via `Accept: text/markdown` content negotiation). +- **In-page search tool:** when running in a WebMCP-capable browser, the `search-aspire-docs` tool is registered on every aspire.dev page. +- **Source repository:** diff --git a/src/frontend/public/.well-known/agent-skills/index.json b/src/frontend/public/.well-known/agent-skills/index.json new file mode 100644 index 000000000..8a9dbeab0 --- /dev/null +++ b/src/frontend/public/.well-known/agent-skills/index.json @@ -0,0 +1,13 @@ +{ + "$schema": "https://agentskills.io/schema/v0.2.0/discovery.schema.json", + "version": "0.2.0", + "skills": [ + { + "name": "getting-started-with-aspire", + "type": "skill-md", + "description": "Install the Aspire CLI, create a new Aspire app, run it locally, and find the authoritative docs on aspire.dev.", + "url": "/.well-known/agent-skills/getting-started-with-aspire/SKILL.md", + "digest": "sha256:a8ab4851cd19b2bdbe065976180b17b6e741c36bad6b3301668a1bd49e2b52fd" + } + ] +} diff --git a/src/frontend/public/robots.txt b/src/frontend/public/robots.txt index 3a3497a3a..ad88bc9cd 100644 --- a/src/frontend/public/robots.txt +++ b/src/frontend/public/robots.txt @@ -1,4 +1,5 @@ User-agent: * +Content-Signal: ai-train=yes, search=yes, ai-input=yes Allow: / Sitemap: 
https://aspire.dev/sitemap-index.xml \ No newline at end of file diff --git a/src/frontend/scripts/compute-skill-digests.mjs b/src/frontend/scripts/compute-skill-digests.mjs new file mode 100644 index 000000000..8ef7a8989 --- /dev/null +++ b/src/frontend/scripts/compute-skill-digests.mjs @@ -0,0 +1,106 @@ +// @ts-check +/** + * compute-skill-digests.mjs + * + * Recomputes `digest` fields in `public/.well-known/agent-skills/index.json` by + * sha256-hashing each referenced skill artifact's raw bytes. Run as a `prebuild` + * step so the published index.json always matches the served SKILL.md bytes. + * + * Usage: + * node scripts/compute-skill-digests.mjs # update digests in place + * node scripts/compute-skill-digests.mjs --check # exit non-zero if stale + * + * The tool deliberately reads files as raw bytes (no LF/CRLF normalization) + * — `.gitattributes` pins LF for these paths so the on-disk bytes are stable + * across Windows and Linux checkouts. + */ + +import { createHash } from 'node:crypto'; +import { readFile, writeFile } from 'node:fs/promises'; +import { fileURLToPath } from 'node:url'; +import path from 'node:path'; + +const repoRoot = path.resolve(fileURLToPath(import.meta.url), '..', '..'); +const publicRoot = path.join(repoRoot, 'public'); +const indexPath = path.join(publicRoot, '.well-known', 'agent-skills', 'index.json'); + +const checkOnly = process.argv.includes('--check'); + +/** + * @param {string} filePath + * @returns {Promise} sha256 hash as `sha256:` + */ +async function digestFile(filePath) { + const bytes = await readFile(filePath); + const hash = createHash('sha256').update(bytes).digest('hex'); + return `sha256:${hash}`; +} + +/** + * Resolve a public URL like `/.well-known/foo/bar.md` to its on-disk path. + * Validates that the resolved path stays under `publicRoot` so a malicious or + * malformed `url` (e.g. containing `..` segments) cannot escape the public + * directory in dev/CI environments. 
+ * @param {string} url + */ +function resolvePublicPath(url) { + if (!url.startsWith('/')) { + throw new Error(`Skill url must be absolute: ${url}`); + } + const resolved = path.resolve(publicRoot, url.slice(1)); + const publicRootWithSep = publicRoot.endsWith(path.sep) ? publicRoot : publicRoot + path.sep; + if (resolved !== publicRoot && !resolved.startsWith(publicRootWithSep)) { + throw new Error(`Skill url escapes publicRoot: ${url}`); + } + return resolved; +} + +const raw = await readFile(indexPath, 'utf8'); +const original = raw; +/** @type {{ skills: Array<{ name: string; type: string; url: string; digest?: string }> }} */ +const index = JSON.parse(raw); + +if (!Array.isArray(index.skills)) { + throw new Error('index.json must contain a `skills` array'); +} + +let changed = false; +for (const skill of index.skills) { + if (skill.type !== 'skill-md') { + // Future skill types might use a different bytes-to-digest convention; + // bail loudly so we don't silently emit a wrong digest. + throw new Error(`Unsupported skill type "${skill.type}" for ${skill.name}`); + } + const filePath = resolvePublicPath(skill.url); + const fresh = await digestFile(filePath); + if (skill.digest !== fresh) { + changed = true; + if (!checkOnly) { + skill.digest = fresh; + } else { + console.error( + `[compute-skill-digests] STALE digest for ${skill.name}: index.json has ${skill.digest}, file is ${fresh}` + ); + } + } +} + +const next = JSON.stringify(index, null, 2) + '\n'; + +if (checkOnly) { + if (changed || next !== original) { + console.error( + '[compute-skill-digests] index.json is out of date. Run `node scripts/compute-skill-digests.mjs` to refresh.' 
+ ); + process.exit(1); + } + console.log('[compute-skill-digests] index.json is up to date.'); + process.exit(0); +} + +if (next !== original) { + await writeFile(indexPath, next, 'utf8'); + console.log('[compute-skill-digests] Updated', path.relative(repoRoot, indexPath)); +} else { + console.log('[compute-skill-digests] No changes.'); +} diff --git a/src/frontend/src/components/starlight/Head.astro b/src/frontend/src/components/starlight/Head.astro index f957513c0..a4c7b5413 100644 --- a/src/frontend/src/components/starlight/Head.astro +++ b/src/frontend/src/components/starlight/Head.astro @@ -242,6 +242,11 @@ function computeSourceUrl() { })(); + + + ; + /** + * Run a query and return ranked results. `limit` is a soft cap; providers + * may return fewer results when the corpus is small. + */ + search(query: string, limit: number): Promise; +} diff --git a/src/frontend/src/scripts/search/index.ts b/src/frontend/src/scripts/search/index.ts new file mode 100644 index 000000000..3a02d1fa0 --- /dev/null +++ b/src/frontend/src/scripts/search/index.ts @@ -0,0 +1,24 @@ +import { pagefindProvider } from './pagefind-provider'; +import type { SearchProvider } from './SearchProvider'; +import { typesenseProvider } from './typesense-provider'; + +export type { SearchProvider, SearchResponse, SearchResult } from './SearchProvider'; + +/** + * Selects the active search provider based on the build-time + * `PUBLIC_SEARCH_PROVIDER` env var. Defaults to Pagefind, which is what + * Starlight ships today. Once the site migrates to Typesense, set + * `PUBLIC_SEARCH_PROVIDER=typesense` (or change the default below) — the + * WebMCP tool surface stays stable across the swap. + */ +export function getSearchProvider(): SearchProvider { + const raw = (import.meta.env.PUBLIC_SEARCH_PROVIDER as string | undefined) ?? 
import type { SearchProvider, SearchResponse, SearchResult } from './SearchProvider';

/** Subset of a loaded Pagefind result that this provider reads. */
interface PagefindHitData {
  url: string;
  meta?: { title?: string };
  excerpt?: string;
}

/** A Pagefind hit; its payload is fetched lazily through `data()`. */
interface PagefindHit {
  data: () => Promise<PagefindHitData>;
}

/** Subset of the `/pagefind/pagefind.js` module surface used here. */
interface PagefindModule {
  search: (query: string) => Promise<{ results: PagefindHit[] }>;
}

/**
 * Reduce an excerpt to plain text.
 *
 * The excerpt is treated as an HTML fragment: besides tags it may carry
 * character references (`&lt;`, `&amp;`, …) which a tag-stripping regex leaves
 * behind. Parsing it with `DOMParser` and reading `textContent` removes the
 * markup and decodes the references in one step. When `DOMParser` is missing
 * the previous regex behaviour is kept as a fallback.
 */
function excerptToPlainText(html: string): string {
  if (html.length === 0) {
    return '';
  }
  if (typeof DOMParser === 'undefined') {
    return html.replace(/<[^>]+>/g, '').trim();
  }
  const parsed = new DOMParser().parseFromString(html, 'text/html');
  return (parsed.body.textContent ?? '').trim();
}

/**
 * Backs the WebMCP `search-aspire-docs` tool with Starlight's built-in Pagefind
 * index. The index is shipped as a runtime artifact at `/pagefind/pagefind.js`,
 * so we resolve it via dynamic import at first use and cache the outcome —
 * including a failed load, which is cached as `null` and not retried.
 *
 * If Pagefind is absent (skip-search builds, dev mode without a build), the
 * provider returns `{ unavailable: true }` rather than throwing — agents can
 * surface a graceful "search unavailable" state.
 */
export const pagefindProvider: SearchProvider = (() => {
  let modulePromise: Promise<PagefindModule | null> | null = null;

  /** Import the Pagefind runtime; resolves to `null` when it cannot be loaded. */
  async function loadModule(): Promise<PagefindModule | null> {
    if (typeof window === 'undefined') {
      return null;
    }
    try {
      // /pagefind/pagefind.js is emitted by Starlight at build time.
      // @vite-ignore prevents Vite from trying to resolve this at bundle time.
      const mod = (await import(
        /* @vite-ignore */ `${window.location.origin}/pagefind/pagefind.js`
      )) as PagefindModule;
      return mod;
    } catch {
      return null;
    }
  }

  /** Start the one-time module load if needed and wait for it; never rejects. */
  async function ensureReady(): Promise<void> {
    modulePromise ??= loadModule();
    await modulePromise;
  }

  /**
   * Run `query` against Pagefind and load at most `limit` hits (at least one).
   * A blank query yields an empty result set; a missing module yields an
   * `unavailable` response.
   */
  async function search(query: string, limit: number): Promise<SearchResponse> {
    await ensureReady();
    const mod = await modulePromise;
    if (!mod) {
      return {
        results: [],
        unavailable: true,
        reason: 'Pagefind is not available in this environment.',
      };
    }

    const trimmed = query.trim();
    if (trimmed.length === 0) {
      return { results: [] };
    }

    const raw = await mod.search(trimmed);
    // Only the hits that will be returned have their payload loaded.
    const top = raw.results.slice(0, Math.max(1, limit));
    const results: SearchResult[] = await Promise.all(
      top.map(async (hit) => {
        const data = await hit.data();
        return {
          title: data.meta?.title ?? data.url,
          url: data.url,
          excerpt: excerptToPlainText(data.excerpt ?? ''),
        };
      })
    );
    return { results };
  }

  return {
    id: 'pagefind',
    ensureReady,
    search,
  };
})();
/**
 * WebMCP integration: exposes a single `search-aspire-docs` tool to in-page
 * agents via `navigator.modelContext.registerTool()`. The execute callback
 * delegates to a swappable {@link SearchProvider} so the tool surface is
 * stable across search-engine migrations (Pagefind today, Typesense later).
 *
 * Spec: https://webmachinelearning.github.io/webmcp/
 */

import { getSearchProvider, type SearchResponse } from './search';

interface ToolExecuteContext {
  signal?: AbortSignal;
}

interface ModelContextTool {
  name: string;
  description: string;
  inputSchema: object;
  execute: (input: unknown, ctx?: ToolExecuteContext) => Promise<unknown>;
}

interface ModelContextLike {
  registerTool: (tool: ModelContextTool) => unknown;
}

declare global {
  interface Navigator {
    modelContext?: ModelContextLike;
  }
  interface Window {
    __aspireWebMCPRegistered?: boolean;
  }
}

const TOOL_NAME = 'search-aspire-docs';
const SEARCH_LIMIT_DEFAULT = 10;
const SEARCH_LIMIT_MAX = 25;

const inputSchema = {
  type: 'object',
  additionalProperties: false,
  required: ['query'],
  properties: {
    query: {
      type: 'string',
      minLength: 1,
      description: 'A natural-language or keyword query about Aspire.',
    },
    limit: {
      type: 'integer',
      minimum: 1,
      maximum: SEARCH_LIMIT_MAX,
      default: SEARCH_LIMIT_DEFAULT,
      description: 'Maximum number of results to return.',
    },
  },
} as const;

const description =
  'Search the official Aspire documentation, integration catalog, and CLI reference on aspire.dev. ' +
  'Returns ranked results with title, URL, and a short excerpt.';

/**
 * Normalise the caller-supplied `limit`: anything that is not a finite number
 * becomes the default; finite numbers are floored and pinned to
 * `[1, SEARCH_LIMIT_MAX]`.
 */
function clampLimit(input: unknown): number {
  if (typeof input === 'number' && Number.isFinite(input)) {
    const whole = Math.floor(input);
    return Math.min(Math.max(whole, 1), SEARCH_LIMIT_MAX);
  }
  return SEARCH_LIMIT_DEFAULT;
}

/**
 * Wrap a search response in the tool-result envelope: one text part holding
 * the JSON-serialised payload, flagged as an error when the payload reports
 * `unavailable`.
 */
function asMcpResult(payload: SearchResponse): unknown {
  // JSON-as-text keeps the result agent-friendly without requiring the host
  // to render structured data.
  const textPart = { type: 'text', text: JSON.stringify(payload) };
  return {
    content: [textPart],
    isError: payload.unavailable === true,
  };
}

/**
 * Tool callback. Validates the loosely-typed input, runs the query through
 * the active search provider, and always resolves to a tool result — provider
 * failures are reported inside the result instead of being thrown.
 */
async function executeSearch(input: unknown): Promise<unknown> {
  const { query: rawQuery, limit: rawLimit } = (input ?? {}) as {
    query?: unknown;
    limit?: unknown;
  };
  const query = typeof rawQuery === 'string' ? rawQuery : '';
  const limit = clampLimit(rawLimit);

  // Every failure path shares the same "unavailable" payload shape.
  const failure = (reason: string): unknown =>
    asMcpResult({
      results: [],
      unavailable: true,
      reason,
    });

  if (query.trim().length === 0) {
    return failure('`query` is required and must be a non-empty string.');
  }

  const provider = getSearchProvider();
  try {
    return asMcpResult(await provider.search(query, limit));
  } catch (error) {
    return failure(error instanceof Error ? error.message : 'Search failed.');
  }
}

/**
 * Register the tool once per page. Does nothing outside a browser, when the
 * tool was already registered, or when the runtime has no usable
 * `navigator.modelContext.registerTool`.
 */
function register(): void {
  const inBrowser = typeof navigator !== 'undefined' && typeof window !== 'undefined';
  if (!inBrowser || window.__aspireWebMCPRegistered) {
    return;
  }

  const modelContext = navigator.modelContext;
  if (!modelContext || typeof modelContext.registerTool !== 'function') {
    return;
  }

  try {
    modelContext.registerTool({
      name: TOOL_NAME,
      description,
      inputSchema,
      execute: executeSearch,
    });
  } catch {
    // registerTool throws if a tool with the same name is already registered.
    // That's fine on subsequent view transitions; fall through and mark as
    // registered to skip.
  } finally {
    // Set on success and on failure alike, so registration is attempted once.
    window.__aspireWebMCPRegistered = true;
  }
}

register();
namespace StaticHost.AgentReadiness;

/// <summary>
/// Parses HTTP <c>Accept</c> header values into media types with q-values and
/// answers targeted questions like "does the client prefer markdown over HTML?".
/// </summary>
/// <remarks>
/// Implements a small subset of RFC 9110 §12.5.1 sufficient for content
/// negotiation between <c>text/html</c> and <c>text/markdown</c>. We intentionally
/// avoid pulling in <c>Microsoft.Net.Http.Headers</c>'s full parser because we
/// need a deterministic, testable predicate that knows about q-values, the
/// <c>*/*</c> wildcard, and parameter-laden values like
/// <c>text/markdown;profile="cmark";q=0.9</c>.
/// The header is split on <c>,</c> and <c>;</c> without quoted-string awareness,
/// so a quoted parameter value containing either character is mis-split.
/// </remarks>
internal static class AcceptHeaderParser
{
    private const double DefaultQuality = 1.0;

    /// <summary>
    /// One media range from an Accept header. <c>Type</c> and <c>Subtype</c> may
    /// each be <c>*</c>; <c>Quality</c> is always a number in <c>[0, 1]</c>.
    /// </summary>
    internal readonly record struct MediaTypeWithQ(string Type, string Subtype, double Quality)
    {
        /// <summary>
        /// True when this range covers <paramref name="type"/>/<paramref name="subtype"/>,
        /// either literally (case-insensitive) or through a <c>*</c> wildcard.
        /// </summary>
        public bool Matches(string type, string subtype) =>
            (Type == "*" || string.Equals(Type, type, StringComparison.OrdinalIgnoreCase)) &&
            (Subtype == "*" || string.Equals(Subtype, subtype, StringComparison.OrdinalIgnoreCase));
    }

    /// <summary>
    /// Parses the supplied Accept header value into a list of media types with
    /// q-values. Returns an empty list when the header is null/empty.
    /// </summary>
    /// <param name="acceptHeader">Raw header value; may be null or blank.</param>
    /// <returns>
    /// Entries in header order. Segments without a well-formed <c>type/subtype</c>
    /// are skipped. A missing or unparsable <c>q</c> yields 1.0; parsed values are
    /// clamped to <c>[0, 1]</c> and <c>NaN</c> is stored as 0.0.
    /// </returns>
    public static IReadOnlyList<MediaTypeWithQ> Parse(string? acceptHeader)
    {
        if (string.IsNullOrWhiteSpace(acceptHeader))
        {
            return Array.Empty<MediaTypeWithQ>();
        }

        var results = new List<MediaTypeWithQ>();
        foreach (var rawSegment in acceptHeader.Split(','))
        {
            var segment = rawSegment.Trim();
            if (segment.Length == 0)
            {
                continue;
            }

            var parts = segment.Split(';');
            var mediaType = parts[0].Trim();
            var slash = mediaType.IndexOf('/');

            // Require a non-empty type AND a non-empty subtype.
            if (slash <= 0 || slash == mediaType.Length - 1)
            {
                continue;
            }

            var type = mediaType[..slash];
            var subtype = mediaType[(slash + 1)..];
            var q = DefaultQuality;

            // parts[0] is the media range itself; the rest are parameters.
            for (var i = 1; i < parts.Length; i++)
            {
                var param = parts[i].Trim();
                if (param.StartsWith("q=", StringComparison.OrdinalIgnoreCase) &&
                    double.TryParse(
                        param.AsSpan(2),
                        System.Globalization.NumberStyles.Float,
                        System.Globalization.CultureInfo.InvariantCulture,
                        out var parsed))
                {
                    // Math.Clamp returns NaN unchanged, which would leak a
                    // non-number into Quality. Store 0.0 instead: the
                    // comparisons in this class already never rank NaN above
                    // 0, so negotiation outcomes are unchanged.
                    q = double.IsNaN(parsed) ? 0.0 : Math.Clamp(parsed, 0.0, 1.0);
                    break;
                }
            }

            results.Add(new MediaTypeWithQ(type, subtype, q));
        }

        return results;
    }

    /// <summary>
    /// Returns the highest q-value across entries that match the given
    /// <paramref name="type"/>/<paramref name="subtype"/>, including wildcards.
    /// Returns 0.0 when no entry matches (i.e. the type is not acceptable).
    /// </summary>
    /// <remarks>
    /// This takes the maximum over all matching ranges. RFC 9110 §12.5.1 gives
    /// the most specific matching range precedence instead, so
    /// <c>text/html;q=0, */*</c> yields 1.0 here for <c>text/html</c>.
    /// </remarks>
    public static double QualityFor(IReadOnlyList<MediaTypeWithQ> ranked, string type, string subtype)
    {
        var best = 0.0;
        foreach (var entry in ranked)
        {
            if (entry.Matches(type, subtype) && entry.Quality > best)
            {
                best = entry.Quality;
            }
        }
        return best;
    }

    /// <summary>
    /// True when the client prefers <c>text/markdown</c> strictly over both
    /// <c>text/html</c> and <c>*/*</c>. <c>*/*</c> alone (a generic Accept)
    /// is NOT a markdown preference.
    /// </summary>
    public static bool PrefersMarkdown(string? acceptHeader)
    {
        var ranked = Parse(acceptHeader);
        if (ranked.Count == 0)
        {
            return false;
        }

        // Wildcards cannot express markdown preference (a request with only "*/*"
        // should get HTML), so we look only at explicit text/markdown entries.
        var markdownQ = HighestExplicitQuality(ranked, "text", "markdown");
        if (markdownQ <= 0.0)
        {
            return false;
        }

        // For HTML we count any acceptable representation, including wildcards
        // like "*/*" or "text/*", because those make HTML a viable response.
        var anyHtmlQ = QualityFor(ranked, "text", "html");

        // Strictly prefer markdown when it beats any acceptable HTML representation,
        // OR when HTML is not acceptable at all (markdownQ > 0 and htmlQ == 0).
        return anyHtmlQ <= 0.0 || markdownQ > anyHtmlQ;
    }

    /// <summary>
    /// Highest q-value across entries that match <paramref name="type"/>/<paramref name="subtype"/>
    /// exactly (no wildcards). Returns 0.0 when no explicit entry matches.
    /// </summary>
    private static double HighestExplicitQuality(IReadOnlyList<MediaTypeWithQ> ranked, string type, string subtype)
    {
        var best = 0.0;
        foreach (var entry in ranked)
        {
            if (string.Equals(entry.Type, type, StringComparison.OrdinalIgnoreCase) &&
                string.Equals(entry.Subtype, subtype, StringComparison.OrdinalIgnoreCase) &&
                entry.Quality > best)
            {
                best = entry.Quality;
            }
        }
        return best;
    }

    /// <summary>
    /// True when an HTML response would still be acceptable to the client
    /// (either via an explicit <c>text/html</c> entry, <c>text/*</c> wildcard, or <c>*/*</c>).
    /// Used when we can't satisfy a markdown preference and want to know
    /// whether falling back to HTML is OK or whether 406 is the right answer.
    /// </summary>
    /// <remarks>A missing, blank, or entirely unparsable header accepts HTML.</remarks>
    public static bool AcceptsHtml(string? acceptHeader)
    {
        var ranked = Parse(acceptHeader);
        if (ranked.Count == 0)
        {
            return true;
        }
        return QualityFor(ranked, "text", "html") > 0.0;
    }
}
using Microsoft.Extensions.FileProviders;
using Microsoft.Net.Http.Headers;

namespace StaticHost.AgentReadiness;

/// <summary>
/// Adds an RFC 8288 <c>Link</c> response header advertising agent-discovery
/// resources on HTML responses (root + Starlight doc pages).
/// </summary>
/// <remarks>
/// <para>
/// The header is set via <c>Response.OnStarting</c> so we can inspect the final
/// <c>Content-Type</c> and <c>StatusCode</c> after downstream middleware /
/// endpoints have decided what to send. We only emit the header on 2xx
/// responses whose Content-Type starts with <c>text/html</c>; redirects, JSON
/// responses, static assets, and well-known JSON files are all skipped.
/// </para>
/// <para>
/// The middleware MUST run before <c>UseDefaultFiles</c> and <c>UseRouting</c>
/// so it can read the original request path. The decision about whether an
/// <c>.md</c> companion exists is made once per request, before any path
/// rewriting performed by <c>UseDefaultFiles</c>.
/// </para>
/// </remarks>
internal sealed class LinkHeaderMiddleware
{
    // Always-on links advertised on every HTML page.
    // Combined into a single Link header per RFC 8288 §3 (multiple values comma-separated).
    // NOTE(review): RFC 8288 link-values start with a "<target>" URI-Reference;
    // no targets are visible in these literals in this view — confirm against
    // the checked-in file before relying on the strings below.
    private static readonly string s_baseLinkHeader = string.Join(", ",
    [
        "; rel=\"llms\"; type=\"text/plain\"",
        "; rel=\"agent-skills\"; type=\"application/json\"",
        "; rel=\"sitemap\"; type=\"application/xml\""
    ]);

    private readonly RequestDelegate _next;
    private readonly IFileProvider _fileProvider;

    /// <summary>
    /// Captures the next pipeline delegate and the web-root file provider used
    /// to look up markdown companions.
    /// </summary>
    public LinkHeaderMiddleware(RequestDelegate next, IWebHostEnvironment env)
    {
        _next = next;
        _fileProvider = env.WebRootFileProvider;
    }

    /// <summary>
    /// For GET/HEAD requests outside infrastructure paths, registers an
    /// <c>OnStarting</c> callback that appends the <c>Link</c> header, then
    /// always forwards the request to the next middleware. Nothing is written
    /// to the response here.
    /// </summary>
    public Task InvokeAsync(HttpContext context)
    {
        if (!ShouldHandle(context.Request))
        {
            return _next(context);
        }

        // Infrastructure paths (/_astro, /.well-known, /healthz, /install.*, /pagefind)
        // never need a Link header. The path list lives on MarkdownPathMapper so
        // both middlewares stay in lock-step.
        if (MarkdownPathMapper.IsInfrastructurePath(context.Request.Path))
        {
            return _next(context);
        }

        // Resolve the markdown companion against the ORIGINAL request path,
        // before UseDefaultFiles rewrites it to /foo/index.html.
        // A null result is handled below as "no companion to advertise".
        var companionPath = MarkdownPathMapper.TryGetMarkdownCompanion(
            context.Request.Path,
            _fileProvider);

        // The lambda is static (captures nothing); everything it needs travels
        // in the LinkHeaderState passed as the callback state.
        context.Response.OnStarting(static state =>
        {
            var ctx = (LinkHeaderState)state;
            var response = ctx.HttpContext.Response;

            // Only attach Link to successful HTML responses. Skip 3xx/4xx/5xx
            // and any non-HTML content type. This keeps redirects, JSON,
            // images, and static asset responses clean.
            if (response.StatusCode is < 200 or >= 300)
            {
                return Task.CompletedTask;
            }

            // StartsWith (not equality) so values carrying a parameter such as
            // "text/html; charset=utf-8" still qualify.
            var contentType = response.ContentType;
            if (string.IsNullOrEmpty(contentType) ||
                !contentType.StartsWith("text/html", StringComparison.OrdinalIgnoreCase))
            {
                return Task.CompletedTask;
            }

            // When a companion path was resolved, advertise it as a
            // text/markdown alternate after the always-on links.
            var header = ctx.CompanionPath is null
                ? s_baseLinkHeader
                : $"{s_baseLinkHeader}, <{ctx.CompanionPath}>; rel=\"alternate\"; type=\"text/markdown\"";

            // Append rather than overwrite so we cooperate with anything else
            // that may have set a Link header.
            response.Headers.Append(HeaderNames.Link, header);
            return Task.CompletedTask;
        }, new LinkHeaderState(context, companionPath));

        return _next(context);
    }

    // Only GET and HEAD requests are considered.
    private static bool ShouldHandle(HttpRequest request) =>
        HttpMethods.IsGet(request.Method) || HttpMethods.IsHead(request.Method);

    // State handed to the OnStarting callback: the request's context plus the
    // companion path resolved before the rest of the pipeline ran (null = none).
    private sealed record LinkHeaderState(HttpContext HttpContext, string? CompanionPath);
}
The HTML response is left untouched (no Vary +/// added) so HTML continues to cache normally. +/// +/// +internal sealed class MarkdownNegotiationMiddleware +{ + private readonly RequestDelegate _next; + private readonly IFileProvider _fileProvider; + private readonly ILogger _logger; + + public MarkdownNegotiationMiddleware( + RequestDelegate next, + IWebHostEnvironment env, + ILogger logger) + { + _next = next; + _logger = logger; + // Use the same provider MapStaticAssets reads from so wwwroot is the source of truth. + _fileProvider = env.WebRootFileProvider; + } + + public async Task InvokeAsync(HttpContext context) + { + if (!ShouldHandle(context.Request)) + { + await _next(context); + return; + } + + // Infrastructure paths (e.g. /.well-known/*, /_astro/*, /healthz) + // are NEVER subject to markdown negotiation — they have their own + // content types and the agent might prefer markdown for the page + // navigation but still expect JSON for these resources. + if (MarkdownPathMapper.IsInfrastructurePath(context.Request.Path)) + { + await _next(context); + return; + } + + var acceptHeader = context.Request.Headers.Accept.ToString(); + if (!AcceptHeaderParser.PrefersMarkdown(acceptHeader)) + { + await _next(context); + return; + } + + var companionPath = MarkdownPathMapper.TryGetMarkdownCompanion( + context.Request.Path, + _fileProvider); + if (companionPath is null) + { + // Markdown was preferred but no companion exists. + // Fall back to HTML when it's acceptable; otherwise 406. 
+ if (AcceptHeaderParser.AcceptsHtml(acceptHeader)) + { + await _next(context); + } + else + { + context.Response.StatusCode = StatusCodes.Status406NotAcceptable; + context.Response.ContentType = "text/plain; charset=utf-8"; + await context.Response.WriteAsync( + "406 Not Acceptable: this resource is available as text/html or, where a companion exists, text/markdown.", + context.RequestAborted); + } + return; + } + + var fileInfo = _fileProvider.GetFileInfo(companionPath); + if (!fileInfo.Exists || fileInfo.IsDirectory) + { + // Race / inconsistency between MarkdownPathMapper.TryGetMarkdownCompanion + // and the live file system. Fall through to default pipeline rather than 500. + _logger.LogDebug( + "Markdown companion {CompanionPath} reported but missing on disk; falling through.", + companionPath); + await _next(context); + return; + } + + WriteMarkdownHeaders(context, fileInfo); + + if (HttpMethods.IsHead(context.Request.Method)) + { + // Send headers only; ASP.NET Core will flush them on response complete. + return; + } + + await context.Response.SendFileAsync(fileInfo, context.RequestAborted); + } + + private static bool ShouldHandle(HttpRequest request) => + HttpMethods.IsGet(request.Method) || HttpMethods.IsHead(request.Method); + + private static void WriteMarkdownHeaders(HttpContext context, IFileInfo fileInfo) + { + var response = context.Response; + response.StatusCode = StatusCodes.Status200OK; + response.ContentType = "text/markdown; charset=utf-8"; + response.ContentLength = fileInfo.Length; + // Vary: Accept is the correct HTTP signal; combined with Cache-Control: private + // it tells well-behaved CDNs (incl. Azure Front Door) NOT to cache this response, + // avoiding cache-key explosion across many distinct Accept values. 
+ response.Headers.Vary = "Accept"; + response.Headers.CacheControl = "private, max-age=0, must-revalidate"; + response.Headers[HeaderNames.LastModified] = + fileInfo.LastModified.UtcDateTime.ToString("R", System.Globalization.CultureInfo.InvariantCulture); + } +} diff --git a/src/statichost/StaticHost/AgentReadiness/MarkdownPathMapper.cs b/src/statichost/StaticHost/AgentReadiness/MarkdownPathMapper.cs new file mode 100644 index 000000000..7e952aa2f --- /dev/null +++ b/src/statichost/StaticHost/AgentReadiness/MarkdownPathMapper.cs @@ -0,0 +1,94 @@ +using Microsoft.Extensions.FileProviders; + +namespace StaticHost.AgentReadiness; + +/// +/// Maps an HTTP request path to its sibling .md file in wwwroot, +/// matching the layout produced by starlight-page-actions: +/// +/// //index.html + /index.md +/// /foo//foo/index.html + /foo.md +/// /foo/foo/index.html (or /foo.html) + /foo.md +/// /foo/bar//foo/bar/index.html + /foo/bar.md +/// /foo.html/foo.html + /foo.md +/// +/// +/// A companion is only valid when both the .md and the +/// corresponding HTML page exist on disk. Not every page on aspire.dev produces +/// a .md companion (e.g. DocFX-rendered /reference/api/** pages, +/// the search page, Lunaria stats, redirects, the 404 page); requiring an HTML +/// sibling prevents us from advertising or serving a stray .md as the +/// companion of a URL that has no real HTML page. +/// +/// +/// Returns when the path is infrastructure (e.g. +/// /_astro/*, /.well-known/*), when no .md exists, or +/// when no HTML sibling exists. +/// +/// +internal static class MarkdownPathMapper +{ + public static bool IsInfrastructurePath(PathString path) + { + var raw = path.HasValue ? path.Value! 
: "/"; + return raw.StartsWith("/_astro/", StringComparison.OrdinalIgnoreCase) || + raw.StartsWith("/.well-known/", StringComparison.OrdinalIgnoreCase) || + raw.StartsWith("/healthz", StringComparison.OrdinalIgnoreCase) || + raw.StartsWith("/install.", StringComparison.OrdinalIgnoreCase) || + raw.StartsWith("/pagefind/", StringComparison.OrdinalIgnoreCase); + } + + public static string? TryGetMarkdownCompanion(PathString path, IFileProvider fileProvider) + { + if (IsInfrastructurePath(path)) + { + return null; + } + + var raw = path.HasValue ? path.Value! : "/"; + + if (raw == "/") + { + return CompanionIfBothExist(fileProvider, "/index.html", "/index.md"); + } + + if (raw.EndsWith('/')) + { + return CompanionIfBothExist(fileProvider, $"{raw}index.html", $"{raw[..^1]}.md"); + } + + if (raw.EndsWith(".html", StringComparison.OrdinalIgnoreCase)) + { + return CompanionIfBothExist(fileProvider, raw, raw[..^".html".Length] + ".md"); + } + + var dot = raw.LastIndexOf('.'); + var lastSlash = raw.LastIndexOf('/'); + if (dot <= lastSlash) + { + // Extensionless path like /foo. Static-file middleware may serve + // either /foo/index.html (Starlight) or /foo.html. Either is enough + // to anchor the companion mapping. + var md = $"{raw}.md"; + if (!Exists(fileProvider, md)) + { + return null; + } + if (Exists(fileProvider, $"{raw}/index.html") || Exists(fileProvider, $"{raw}.html")) + { + return md; + } + } + + return null; + } + + private static string? CompanionIfBothExist(IFileProvider provider, string htmlPath, string mdPath) => + Exists(provider, htmlPath) && Exists(provider, mdPath) ? 
mdPath : null; + + private static bool Exists(IFileProvider provider, string relativePath) + { + var info = provider.GetFileInfo(relativePath); + return info.Exists && !info.IsDirectory; + } +} diff --git a/src/statichost/StaticHost/GlobalUsings.cs b/src/statichost/StaticHost/GlobalUsings.cs index 31e62ced9..604760adf 100644 --- a/src/statichost/StaticHost/GlobalUsings.cs +++ b/src/statichost/StaticHost/GlobalUsings.cs @@ -1,6 +1,7 @@ global using System.Diagnostics; global using StaticHost; +global using StaticHost.AgentReadiness; global using StaticHost.Telemetry; global using OpenTelemetry; diff --git a/src/statichost/StaticHost/Program.cs b/src/statichost/StaticHost/Program.cs index 0a7a3e098..de7c4592b 100644 --- a/src/statichost/StaticHost/Program.cs +++ b/src/statichost/StaticHost/Program.cs @@ -18,6 +18,15 @@ } app.UseHttpsRedirection(); + +// Agent-readiness middlewares MUST run before UseDefaultFiles + UseRouting: +// * UseDefaultFiles rewrites /foo/ -> /foo/index.html, breaking .md-companion mapping. +// * MapStaticAssets registers endpoints during UseRouting, so a path rewrite +// after UseRouting wouldn't re-trigger endpoint selection. +// MarkdownNegotiationMiddleware short-circuits to serve the .md body directly; +// LinkHeaderMiddleware attaches a Link header on HTML 2xx responses via OnStarting. 
+app.UseAgentReadiness(); + app.UseDefaultFiles(); // add routing after default files, so the default file middleware can modify the path first diff --git a/src/statichost/StaticHost/StaticHost.csproj b/src/statichost/StaticHost/StaticHost.csproj index d5fd4bf48..00062f87c 100644 --- a/src/statichost/StaticHost/StaticHost.csproj +++ b/src/statichost/StaticHost/StaticHost.csproj @@ -23,7 +23,22 @@ - + + + + + + + all + diff --git a/tests/StaticHost.Tests/AcceptHeaderParserTests.cs b/tests/StaticHost.Tests/AcceptHeaderParserTests.cs new file mode 100644 index 000000000..9a3b24b09 --- /dev/null +++ b/tests/StaticHost.Tests/AcceptHeaderParserTests.cs @@ -0,0 +1,49 @@ +using StaticHost.AgentReadiness; + +namespace StaticHost.Tests; + +/// +/// Direct unit tests for . These cover the +/// q-value parsing edge cases without spinning up a host. +/// +public sealed class AcceptHeaderParserTests +{ + [Theory] + [InlineData("text/markdown")] + [InlineData("text/markdown, text/html;q=0.5")] + [InlineData("text/markdown;q=0.9, text/html;q=0.5")] + [InlineData("text/markdown;q=1.0, text/*;q=0.5")] + [InlineData("text/markdown;q=0.8, */*;q=0.5")] + public void PrefersMarkdown_returns_true_when_markdown_outranks_html(string accept) + { + Assert.True(AcceptHeaderParser.PrefersMarkdown(accept)); + } + + [Theory] + [InlineData("")] + [InlineData("text/html")] + [InlineData("text/html, application/xhtml+xml")] + [InlineData("text/markdown;q=0.5, text/html;q=1.0")] + [InlineData("text/markdown;q=0.5, text/html")] + [InlineData("*/*")] + [InlineData("text/*")] + [InlineData("text/markdown;q=0.0, text/html")] + public void PrefersMarkdown_returns_false_for_browsers_and_lower_q(string accept) + { + Assert.False(AcceptHeaderParser.PrefersMarkdown(accept)); + } + + [Theory] + [InlineData("text/html", true)] + [InlineData("*/*", true)] + [InlineData("text/*", true)] + [InlineData("text/html, application/xhtml+xml", true)] + [InlineData("text/markdown", false)] + 
[InlineData("text/markdown;q=1.0, text/html;q=0.0", false)] + [InlineData("application/json", false)] + [InlineData("", true)] // empty Accept header is treated as "anything" + public void AcceptsHtml_distinguishes_html_clients(string accept, bool expected) + { + Assert.Equal(expected, AcceptHeaderParser.AcceptsHtml(accept)); + } +} diff --git a/tests/StaticHost.Tests/AgentReadinessTestServer.cs b/tests/StaticHost.Tests/AgentReadinessTestServer.cs new file mode 100644 index 000000000..313b61210 --- /dev/null +++ b/tests/StaticHost.Tests/AgentReadinessTestServer.cs @@ -0,0 +1,125 @@ +using System.Net.Http; +using Microsoft.AspNetCore.Builder; +using Microsoft.AspNetCore.Hosting; +using Microsoft.AspNetCore.TestHost; +using Microsoft.Extensions.DependencyInjection; +using Microsoft.Extensions.Hosting; +using Microsoft.Extensions.Logging; +using StaticHost.AgentReadiness; + +namespace StaticHost.Tests; + +/// +/// Builds an in-process ASP.NET Core test server that mirrors the production +/// pipeline order for the agent-readiness middlewares (markdown negotiation +/// and Link headers) without depending on the frontend build. +/// +internal sealed class AgentReadinessTestServer : IAsyncDisposable +{ + private readonly IHost _host; + private readonly TestServer _testServer; + private readonly TempWebRoot _wwwroot; + + public HttpClient Client { get; } + + public string WebRoot => _wwwroot.Path; + + private AgentReadinessTestServer(IHost host, TestServer testServer, TempWebRoot wwwroot) + { + _host = host; + _testServer = testServer; + _wwwroot = wwwroot; + Client = testServer.CreateClient(); + } + + public static async Task StartAsync(Action? 
seed = null) + { + var wwwroot = new TempWebRoot(); + try + { + seed?.Invoke(wwwroot); + + var hostBuilder = Host.CreateDefaultBuilder() + .ConfigureLogging(static logging => logging.ClearProviders()) + .ConfigureWebHost(web => + { + web.UseTestServer(); + web.UseWebRoot(wwwroot.Path); + web.Configure(app => + { + // Match production order: agent-readiness BEFORE + // UseDefaultFiles + UseRouting (see Program.cs). + app.UseAgentReadiness(); + app.UseDefaultFiles(); + app.UseStaticFiles(); + + // Endpoint that mimics MapStaticAssets fallback: 404 if + // no static asset matched. + app.Run(static ctx => + { + ctx.Response.StatusCode = StatusCodes.Status404NotFound; + return Task.CompletedTask; + }); + }); + }); + + var host = hostBuilder.Build(); + await host.StartAsync(); + var testServer = host.GetTestServer(); + return new AgentReadinessTestServer(host, testServer, wwwroot); + } + catch + { + wwwroot.Dispose(); + throw; + } + } + + public async ValueTask DisposeAsync() + { + Client.Dispose(); + _testServer.Dispose(); + await _host.StopAsync(); + _host.Dispose(); + _wwwroot.Dispose(); + } +} + +internal sealed class TempWebRoot : IDisposable +{ + public string Path { get; } + + public TempWebRoot() + { + Path = System.IO.Path.Combine( + System.IO.Path.GetTempPath(), + $"aspire-agent-readiness-{Guid.NewGuid():N}"); + Directory.CreateDirectory(Path); + } + + public void WriteFile(string relativePath, string contents) + { + var fullPath = System.IO.Path.Combine(Path, relativePath.TrimStart('/').Replace('/', System.IO.Path.DirectorySeparatorChar)); + var directory = System.IO.Path.GetDirectoryName(fullPath); + if (!string.IsNullOrEmpty(directory)) + { + Directory.CreateDirectory(directory); + } + File.WriteAllText(fullPath, contents); + } + + public void Dispose() + { + try + { + if (Directory.Exists(Path)) + { + Directory.Delete(Path, recursive: true); + } + } + catch (IOException) + { + // best-effort + } + } +} diff --git a/tests/StaticHost.Tests/GlobalUsings.cs 
b/tests/StaticHost.Tests/GlobalUsings.cs new file mode 100644 index 000000000..a6f18f0cb --- /dev/null +++ b/tests/StaticHost.Tests/GlobalUsings.cs @@ -0,0 +1,10 @@ +global using System; +global using System.Collections.Generic; +global using System.IO; +global using System.Linq; +global using System.Net; +global using System.Net.Http; +global using System.Net.Http.Headers; +global using System.Threading.Tasks; +global using Microsoft.AspNetCore.Http; +global using Xunit; diff --git a/tests/StaticHost.Tests/LinkHeaderTests.cs b/tests/StaticHost.Tests/LinkHeaderTests.cs new file mode 100644 index 000000000..c7fbf9822 --- /dev/null +++ b/tests/StaticHost.Tests/LinkHeaderTests.cs @@ -0,0 +1,134 @@ +namespace StaticHost.Tests; + +public sealed class LinkHeaderTests +{ + private static string? GetLinkHeader(HttpResponseMessage response) => + response.Headers.TryGetValues("Link", out var values) ? string.Join(", ", values) : null; + + [Fact] + public async Task Html_response_includes_link_header_with_required_rels() + { + await using var server = await AgentReadinessTestServer.StartAsync(root => + { + SamplePages.SeedRoot(root); + root.WriteFile("llms.txt", "# llms\n"); + root.WriteFile(".well-known/agent-skills/index.json", "{\"skills\":[]}"); + root.WriteFile("sitemap-index.xml", ""); + }); + + using var response = await server.Client.GetAsync("/"); + Assert.Equal(HttpStatusCode.OK, response.StatusCode); + var link = GetLinkHeader(response); + Assert.NotNull(link); + Assert.Contains("rel=\"llms\"", link); + Assert.Contains("rel=\"agent-skills\"", link); + Assert.Contains("rel=\"sitemap\"", link); + Assert.Contains("rel=\"alternate\"", link); + Assert.Contains("type=\"text/markdown\"", link); + // Confirm we are NOT advertising api-catalog (out of scope per RFC 9727). 
+ Assert.DoesNotContain("api-catalog", link); + } + + [Fact] + public async Task Page_without_md_companion_omits_alternate_link() + { + await using var server = await AgentReadinessTestServer.StartAsync(root => + { + root.WriteFile("reference/api/csharp/index.html", SamplePages.Html); + // no .md companion + }); + + using var response = await server.Client.GetAsync("/reference/api/csharp/"); + Assert.Equal(HttpStatusCode.OK, response.StatusCode); + var link = GetLinkHeader(response); + Assert.NotNull(link); + Assert.DoesNotContain("rel=\"alternate\"", link); + Assert.Contains("rel=\"llms\"", link); + } + + [Fact] + public async Task Stray_md_without_html_sibling_does_not_get_advertised() + { + // A naked .md sitting in wwwroot with no Starlight HTML page must not be + // exposed as a companion. The request below 404s in the static pipeline, + // but even before that the OnStarting predicate already won't attach the + // header on a 404 — this test pins the LinkHeader path mapping so it + // would refuse to advertise the .md even on a 200 collision. 
+ await using var server = await AgentReadinessTestServer.StartAsync(root => + { + root.WriteFile("stray.md", "# stray\n"); + }); + + using var response = await server.Client.GetAsync("/stray/"); + Assert.Equal(HttpStatusCode.NotFound, response.StatusCode); + Assert.Null(GetLinkHeader(response)); + } + + [Fact] + public async Task Json_response_does_not_get_link_header() + { + await using var server = await AgentReadinessTestServer.StartAsync(root => + { + root.WriteFile(".well-known/agent-skills/index.json", "{\"skills\":[]}"); + }); + + using var response = await server.Client.GetAsync("/.well-known/agent-skills/index.json"); + Assert.Equal(HttpStatusCode.OK, response.StatusCode); + Assert.Null(GetLinkHeader(response)); + } + + [Fact] + public async Task Static_asset_path_does_not_get_link_header() + { + await using var server = await AgentReadinessTestServer.StartAsync(root => + { + root.WriteFile("_astro/app.abc123.js", "/* js */"); + }); + + using var response = await server.Client.GetAsync("/_astro/app.abc123.js"); + Assert.Equal(HttpStatusCode.OK, response.StatusCode); + Assert.Null(GetLinkHeader(response)); + } + + [Fact] + public async Task Not_found_response_does_not_get_link_header() + { + await using var server = await AgentReadinessTestServer.StartAsync(); + + using var response = await server.Client.GetAsync("/nope"); + Assert.Equal(HttpStatusCode.NotFound, response.StatusCode); + Assert.Null(GetLinkHeader(response)); + } + + [Fact] + public async Task Markdown_negotiated_response_does_not_get_link_header() + { + // The markdown middleware short-circuits before the Link middleware + // attaches headers; even if it didn't, the response Content-Type is + // text/markdown, so the OnStarting predicate would skip it. 
+ await using var server = await AgentReadinessTestServer.StartAsync(SamplePages.SeedRoot); + + using var request = new HttpRequestMessage(HttpMethod.Get, "/"); + request.Headers.Accept.Add(new MediaTypeWithQualityHeaderValue("text/markdown")); + using var response = await server.Client.SendAsync(request); + + Assert.Equal(HttpStatusCode.OK, response.StatusCode); + Assert.Equal("text/markdown", response.Content.Headers.ContentType?.MediaType); + Assert.Null(GetLinkHeader(response)); + } + + [Fact] + public async Task Healthz_skipped_from_link_middleware() + { + await using var server = await AgentReadinessTestServer.StartAsync(root => + { + // healthz is a MapGet endpoint in production; here we just simulate + // a static text response to exercise the path-skip guard. + root.WriteFile("healthz/index.html", SamplePages.Html); + }); + + using var response = await server.Client.GetAsync("/healthz/"); + Assert.Equal(HttpStatusCode.OK, response.StatusCode); + Assert.Null(GetLinkHeader(response)); + } +} diff --git a/tests/StaticHost.Tests/MarkdownNegotiationTests.cs b/tests/StaticHost.Tests/MarkdownNegotiationTests.cs new file mode 100644 index 000000000..3b8160c4a --- /dev/null +++ b/tests/StaticHost.Tests/MarkdownNegotiationTests.cs @@ -0,0 +1,146 @@ +namespace StaticHost.Tests; + +public sealed class MarkdownNegotiationTests +{ + [Fact] + public async Task Get_with_markdown_accept_returns_md_companion() + { + await using var server = await AgentReadinessTestServer.StartAsync(root => + { + SamplePages.SeedRoot(root); + SamplePages.SeedFolderPage(root, "get-started"); + }); + + using var request = new HttpRequestMessage(HttpMethod.Get, "/get-started/"); + request.Headers.Accept.Add(new MediaTypeWithQualityHeaderValue("text/markdown")); + using var response = await server.Client.SendAsync(request); + + Assert.Equal(HttpStatusCode.OK, response.StatusCode); + Assert.Equal("text/markdown", response.Content.Headers.ContentType?.MediaType); + Assert.Equal("utf-8", 
response.Content.Headers.ContentType?.CharSet); + var body = await response.Content.ReadAsStringAsync(); + Assert.StartsWith("#", body); + Assert.DoesNotContain(" SamplePages.SeedFolderPage(root, "get-started")); + + using var request = new HttpRequestMessage(HttpMethod.Get, "/get-started/"); + request.Headers.Accept.Add(new MediaTypeWithQualityHeaderValue("text/markdown")); + using var response = await server.Client.SendAsync(request); + + Assert.True(response.Headers.CacheControl?.Private ?? false, + "Cache-Control must be private to avoid Front Door cache-key explosion on Vary: Accept."); + Assert.Equal(TimeSpan.Zero, response.Headers.CacheControl?.MaxAge); + Assert.True(response.Headers.CacheControl?.MustRevalidate ?? false); + Assert.Contains("Accept", response.Headers.Vary); + } + + [Fact] + public async Task Head_with_markdown_accept_returns_headers_only() + { + await using var server = await AgentReadinessTestServer.StartAsync( + root => SamplePages.SeedFolderPage(root, "get-started")); + + using var request = new HttpRequestMessage(HttpMethod.Head, "/get-started/"); + request.Headers.Accept.Add(new MediaTypeWithQualityHeaderValue("text/markdown")); + using var response = await server.Client.SendAsync(request); + + Assert.Equal(HttpStatusCode.OK, response.StatusCode); + Assert.Equal("text/markdown", response.Content.Headers.ContentType?.MediaType); + // ContentLength header should be set even on HEAD. 
+ Assert.NotNull(response.Content.Headers.ContentLength); + Assert.True(response.Content.Headers.ContentLength > 0); + } + + [Fact] + public async Task Browser_request_unaffected_by_negotiation_middleware() + { + await using var server = await AgentReadinessTestServer.StartAsync( + root => SamplePages.SeedFolderPage(root, "get-started")); + + using var request = new HttpRequestMessage(HttpMethod.Get, "/get-started/"); + request.Headers.Accept.Add(new MediaTypeWithQualityHeaderValue("text/html")); + request.Headers.Accept.Add(new MediaTypeWithQualityHeaderValue("application/xhtml+xml")); + using var response = await server.Client.SendAsync(request); + + Assert.Equal(HttpStatusCode.OK, response.StatusCode); + Assert.Equal("text/html", response.Content.Headers.ContentType?.MediaType); + // No Vary: Accept on plain HTML responses (so Front Door can still cache). + Assert.DoesNotContain("Accept", response.Headers.Vary); + } + + [Fact] + public async Task Markdown_preferred_with_no_companion_falls_back_to_html_when_acceptable() + { + await using var server = await AgentReadinessTestServer.StartAsync(root => + { + // Page exists but has no .md companion (e.g. /reference/api/csharp/...) 
+ root.WriteFile("reference/api/csharp/index.html", SamplePages.Html); + }); + + using var request = new HttpRequestMessage(HttpMethod.Get, "/reference/api/csharp/"); + request.Headers.Accept.Add(new MediaTypeWithQualityHeaderValue("text/markdown", 1.0)); + request.Headers.Accept.Add(new MediaTypeWithQualityHeaderValue("text/html", 0.5)); + using var response = await server.Client.SendAsync(request); + + Assert.Equal(HttpStatusCode.OK, response.StatusCode); + Assert.Equal("text/html", response.Content.Headers.ContentType?.MediaType); + } + + [Fact] + public async Task Markdown_only_without_companion_returns_406() + { + await using var server = await AgentReadinessTestServer.StartAsync(root => + { + root.WriteFile("reference/api/csharp/index.html", SamplePages.Html); + }); + + using var request = new HttpRequestMessage(HttpMethod.Get, "/reference/api/csharp/"); + request.Headers.Accept.Clear(); + request.Headers.Accept.Add(new MediaTypeWithQualityHeaderValue("text/markdown")); + using var response = await server.Client.SendAsync(request); + + Assert.Equal(HttpStatusCode.NotAcceptable, response.StatusCode); + } + + [Fact] + public async Task Stray_md_without_html_sibling_does_not_get_served_as_markdown() + { + // Mirrors the LinkHeader stray-md test: a .md without a corresponding + // HTML page must NOT be served as a "companion" of the URL. + await using var server = await AgentReadinessTestServer.StartAsync(root => + { + root.WriteFile("stray.md", "# stray\n"); + }); + + using var request = new HttpRequestMessage(HttpMethod.Get, "/stray/"); + request.Headers.Accept.Clear(); + request.Headers.Accept.Add(new MediaTypeWithQualityHeaderValue("text/markdown")); + using var response = await server.Client.SendAsync(request); + + // No HTML companion means no markdown either: must NOT 200 with the .md body. 
+ Assert.NotEqual(HttpStatusCode.OK, response.StatusCode); + } + + [Fact] + public async Task WellKnown_paths_skip_markdown_negotiation() + { + await using var server = await AgentReadinessTestServer.StartAsync(root => + { + root.WriteFile(".well-known/agent-skills/index.json", "{\"skills\":[]}"); + }); + + using var request = new HttpRequestMessage(HttpMethod.Get, "/.well-known/agent-skills/index.json"); + request.Headers.Accept.Add(new MediaTypeWithQualityHeaderValue("text/markdown")); + using var response = await server.Client.SendAsync(request); + + Assert.Equal(HttpStatusCode.OK, response.StatusCode); + // Should still be served as JSON, not markdown. + Assert.Equal("application/json", response.Content.Headers.ContentType?.MediaType); + } +} diff --git a/tests/StaticHost.Tests/MarkdownPathMapperTests.cs b/tests/StaticHost.Tests/MarkdownPathMapperTests.cs new file mode 100644 index 000000000..193a21647 --- /dev/null +++ b/tests/StaticHost.Tests/MarkdownPathMapperTests.cs @@ -0,0 +1,108 @@ +using StaticHost.AgentReadiness; +using Microsoft.Extensions.FileProviders; + +namespace StaticHost.Tests; + +public sealed class MarkdownPathMapperTests +{ + private sealed class StubFileProvider : IFileProvider + { + private readonly HashSet _files; + + public StubFileProvider(IEnumerable files) => + _files = new HashSet(files, StringComparer.OrdinalIgnoreCase); + + public IDirectoryContents GetDirectoryContents(string subpath) => NotFoundDirectoryContents.Singleton; + + public IFileInfo GetFileInfo(string subpath) => + _files.Contains(subpath) ? 
new StubFileInfo(subpath) : new NotFoundFileInfo(subpath); + + public Microsoft.Extensions.Primitives.IChangeToken Watch(string filter) => NeverChangeToken.Instance; + } + + private sealed class NeverChangeToken : Microsoft.Extensions.Primitives.IChangeToken + { + public static readonly NeverChangeToken Instance = new(); + public bool HasChanged => false; + public bool ActiveChangeCallbacks => false; + public IDisposable RegisterChangeCallback(Action callback, object? state) => EmptyDisposable.Instance; + + private sealed class EmptyDisposable : IDisposable + { + public static readonly EmptyDisposable Instance = new(); + public void Dispose() { } + } + } + + private sealed class StubFileInfo : IFileInfo + { + public StubFileInfo(string name) => Name = name; + + public bool Exists => true; + public long Length => 0; + public string? PhysicalPath => null; + public string Name { get; } + public DateTimeOffset LastModified => DateTimeOffset.UnixEpoch; + public bool IsDirectory => false; + public Stream CreateReadStream() => new MemoryStream(); + } + + [Theory] + [InlineData("/", "/index.html", "/index.md")] + [InlineData("/get-started/", "/get-started/index.html", "/get-started.md")] + [InlineData("/get-started", "/get-started/index.html", "/get-started.md")] + [InlineData("/get-started/quickstart/", "/get-started/quickstart/index.html", "/get-started/quickstart.md")] + [InlineData("/get-started.html", "/get-started.html", "/get-started.md")] + public void Maps_to_expected_companion_when_html_and_md_both_exist(string requestPath, string htmlSibling, string expectedMd) + { + var provider = new StubFileProvider([htmlSibling, expectedMd]); + var actual = MarkdownPathMapper.TryGetMarkdownCompanion(new PathString(requestPath), provider); + Assert.Equal(expectedMd, actual); + } + + [Theory] + [InlineData("/_astro/app.js")] + [InlineData("/.well-known/agent-skills/index.json")] + [InlineData("/healthz")] + [InlineData("/install.ps1")] + [InlineData("/install.sh")] + 
[InlineData("/pagefind/pagefind.js")] + public void Skips_infrastructure_paths(string requestPath) + { + // Even if the file system lies and a .md exists, infra paths must skip. + var provider = new StubFileProvider([$"{requestPath}.md", "/index.md", "/index.html"]); + var actual = MarkdownPathMapper.TryGetMarkdownCompanion(new PathString(requestPath), provider); + Assert.Null(actual); + } + + [Fact] + public void Returns_null_when_no_companion_exists() + { + var provider = new StubFileProvider(["/get-started/index.html"]); + var actual = MarkdownPathMapper.TryGetMarkdownCompanion(new PathString("/get-started/"), provider); + Assert.Null(actual); + } + + [Theory] + [InlineData("/")] + [InlineData("/stray/")] + [InlineData("/stray")] + [InlineData("/stray.html")] + public void Returns_null_when_md_exists_but_html_sibling_does_not(string requestPath) + { + // A stray .md without a real HTML page MUST NOT be advertised or served as + // a companion. Not all pages on the site emit a .md; only those that also + // have a Starlight-generated HTML page are eligible. + var provider = new StubFileProvider(["/index.md", "/stray.md"]); + var actual = MarkdownPathMapper.TryGetMarkdownCompanion(new PathString(requestPath), provider); + Assert.Null(actual); + } + + [Fact] + public void Skips_paths_with_unhandled_extensions() + { + var provider = new StubFileProvider(["/data.json.md", "/data.json"]); + var actual = MarkdownPathMapper.TryGetMarkdownCompanion(new PathString("/data.json"), provider); + Assert.Null(actual); + } +} diff --git a/tests/StaticHost.Tests/SamplePages.cs b/tests/StaticHost.Tests/SamplePages.cs new file mode 100644 index 000000000..7c8145f76 --- /dev/null +++ b/tests/StaticHost.Tests/SamplePages.cs @@ -0,0 +1,32 @@ +namespace StaticHost.Tests; + +/// +/// Shared sample HTML/Markdown bodies and seed helpers for agent-readiness +/// middleware tests. Keeps and +/// in lock-step on what a "Starlight +/// page" looks like on disk. 
+/// +internal static class SamplePages +{ + public const string Html = "Hello"; + public const string Markdown = "# Hello\n\nMarkdown body."; + + /// <summary> + /// Seeds the on-disk shape produced by starlight-page-actions for a + /// folder-style page: <c>{slug}/index.html</c> + sibling <c>{slug}.md</c>. + /// </summary> + public static void SeedFolderPage(TempWebRoot root, string slug) + { + root.WriteFile($"{slug}/index.html", Html); + root.WriteFile($"{slug}.md", Markdown); + } + + /// <summary> + /// Seeds the root page: <c>/index.html</c> + <c>/index.md</c>. + /// </summary> + public static void SeedRoot(TempWebRoot root) + { + root.WriteFile("index.html", Html); + root.WriteFile("index.md", Markdown); + } +} diff --git a/tests/StaticHost.Tests/StaticHost.Tests.csproj b/tests/StaticHost.Tests/StaticHost.Tests.csproj new file mode 100644 index 000000000..c9d9f3af5 --- /dev/null +++ b/tests/StaticHost.Tests/StaticHost.Tests.csproj @@ -0,0 +1,31 @@ + + + + net10.0 + enable + enable + false + true + + + + + + + + all + runtime; build; native; contentfiles; analyzers; buildtransitive + + + + + + + + + diff --git a/tests/StaticHost.Tests/WellKnownArtifactTests.cs b/tests/StaticHost.Tests/WellKnownArtifactTests.cs new file mode 100644 index 000000000..cee3ea45d --- /dev/null +++ b/tests/StaticHost.Tests/WellKnownArtifactTests.cs @@ -0,0 +1,128 @@ +using System.Security.Cryptography; +using System.Text; +using System.Text.Json; +using Microsoft.Extensions.FileProviders; + +namespace StaticHost.Tests; + +/// <summary> +/// Validates the static well-known artifacts that ship via the frontend's +/// <c>public/</c> directory. We read them off disk from the worktree because +/// they are static fixtures shared with the live deployment. +/// </summary> +public sealed class WellKnownArtifactTests +{ + private static string FrontendPublicDir + { + get + { + // The test binary lives under tests/StaticHost.Tests/bin/<Configuration>/<TargetFramework>/. + // Walk up to repo root, then descend to src/frontend/public. 
+ var dir = new DirectoryInfo(AppContext.BaseDirectory); + while (dir is not null && !File.Exists(Path.Combine(dir.FullName, "Aspire.Dev.slnx"))) + { + dir = dir.Parent; + } + Assert.NotNull(dir); /* fails when no ancestor directory contains Aspire.Dev.slnx */ + return Path.Combine(dir.FullName, "src", "frontend", "public"); + } + } + + [Fact] + public void RobotsTxt_declares_content_signal_inside_user_agent_star_block() + { + var path = Path.Combine(FrontendPublicDir, "robots.txt"); + var text = File.ReadAllText(path); + + // Locate the User-agent: * group and verify Content-Signal appears + // before the next User-agent: directive (i.e. within the same group). + var lines = text.Split('\n').Select(l => l.Trim()).ToArray(); /* Trim() also drops the '\r' left behind by CRLF line endings */ + var inStarGroup = false; + var sawContentSignal = false; + foreach (var line in lines) + { + if (line.StartsWith("User-agent:", StringComparison.OrdinalIgnoreCase)) + { + inStarGroup = string.Equals( + line[("User-agent:".Length)..].Trim(), + "*", + StringComparison.Ordinal); /* re-evaluated on every User-agent line, so a later non-* group clears the flag */ + } + else if (inStarGroup && + line.StartsWith("Content-Signal:", StringComparison.OrdinalIgnoreCase)) + { + sawContentSignal = true; + Assert.Contains("ai-train=yes", line); + Assert.Contains("search=yes", line); + Assert.Contains("ai-input=yes", line); + } + } + + Assert.True(sawContentSignal, + "robots.txt must contain a `Content-Signal:` directive inside the `User-agent: *` group."); + } + + [Fact] + public void AgentSkills_index_matches_RFC_v0_2_0_shape_and_digests() + { + var indexPath = Path.Combine(FrontendPublicDir, ".well-known", "agent-skills", "index.json"); + Assert.True(File.Exists(indexPath), $"Missing {indexPath}"); + + using var doc = JsonDocument.Parse(File.ReadAllText(indexPath)); + var root = doc.RootElement; + + Assert.True(root.TryGetProperty("$schema", out _), + "Agent Skills Discovery v0.2.0 requires a $schema property."); + Assert.True(root.TryGetProperty("version", out _)); + Assert.True(root.TryGetProperty("skills", out var skills)); + Assert.Equal(JsonValueKind.Array, skills.ValueKind); + 
Assert.True(skills.GetArrayLength() >= 1, "Expected at least one skill entry."); + + foreach (var skill in skills.EnumerateArray()) + { + Assert.True(skill.TryGetProperty("name", out var name) && name.ValueKind == JsonValueKind.String); + Assert.True(skill.TryGetProperty("type", out var type)); + Assert.Equal("skill-md", type.GetString()); + Assert.True(skill.TryGetProperty("description", out _)); + Assert.True(skill.TryGetProperty("url", out var urlEl)); + Assert.True(skill.TryGetProperty("digest", out var digestEl)); + + var digest = digestEl.GetString()!; + Assert.StartsWith("sha256:", digest, StringComparison.Ordinal); + + // Verify digest matches the actual file content. + var url = urlEl.GetString()!; + // url is server-relative, e.g. /.well-known/agent-skills/getting-started-with-aspire/SKILL.md + var localPath = Path.Combine( + FrontendPublicDir, + url.TrimStart('/').Replace('/', Path.DirectorySeparatorChar)); /* leading '/' is removed so Path.Combine does not treat the URL as a rooted path */ + + Assert.True(File.Exists(localPath), $"Missing skill file referenced by index.json: {localPath}"); + + var bytes = File.ReadAllBytes(localPath); + var actual = "sha256:" + Convert.ToHexStringLower(SHA256.HashData(bytes)); + Assert.Equal(digest, actual); /* digest published in index.json vs. lowercase-hex SHA-256 of the file bytes */ + } + } + + [Fact] + public void AgentSkills_files_are_LF_only() + { + // Digests are byte-stable only when the working tree honors `eol=lf`. + // Anyone editing the file with CRLF in their editor would invalidate + // the digest in CI; this test catches that locally. 
+ var skillsRoot = Path.Combine(FrontendPublicDir, ".well-known", "agent-skills"); + var files = Directory.EnumerateFiles(skillsRoot, "*", SearchOption.AllDirectories) + .Where(f => f.EndsWith(".md", StringComparison.OrdinalIgnoreCase) || + string.Equals(Path.GetFileName(f), "index.json", StringComparison.OrdinalIgnoreCase)); /* exact file-name match; a suffix test would also select e.g. search-index.json */ + + foreach (var file in files) + { + var bytes = File.ReadAllBytes(file); + var crIndex = Array.IndexOf(bytes, (byte)'\r'); /* offset of the first CR byte, or -1 when the file is LF-only */ + Assert.True(crIndex < 0, + $"{file} contains a CR byte at offset {crIndex}; agent-skills artifacts must be LF-only " + + "(see .gitattributes). This breaks the SHA-256 digest published in index.json."); + } + } +}