From 0b3dba039f8ce093d36c3b58b919d5ca1fbbd3c0 Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 2 Jul 2026 18:51:30 +0000 Subject: [PATCH 1/2] feat(client): add SEP-2663 tasks-extension client conformance scenario The server suite has ten tasks-* scenarios but the client suite had none (issue #374). Add one client-testing scenario, tasks-client-lifecycle, in which the harness acts as a task-capable server and observes the client driving the io.modelcontextprotocol/tasks extension: - sep-2663-client-handles-polymorphic-result (FAILURE): CreateTaskResult is recognized and polled to completion via tasks/get; a plain CallToolResult on the same negotiated session is handled normally. - sep-2663-client-emits-mcp-name-on-tasks-methods (FAILURE): every tasks/get, tasks/update, tasks/cancel POST stamps Mcp-Name with params.taskId (SEP-2243 routing headers). - sep-2663-client-honors-poll-interval (WARNING): consecutive polls of the same task respect pollIntervalMs, gated on the early side only so slow CI cannot flake it. Un-excluded from sep-2663.yaml accordingly. - sep-2663-cancel-not-via-cancelled-notification (FAILURE): the running task is cancelled via tasks/cancel, never notifications/cancelled. - sep-2663-client-rejects-task-result-on-unsupported (FAILURE): a CreateTaskResult returned to ping is treated as an invalid response, not driven as a real task. - Flow gates: tasks-client-extension-declared and tasks-client-terminal-failed-surfaced (failed tasks are surfaced and the flow continues instead of polling the terminal task forever). A client that never declares the extension capability (initialize capabilities.extensions or per-request _meta clientCapabilities) has not opted into the optional extension: all checks report SKIPPED. A client that never sends any request fails via untestableCheck. The scenario is registered in the client-testing list and the extensions suite (now derived from source.extensionId). The everything-client gains a conforming handler, and the vitest suite covers the positive path plus one negative case per check. Addresses #374 --- .../clients/typescript/everything-client.ts | 135 +++ src/scenarios/client/tasks-client.test.ts | 301 ++++++ src/scenarios/client/tasks-client.ts | 857 ++++++++++++++++++ src/scenarios/index.ts | 15 +- src/seps/sep-2663.yaml | 16 +- 5 files changed, 1319 insertions(+), 5 deletions(-) create mode 100644 src/scenarios/client/tasks-client.test.ts create mode 100644 src/scenarios/client/tasks-client.ts diff --git a/examples/clients/typescript/everything-client.ts b/examples/clients/typescript/everything-client.ts index ff6dd35f..9a0e746c 100644 --- a/examples/clients/typescript/everything-client.ts +++ b/examples/clients/typescript/everything-client.ts @@ -997,6 +997,141 @@ async function runMRTRClient(serverUrl: string): Promise { registerScenario('sep-2322-client-request-state', runMRTRClient); +// ============================================================================ +// Tasks extension client conformance (SEP-2663, issue #374) +// ============================================================================ + +const TASKS_EXTENSION_ID = 'io.modelcontextprotocol/tasks'; + +async function runTasksClient(serverUrl: string): Promise { + let nextId = 1; + + const sleep = (ms: number) => new Promise((r) => setTimeout(r, ms)); + + async function sendRpc( + method: string, + params: Record = {} + ): Promise<{ + result?: Record; + error?: { code: number; message: string }; + }> { + // Declare the tasks extension per-request (stateless-style negotiation). + const _meta = { + 'io.modelcontextprotocol/clientCapabilities': { + extensions: { [TASKS_EXTENSION_ID]: {} } + }, + ...((params._meta as object | undefined) ?? {}) + }; + const headers: Record = { + 'Content-Type': 'application/json', + Accept: 'application/json, text/event-stream', + 'Mcp-Method': method + }; + // SEP-2663 routing headers: Mcp-Name carries params.taskId on the + // tasks-namespace methods. + if ( + ['tasks/get', 'tasks/update', 'tasks/cancel'].includes(method) && + typeof params.taskId === 'string' + ) { + headers['Mcp-Name'] = params.taskId; + } + const resp = await fetch(serverUrl, { + method: 'POST', + headers, + body: JSON.stringify({ + jsonrpc: '2.0', + id: nextId++, + method, + params: { ...params, _meta } + }) + }); + if (resp.status === 202 || resp.status === 204) return { result: {} }; + return (await resp.json()) as { + result?: Record; + error?: { code: number; message: string }; + }; + } + + /** Poll tasks/get, honoring pollIntervalMs, until a terminal status. */ + async function pollUntilTerminal( + taskId: string, + pollIntervalMs: number + ): Promise> { + let interval = pollIntervalMs; + for (let i = 0; i < 20; i++) { + await sleep(interval); + const { result, error } = await sendRpc('tasks/get', { taskId }); + if (error) throw new Error(`tasks/get failed: ${error.message}`); + const task = result as Record; + if ( + ['completed', 'failed', 'cancelled'].includes(task.status as string) + ) { + return task; + } + if (typeof task.pollIntervalMs === 'number') { + interval = task.pollIntervalMs; + } + } + throw new Error(`task ${taskId} never reached a terminal status`); + } + + // Step 2: list tools. + await sendRpc('tools/list'); + + // Step 3: quick_task — server task-augments the call; poll to completion. + const quick = (await sendRpc('tools/call', { name: 'quick_task' })).result!; + if (quick.resultType === 'task') { + const terminal = await pollUntilTerminal( + quick.taskId as string, + (quick.pollIntervalMs as number) ?? 500 + ); + logger.debug('quick_task terminal:', terminal.status); + } + + // Step 4: sync_echo — plain CallToolResult on the same negotiated session. + await sendRpc('tools/call', { name: 'sync_echo' }); + + // Step 5: failing_task — poll until failed; surface the error and move on. + const failing = (await sendRpc('tools/call', { name: 'failing_task' })) + .result!; + if (failing.resultType === 'task') { + const terminal = await pollUntilTerminal( + failing.taskId as string, + (failing.pollIntervalMs as number) ?? 500 + ); + logger.debug( + 'failing_task terminal:', + terminal.status, + JSON.stringify(terminal.error) + ); + } + + // Step 6: cancel_task — cancel the running task via tasks/cancel, then + // observe the cancelled status with one confirming tasks/get. + const cancel = (await sendRpc('tools/call', { name: 'cancel_task' })).result!; + if (cancel.resultType === 'task') { + const taskId = cancel.taskId as string; + await sleep((cancel.pollIntervalMs as number) ?? 500); + await sendRpc('tasks/cancel', { taskId }); + const confirmed = await sendRpc('tasks/get', { taskId }); + logger.debug('cancel_task status:', confirmed.result?.status); + } + + // Step 7: ping — the scenario deliberately answers with a CreateTaskResult. + // Task augmentation is not supported for ping, so treat the response as + // invalid: log it and do NOT drive the tasks surface for that taskId. + const pong = await sendRpc('ping'); + if (pong.result?.resultType === 'task') { + logger.debug( + 'ping returned CreateTaskResult — invalid response for an unsupported request type; ignoring' + ); + } + + logger.debug('tasks-client-lifecycle scenario completed'); +} + +registerScenario('tasks-client-lifecycle', runTasksClient); + // ============================================================================ // WIF JWT-bearer scenario // ============================================================================ diff --git a/src/scenarios/client/tasks-client.test.ts b/src/scenarios/client/tasks-client.test.ts new file mode 100644 index 00000000..908e4e5b --- /dev/null +++ b/src/scenarios/client/tasks-client.test.ts @@ -0,0 +1,301 @@ +/** + * Tests for the SEP-2663 tasks-extension CLIENT scenario (issue #374). + * + * Positive path: the everything-client's tasks handler runs in-process + * against the scenario server and every check passes. + * + * Negative paths: deliberately-broken raw-fetch clients violate one + * requirement at a time and the matching check flips to FAILURE/WARNING + * (pattern from http-custom-headers.test.ts). + */ +import { describe, test, it, expect } from 'vitest'; +import { + runClientAgainstScenario, + InlineClientRunner +} from './auth/test_helpers/testClient'; +import { getHandler } from '../../../examples/clients/typescript/everything-client'; +import { testScenarioContext } from '../../mock-server/testing'; +import { + TasksClientScenario, + TASKS_CLIENT_DECLARED_CHECK_IDS, + TASKS_CLIENT_POLL_INTERVAL_MS +} from './tasks-client'; + +const TASKS_EXTENSION_ID = 'io.modelcontextprotocol/tasks'; +const TASKS_META = { + 'io.modelcontextprotocol/clientCapabilities': { + extensions: { [TASKS_EXTENSION_ID]: {} } + } +}; + +const sleep = (ms: number) => new Promise((r) => setTimeout(r, ms)); + +let nextId = 1; +async function rpc( + serverUrl: string, + method: string, + params: Record = {}, + options: { declare?: boolean; omitMcpName?: boolean } = {} +): Promise { + const declare = options.declare ?? true; + const headers: Record = { + 'Content-Type': 'application/json', + Accept: 'application/json, text/event-stream', + 'Mcp-Method': method + }; + if ( + ['tasks/get', 'tasks/update', 'tasks/cancel'].includes(method) && + typeof params.taskId === 'string' && + !options.omitMcpName + ) { + headers['Mcp-Name'] = params.taskId; + } + const body: Record = { + jsonrpc: '2.0', + id: nextId++, + method, + params: declare ? { ...params, _meta: TASKS_META } : params + }; + const resp = await fetch(serverUrl, { + method: 'POST', + headers, + body: JSON.stringify(body) + }); + if (resp.status === 202 || resp.status === 204) return {}; + const json = (await resp.json()) as { result?: any; error?: any }; + return json.result ?? json; +} + +function statusOf( + checks: { id: string; status: string }[], + id: string +): string | undefined { + return checks.find((c) => c.id === id)?.status; +} + +/** + * Drive the full conforming script with raw fetch, with hooks to break one + * behavior at a time. + */ +async function runConformingScript( + serverUrl: string, + overrides: { + /** Skip the pollIntervalMs sleeps (poll immediately). */ + pollImmediately?: boolean; + /** Do not send the Mcp-Name header on tasks/* requests. */ + omitMcpName?: boolean; + /** Cancel via notifications/cancelled instead of tasks/cancel. */ + cancelViaNotification?: boolean; + /** Treat CreateTaskResult as a plain result: never poll quick_task. */ + ignoreCreateTaskResult?: boolean; + /** Poll the bogus task returned to ping. */ + pollBogusPingTask?: boolean; + } = {} +): Promise { + const wait = (ms: number) => + overrides.pollImmediately ? Promise.resolve() : sleep(ms); + const taskOpts = { omitMcpName: overrides.omitMcpName ?? false }; + + await rpc(serverUrl, 'tools/list'); + + // quick_task + const quick = await rpc(serverUrl, 'tools/call', { name: 'quick_task' }); + if (!overrides.ignoreCreateTaskResult && quick.resultType === 'task') { + let interval = quick.pollIntervalMs ?? TASKS_CLIENT_POLL_INTERVAL_MS; + for (let i = 0; i < 10; i++) { + await wait(interval); + const task = await rpc( + serverUrl, + 'tasks/get', + { taskId: quick.taskId }, + taskOpts + ); + if (['completed', 'failed', 'cancelled'].includes(task.status)) break; + interval = task.pollIntervalMs ?? interval; + } + } + + // sync_echo + await rpc(serverUrl, 'tools/call', { name: 'sync_echo' }); + + // failing_task + const failing = await rpc(serverUrl, 'tools/call', { name: 'failing_task' }); + if (failing.resultType === 'task') { + await wait(failing.pollIntervalMs ?? TASKS_CLIENT_POLL_INTERVAL_MS); + await rpc(serverUrl, 'tasks/get', { taskId: failing.taskId }, taskOpts); + } + + // cancel_task + const cancel = await rpc(serverUrl, 'tools/call', { name: 'cancel_task' }); + if (cancel.resultType === 'task') { + await wait(cancel.pollIntervalMs ?? TASKS_CLIENT_POLL_INTERVAL_MS); + if (overrides.cancelViaNotification) { + await rpc(serverUrl, 'notifications/cancelled', { + requestId: 999, + reason: `cancel task ${cancel.taskId}` + }); + } else { + await rpc(serverUrl, 'tasks/cancel', { taskId: cancel.taskId }, taskOpts); + await rpc(serverUrl, 'tasks/get', { taskId: cancel.taskId }, taskOpts); + } + } + + // ping → invalid CreateTaskResult + const pong = await rpc(serverUrl, 'ping'); + if (overrides.pollBogusPingTask && pong.resultType === 'task') { + await rpc(serverUrl, 'tasks/get', { taskId: pong.taskId }, taskOpts); + } +} + +describe('tasks-client-lifecycle scenario (SEP-2663, issue #374)', () => { + test('everything-client passes every check', async () => { + const clientFn = getHandler('tasks-client-lifecycle'); + if (!clientFn) { + throw new Error( + 'No handler registered for scenario: tasks-client-lifecycle' + ); + } + const runner = new InlineClientRunner(clientFn); + // runClientAgainstScenario asserts every non-INFO check is SUCCESS. + await runClientAgainstScenario(runner, 'tasks-client-lifecycle'); + }, 30000); + + it('emits exactly the declared check IDs as failures when no client connects', async () => { + const scenario = new TasksClientScenario(); + await scenario.start(testScenarioContext()); + try { + const checks = scenario.getChecks(); + expect(new Set(checks.map((c) => c.id))).toEqual( + new Set(TASKS_CLIENT_DECLARED_CHECK_IDS) + ); + for (const check of checks) { + // Severity follows the requirement keyword: the SHOULD-level + // poll-interval check reports WARNING, everything else FAILURE. + expect(check.status).toBe( + check.id === 'sep-2663-client-honors-poll-interval' + ? 'WARNING' + : 'FAILURE' + ); + expect(check.details?.untestable).toBe(true); + } + } finally { + await scenario.stop(); + } + }); + + it('SKIPs every check when the client never declares the tasks extension', async () => { + const scenario = new TasksClientScenario(); + const { serverUrl } = await scenario.start(testScenarioContext()); + try { + // A tasks-unaware client: plain flow, no capability declaration. The + // server (spec-compliant) falls through to sync execution. + await rpc(serverUrl, 'tools/list', {}, { declare: false }); + const result = await rpc( + serverUrl, + 'tools/call', + { name: 'quick_task' }, + { declare: false } + ); + expect(result.resultType).toBe('complete'); + const checks = scenario.getChecks(); + expect(new Set(checks.map((c) => c.id))).toEqual( + new Set(TASKS_CLIENT_DECLARED_CHECK_IDS) + ); + for (const check of checks) { + expect(check.status).toBe('SKIPPED'); + } + } finally { + await scenario.stop(); + } + }); + + it('passes all checks for a conforming raw-fetch client', async () => { + const scenario = new TasksClientScenario(); + const { serverUrl } = await scenario.start(testScenarioContext()); + try { + await runConformingScript(serverUrl); + const checks = scenario.getChecks(); + for (const check of checks) { + expect(check.status, `${check.id}: ${check.errorMessage ?? ''}`).toBe( + 'SUCCESS' + ); + } + } finally { + await scenario.stop(); + } + }, 30000); + + it('FAILs polymorphic-result handling when the client never polls the created task', async () => { + const scenario = new TasksClientScenario(); + const { serverUrl } = await scenario.start(testScenarioContext()); + try { + await runConformingScript(serverUrl, { ignoreCreateTaskResult: true }); + const checks = scenario.getChecks(); + expect( + statusOf(checks, 'sep-2663-client-handles-polymorphic-result') + ).toBe('FAILURE'); + } finally { + await scenario.stop(); + } + }, 30000); + + it('FAILs the Mcp-Name check when tasks/* requests omit the header', async () => { + const scenario = new TasksClientScenario(); + const { serverUrl } = await scenario.start(testScenarioContext()); + try { + await runConformingScript(serverUrl, { omitMcpName: true }); + const checks = scenario.getChecks(); + expect( + statusOf(checks, 'sep-2663-client-emits-mcp-name-on-tasks-methods') + ).toBe('FAILURE'); + // The rest of the flow is intact. + expect( + statusOf(checks, 'sep-2663-client-handles-polymorphic-result') + ).toBe('SUCCESS'); + } finally { + await scenario.stop(); + } + }, 30000); + + it('WARNs on poll cadence when the client polls without waiting pollIntervalMs', async () => { + const scenario = new TasksClientScenario(); + const { serverUrl } = await scenario.start(testScenarioContext()); + try { + await runConformingScript(serverUrl, { pollImmediately: true }); + const checks = scenario.getChecks(); + expect(statusOf(checks, 'sep-2663-client-honors-poll-interval')).toBe( + 'WARNING' + ); + } finally { + await scenario.stop(); + } + }, 30000); + + it('FAILs the cancellation-channel check when the client uses notifications/cancelled', async () => { + const scenario = new TasksClientScenario(); + const { serverUrl } = await scenario.start(testScenarioContext()); + try { + await runConformingScript(serverUrl, { cancelViaNotification: true }); + const checks = scenario.getChecks(); + expect( + statusOf(checks, 'sep-2663-cancel-not-via-cancelled-notification') + ).toBe('FAILURE'); + } finally { + await scenario.stop(); + } + }, 30000); + + it('FAILs the unsupported-request check when the client polls the task returned to ping', async () => { + const scenario = new TasksClientScenario(); + const { serverUrl } = await scenario.start(testScenarioContext()); + try { + await runConformingScript(serverUrl, { pollBogusPingTask: true }); + const checks = scenario.getChecks(); + expect( + statusOf(checks, 'sep-2663-client-rejects-task-result-on-unsupported') + ).toBe('FAILURE'); + } finally { + await scenario.stop(); + } + }, 30000); +}); diff --git a/src/scenarios/client/tasks-client.ts b/src/scenarios/client/tasks-client.ts new file mode 100644 index 00000000..1a1aac35 --- /dev/null +++ b/src/scenarios/client/tasks-client.ts @@ -0,0 +1,857 @@ +/** + * SEP-2663 Tasks Extension — CLIENT conformance (issue #374). + * + * The harness acts as a task-capable MCP server and observes how the client + * under test drives the io.modelcontextprotocol/tasks extension: + * + * - Polymorphic results: a `tools/call` the server task-augments returns a + * `CreateTaskResult`; the client must recognize `resultType:"task"` and + * drive `tasks/get` polling to the inlined result, while a plain + * `CallToolResult` on the same negotiated session is handled normally. + * > "A client that has negotiated this extension MUST be prepared to + * > handle either CallToolResult or CreateTaskResult in response to any + * > supported request it issues." + * - Routing headers: every `tasks/get` / `tasks/update` / `tasks/cancel` + * POST must stamp `Mcp-Name` with `params.taskId`. + * > "When tasks/get, tasks/update, or tasks/cancel is sent over the + * > Streamable HTTP transport, the client MUST set the Mcp-Name header + * > (defined by SEP-2243) to the value of params.taskId." + * - Poll cadence: `pollIntervalMs` is honored between consecutive polls + * (gated on the early side only, so slow CI cannot flake it). + * > "Clients SHOULD respect the pollIntervalMs provided in responses when + * > determining polling frequency." + * - Terminal handling: a `failed` task (inlined JSON-RPC `error`) is + * surfaced and the flow continues instead of polling forever. + * - Cancellation channel: + * > "The notifications/cancelled notification MUST NOT be used for task + * > cancellation." — cancellation must arrive as `tasks/cancel`. + * - Invalid augmentation: a `CreateTaskResult` returned to a request type + * that does not support task augmentation (`ping`) must be rejected. + * > "A client that receives CreateTaskResult in response to an + * > unsupported request type MUST interpret this as an invalid response + * > to the request." + * + * A client that never declares the extension capability (neither in + * `initialize` `capabilities.extensions` nor per-request in + * `_meta["io.modelcontextprotocol/clientCapabilities"].extensions`) simply + * has not opted into this optional extension: all checks report SKIPPED, + * mirroring how optional capabilities are treated elsewhere. A client that + * never sends any request at all is a failed run, not an opt-out. + */ + +import http from 'http'; +import { + ConformanceCheck, + ScenarioSource, + SpecReference +} from '../../types.js'; +import { BaseHttpScenario } from './http-base.js'; +import { + withRequiredDraftResultFields, + type ScenarioContext +} from '../../mock-server'; +import { SEP_2663_REF, SEP_2243_REF } from '../server/tasks/mrtr-helpers'; +import { TASKS_EXTENSION_ID } from '../server/tasks/helpers'; +import { untestableCheck } from '../untestable'; + +/** pollIntervalMs advertised on every non-terminal task response. */ +export const TASKS_CLIENT_POLL_INTERVAL_MS = 300; +/** + * Early-side tolerance: a poll is only flagged when it arrives more than + * this many milliseconds before pollIntervalMs has elapsed. Late polls are + * never flagged (slow CI must not flake the check), mirroring the sse-retry + * scenario's early-side-only timing gate. + */ +export const TASKS_CLIENT_POLL_TOLERANCE_MS = 50; + +const TASK_QUICK = 'task-quick-0001'; +const TASK_FAIL = 'task-fail-0001'; +const TASK_CANCEL = 'task-cancel-0001'; +/** taskId of the invalid CreateTaskResult returned to `ping`. */ +const TASK_BOGUS = 'task-bogus-0001'; + +const TASK_TTL_MS = 60_000; + +/** Every check id this scenario emits, in a stable order. */ +export const TASKS_CLIENT_DECLARED_CHECK_IDS = [ + 'tasks-client-extension-declared', + 'sep-2663-client-handles-polymorphic-result', + 'sep-2663-client-emits-mcp-name-on-tasks-methods', + 'sep-2663-client-honors-poll-interval', + 'tasks-client-terminal-failed-surfaced', + 'sep-2663-cancel-not-via-cancelled-notification', + 'sep-2663-client-rejects-task-result-on-unsupported' +] as const; + +const CHECK_META: Record< + (typeof TASKS_CLIENT_DECLARED_CHECK_IDS)[number], + { name: string; description: string; specReferences: SpecReference[] } +> = { + 'tasks-client-extension-declared': { + name: 'TasksClientExtensionDeclared', + description: + 'Flow gate: client declares io.modelcontextprotocol/tasks (initialize capabilities.extensions or per-request _meta clientCapabilities) so the tasks surface is negotiated', + specReferences: [SEP_2663_REF] + }, + 'sep-2663-client-handles-polymorphic-result': { + name: 'TasksClientHandlesPolymorphicResult', + description: + 'A client that has negotiated this extension MUST be prepared to handle either CallToolResult or CreateTaskResult in response to any supported request it issues (drives tasks/get on CreateTaskResult; continues normally on CallToolResult)', + specReferences: [SEP_2663_REF] + }, + 'sep-2663-client-emits-mcp-name-on-tasks-methods': { + name: 'TasksClientEmitsMcpNameOnTasksMethods', + description: + 'When tasks/get, tasks/update, or tasks/cancel is sent over the Streamable HTTP transport, the client MUST set the Mcp-Name header (defined by SEP-2243) to the value of params.taskId', + specReferences: [SEP_2663_REF, SEP_2243_REF] + }, + 'sep-2663-client-honors-poll-interval': { + name: 'TasksClientHonorsPollInterval', + description: + 'Clients SHOULD respect the pollIntervalMs provided in responses when determining polling frequency (gated on the early side only)', + specReferences: [SEP_2663_REF] + }, + 'tasks-client-terminal-failed-surfaced': { + name: 'TasksClientTerminalFailedSurfaced', + description: + 'Flow gate: a task that reaches status "failed" (inlined JSON-RPC error) is surfaced — the client continues the script instead of polling the terminal task indefinitely', + specReferences: [SEP_2663_REF] + }, + 'sep-2663-cancel-not-via-cancelled-notification': { + name: 'TasksClientCancelsViaTasksCancel', + description: + 'The notifications/cancelled notification MUST NOT be used for task cancellation — the client cancels the running task via tasks/cancel', + specReferences: [SEP_2663_REF] + }, + 'sep-2663-client-rejects-task-result-on-unsupported': { + name: 'TasksClientRejectsTaskResultOnUnsupported', + description: + 'A client that receives CreateTaskResult in response to an unsupported request type (ping) MUST interpret this as an invalid response to the request (and MUST NOT treat it as a real task)', + specReferences: [SEP_2663_REF] + } +}; + +const TOOLS = [ + { + name: 'sync_echo', + description: + 'Sync-only tool: always returns a plain CallToolResult immediately.', + inputSchema: { type: 'object' as const, properties: {} } + }, + { + name: 'quick_task', + description: + 'Task-augmented tool: tools/call returns CreateTaskResult; the task completes on the second tasks/get.', + inputSchema: { type: 'object' as const, properties: {} } + }, + { + name: 'failing_task', + description: + 'Task-augmented tool: the task settles to status "failed" with an inlined JSON-RPC error on the first tasks/get.', + inputSchema: { type: 'object' as const, properties: {} } + }, + { + name: 'cancel_task', + description: + 'Task-augmented tool: the task stays "working" forever; the client must cancel it via tasks/cancel.', + inputSchema: { type: 'object' as const, properties: {} } + } +]; + +interface JsonRpcRequest { + jsonrpc: '2.0'; + id?: string | number | null; + method: string; + params?: Record; +} + +/** Accept the raw value or the SEP-2243 `=?base64?...?=` encoded form. */ +function mcpNameMatches(headerValue: string, expected: string): boolean { + if (headerValue === expected) return true; + const encoded = /^=\?base64\?(.+)\?=$/.exec(headerValue); + if (encoded) { + try { + return Buffer.from(encoded[1], 'base64').toString('utf-8') === expected; + } catch { + return false; + } + } + return false; +} + +export class TasksClientScenario extends BaseHttpScenario { + name = 'tasks-client-lifecycle'; + override readonly source: ScenarioSource = { + extensionId: 'io.modelcontextprotocol/tasks' + }; + // A conformant client may surface the invalid CreateTaskResult returned to + // `ping` (final script step) as an error and exit non-zero. + override allowClientError = true; + description = `Test SEP-2663 tasks-extension behavior of the client under test. + +The harness serves a task-capable MCP server. The client is expected to run +this script (each step maps to one or more checks): + +1. Declare the \`io.modelcontextprotocol/tasks\` extension — either in + \`initialize\` \`capabilities.extensions\`, or per-request in + \`_meta["io.modelcontextprotocol/clientCapabilities"].extensions\`. + A client that never declares it has not opted into the extension and + all checks report SKIPPED. +2. \`tools/list\`. +3. \`tools/call\` \`quick_task\` → the server task-augments the call and + returns \`CreateTaskResult\` (\`resultType:"task"\`, \`status:"working"\`, + \`pollIntervalMs:${TASKS_CLIENT_POLL_INTERVAL_MS}\`). Poll \`tasks/get\` until \`status:"completed"\` + (first poll returns \`working\`, the second \`completed\` with the tool + result inlined under \`result\`). +4. \`tools/call\` \`sync_echo\` → plain \`CallToolResult\`; continue normally. +5. \`tools/call\` \`failing_task\` → \`CreateTaskResult\`; the first + \`tasks/get\` returns \`status:"failed"\` with an inlined JSON-RPC + \`error\`. Surface it and continue — do not keep polling the terminal + task. +6. \`tools/call\` \`cancel_task\` → \`CreateTaskResult\` for a task that + never completes. Cancel it with \`tasks/cancel\` (never with + \`notifications/cancelled\`); a follow-up \`tasks/get\` observes + \`status:"cancelled"\`. +7. \`ping\` → the server (deliberately non-conformant) replies with a + \`CreateTaskResult\`. Task augmentation is not supported for ping, so + the client MUST treat the response as invalid and MUST NOT issue + \`tasks/get\`/\`tasks/update\`/\`tasks/cancel\` for that taskId. + +Every \`tasks/get\`, \`tasks/update\`, and \`tasks/cancel\` POST MUST carry +the \`Mcp-Name\` header set to \`params.taskId\` (SEP-2243 routing headers), +and consecutive polls of the same task SHOULD be at least \`pollIntervalMs\` +apart (only early polls are flagged).`; + + // ── Per-run observation state (reset in start()) ──────────────────────── + private requestCounter = 0; + private sawAnyRequest = false; + private extensionDeclared = false; + + private quickTaskCreated = false; + private quickTaskGets = 0; + private quickCompletedDelivered = false; + + private syncEchoCalledAt: number | null = null; + + private failTaskCreated = false; + private failedDeliveredAt: number | null = null; + private postFailedTerminalGets = 0; + + private cancelTaskCreated = false; + private cancelTaskCancelled = false; + private tasksCancelObserved = false; + private cancelledNotifications: string[] = []; + + private pingObserved = false; + private bogusTaskRequests: string[] = []; + + private tasksMethodRequests = 0; + private mcpNameViolations: string[] = []; + + /** For the early-side poll-cadence gate: last non-terminal tasks/get + * response time per taskId. */ + private lastPollRespondedAt = new Map(); + private measuredPollGapsMs: number[] = []; + private earlyPolls: string[] = []; + + override async start(ctx: ScenarioContext) { + this.requestCounter = 0; + this.sawAnyRequest = false; + this.extensionDeclared = false; + this.quickTaskCreated = false; + this.quickTaskGets = 0; + this.quickCompletedDelivered = false; + this.syncEchoCalledAt = null; + this.failTaskCreated = false; + this.failedDeliveredAt = null; + this.postFailedTerminalGets = 0; + this.cancelTaskCreated = false; + this.cancelTaskCancelled = false; + this.tasksCancelObserved = false; + this.cancelledNotifications = []; + this.pingObserved = false; + this.bogusTaskRequests = []; + this.tasksMethodRequests = 0; + this.mcpNameViolations = []; + this.lastPollRespondedAt.clear(); + this.measuredPollGapsMs = []; + this.earlyPolls = []; + return super.start(ctx); + } + + protected override discoverCapabilities(): object { + return { tools: {}, extensions: { [TASKS_EXTENSION_ID]: {} } }; + } + + /** Whether this request (or the session's initialize) declared the tasks + * extension capability. */ + private requestDeclaresExtension(request: JsonRpcRequest): boolean { + const meta = request.params?._meta as Record | undefined; + const caps = meta?.['io.modelcontextprotocol/clientCapabilities'] as + | { extensions?: Record } + | undefined; + return Boolean(caps?.extensions?.[TASKS_EXTENSION_ID]); + } + + private taskEnvelope( + taskId: string, + status: string, + extra: Record = {} + ): Record { + const now = new Date().toISOString(); + const envelope: Record = { + taskId, + status, + createdAt: now, + lastUpdatedAt: now, + ttlMs: TASK_TTL_MS, + ...extra + }; + if (!['completed', 'failed', 'cancelled'].includes(status)) { + envelope.pollIntervalMs = TASKS_CLIENT_POLL_INTERVAL_MS; + } + return envelope; + } + + private sendResult( + res: http.ServerResponse, + request: JsonRpcRequest, + result: Record + ): void { + this.sendJson(res, { jsonrpc: '2.0', id: request.id, result }); + } + + private sendError( + res: http.ServerResponse, + request: JsonRpcRequest, + code: number, + message: string + ): void { + this.sendJson(res, { + jsonrpc: '2.0', + id: request.id, + error: { code, message } + }); + } + + protected override handlePost( + req: http.IncomingMessage, + res: http.ServerResponse, + request: JsonRpcRequest + ): void { + this.sawAnyRequest = true; + this.requestCounter++; + if (this.requestDeclaresExtension(request)) { + this.extensionDeclared = true; + } + + const method = request.method; + const params = request.params ?? {}; + + // ── Routing-header + cadence observation for tasks/* methods ────────── + if (['tasks/get', 'tasks/update', 'tasks/cancel'].includes(method)) { + this.tasksMethodRequests++; + const taskId = params.taskId as string | undefined; + const header = req.headers['mcp-name']; + const headerValue = Array.isArray(header) ? header[0] : header; + if (typeof taskId === 'string') { + if (headerValue === undefined) { + this.mcpNameViolations.push( + `${method} for ${taskId}: Mcp-Name header missing` + ); + } else if (!mcpNameMatches(headerValue, taskId)) { + this.mcpNameViolations.push( + `${method} for ${taskId}: Mcp-Name header is ${JSON.stringify(headerValue)}, expected params.taskId` + ); + } + if (taskId === TASK_BOGUS) { + this.bogusTaskRequests.push(method); + } + if (method === 'tasks/get') { + const last = this.lastPollRespondedAt.get(taskId); + if (last !== undefined) { + const gap = Date.now() - last; + this.measuredPollGapsMs.push(gap); + if ( + gap < + TASKS_CLIENT_POLL_INTERVAL_MS - TASKS_CLIENT_POLL_TOLERANCE_MS + ) { + this.earlyPolls.push( + `tasks/get for ${taskId} arrived ${gap}ms after the previous poll (pollIntervalMs: ${TASKS_CLIENT_POLL_INTERVAL_MS})` + ); + } + } + } + } else { + this.mcpNameViolations.push(`${method}: params.taskId missing`); + } + } + + switch (method) { + case 'initialize': { + const caps = params.capabilities as + | { extensions?: Record } + | undefined; + if (caps?.extensions?.[TASKS_EXTENSION_ID]) { + this.extensionDeclared = true; + } + const requested = params.protocolVersion; + this.sendResult(res, request, { + protocolVersion: + typeof requested === 'string' ? requested : '2025-11-25', + serverInfo: { name: this.name + '-server', version: '1.0.0' }, + capabilities: this.discoverCapabilities() + }); + return; + } + + case 'notifications/cancelled': { + this.cancelledNotifications.push(JSON.stringify(params)); + this.sendNotificationAck(res); + return; + } + + case 'ping': { + this.pingObserved = true; + if (this.extensionDeclared) { + // Deliberately invalid: ping does not support task augmentation. + // The client MUST treat this CreateTaskResult as an invalid + // response and MUST NOT drive the tasks surface for TASK_BOGUS. + this.sendResult(res, request, { + resultType: 'task', + ...this.taskEnvelope(TASK_BOGUS, 'working') + }); + } else { + this.sendResult(res, request, { resultType: 'complete' }); + } + return; + } + + case 'tools/list': { + this.sendResult( + res, + request, + withRequiredDraftResultFields('tools/list', { + tools: TOOLS + }) as Record + ); + return; + } + + case 'tools/call': { + this.handleToolsCall(res, request); + return; + } + + case 'tasks/get': { + this.handleTasksGet(res, request); + return; + } + + case 'tasks/cancel': { + const taskId = params.taskId as string | undefined; + if ( + taskId === undefined || + ![TASK_QUICK, TASK_FAIL, TASK_CANCEL].includes(taskId) + ) { + this.sendError(res, request, -32602, `Unknown task: ${taskId}`); + return; + } + if (taskId === TASK_CANCEL) { + this.tasksCancelObserved = true; + this.cancelTaskCancelled = true; + } + // Empty ack (idempotent for terminal tasks). + this.sendResult(res, request, { resultType: 'complete' }); + return; + } + + case 'tasks/update': { + // No MRTR flow in this scenario; acknowledge with an empty result. + this.sendResult(res, request, { resultType: 'complete' }); + return; + } + + default: { + if (method.startsWith('notifications/')) { + this.sendNotificationAck(res); + return; + } + this.sendError(res, request, -32601, `Method not found: ${method}`); + return; + } + } + } + + private handleToolsCall( + res: http.ServerResponse, + request: JsonRpcRequest + ): void { + const toolName = request.params?.name as string | undefined; + + // A spec-compliant server MUST NOT return CreateTaskResult to a client + // that did not include the extension capability — non-declaring clients + // fall through to synchronous execution for every tool. + const augment = + this.extensionDeclared || this.requestDeclaresExtension(request); + + switch (toolName) { + case 'sync_echo': { + this.syncEchoCalledAt = this.requestCounter; + this.sendResult(res, request, { + resultType: 'complete', + content: [{ type: 'text', text: 'sync-ok' }] + }); + return; + } + case 'quick_task': { + if (!augment) { + this.sendResult(res, request, { + resultType: 'complete', + content: [{ type: 'text', text: 'quick-sync-fallback' }] + }); + return; + } + this.quickTaskCreated = true; + this.sendResult(res, request, { + resultType: 'task', + ...this.taskEnvelope(TASK_QUICK, 'working') + }); + return; + } + case 'failing_task': { + if (!augment) { + this.sendResult(res, request, { + resultType: 'complete', + isError: true, + content: [{ type: 'text', text: 'fail-sync-fallback' }] + }); + return; + } + this.failTaskCreated = true; + this.sendResult(res, request, { + resultType: 'task', + ...this.taskEnvelope(TASK_FAIL, 'working') + }); + return; + } + case 'cancel_task': { + if (!augment) { + this.sendResult(res, request, { + resultType: 'complete', + content: [{ type: 'text', text: 'cancel-sync-fallback' }] + }); + return; + } + this.cancelTaskCreated = true; + this.sendResult(res, request, { + resultType: 'task', + ...this.taskEnvelope(TASK_CANCEL, 'working') + }); + return; + } + default: { + this.sendError(res, request, -32602, `Unknown tool: ${toolName}`); + return; + } + } + } + + private handleTasksGet( + res: http.ServerResponse, + request: JsonRpcRequest + ): void { + const taskId = request.params?.taskId as string | undefined; + switch (taskId) { + case TASK_QUICK: { + this.quickTaskGets++; + if (this.quickTaskGets >= 2) { + this.quickCompletedDelivered = true; + this.lastPollRespondedAt.delete(TASK_QUICK); + this.sendResult(res, request, { + resultType: 'complete', + ...this.taskEnvelope(TASK_QUICK, 'completed', { + result: { + content: [{ type: 'text', text: 'quick-task-result' }] + } + }) + }); + } else { + this.sendResult(res, request, { + resultType: 'complete', + ...this.taskEnvelope(TASK_QUICK, 'working') + }); + this.lastPollRespondedAt.set(TASK_QUICK, Date.now()); + } + return; + } + case TASK_FAIL: { + if (this.failedDeliveredAt !== null) { + this.postFailedTerminalGets++; + } else { + this.failedDeliveredAt = this.requestCounter; + } + this.lastPollRespondedAt.delete(TASK_FAIL); + this.sendResult(res, request, { + resultType: 'complete', + ...this.taskEnvelope(TASK_FAIL, 'failed', { + statusMessage: 'deliberate failure for conformance testing', + error: { + code: -32603, + message: 'Internal error: failing_task always fails' + } + }) + }); + return; + } + case TASK_CANCEL: { + if (this.cancelTaskCancelled) { + this.lastPollRespondedAt.delete(TASK_CANCEL); + this.sendResult(res, request, { + resultType: 'complete', + ...this.taskEnvelope(TASK_CANCEL, 'cancelled') + }); + } else { + this.sendResult(res, request, { + resultType: 'complete', + ...this.taskEnvelope(TASK_CANCEL, 'working') + }); + this.lastPollRespondedAt.set(TASK_CANCEL, Date.now()); + } + return; + } + default: { + this.sendError(res, request, -32602, `Unknown task: ${taskId}`); + return; + } + } + } + + // ── Check synthesis ────────────────────────────────────────────────────── + + private check( + id: (typeof TASKS_CLIENT_DECLARED_CHECK_IDS)[number], + errs: string[], + details?: Record + ): ConformanceCheck { + const meta = CHECK_META[id]; + return { + id, + name: meta.name, + description: meta.description, + status: errs.length === 0 ? 'SUCCESS' : 'FAILURE', + timestamp: new Date().toISOString(), + errorMessage: errs.length > 0 ? errs.join('; ') : undefined, + specReferences: meta.specReferences, + details + }; + } + + getChecks(): ConformanceCheck[] { + // Build fresh each call — the runner may call getChecks() repeatedly. + if (!this.sawAnyRequest) { + return TASKS_CLIENT_DECLARED_CHECK_IDS.map((id) => { + const meta = CHECK_META[id]; + return untestableCheck( + id, + meta.name, + meta.description, + 'client never sent a request to the scenario server', + meta.specReferences, + id === 'sep-2663-client-honors-poll-interval' ? 'WARNING' : 'FAILURE' + ); + }); + } + + if (!this.extensionDeclared) { + // Optional extension the client never opted into: legitimately not + // applicable (the server never task-augmented anything), so SKIPPED + // rather than untestable-FAILURE. + return TASKS_CLIENT_DECLARED_CHECK_IDS.map((id) => { + const meta = CHECK_META[id]; + return { + id, + name: meta.name, + description: meta.description, + status: 'SKIPPED' as const, + timestamp: new Date().toISOString(), + errorMessage: + 'Skipped: client never declared the io.modelcontextprotocol/tasks extension capability (neither initialize capabilities.extensions nor per-request _meta clientCapabilities); tasks-extension client requirements are not applicable', + specReferences: meta.specReferences + }; + }); + } + + const checks: ConformanceCheck[] = []; + + // 1. Flow gate: extension declared (true on this code path). + checks.push(this.check('tasks-client-extension-declared', [])); + + // 2. Polymorphic result handling (MUST → FAILURE). + { + const errs: string[] = []; + if (!this.quickTaskCreated) { + errs.push( + 'client never issued tools/call for quick_task with the tasks capability declared' + ); + } else if (this.quickTaskGets === 0) { + errs.push( + 'client received CreateTaskResult for quick_task but never issued tasks/get for its taskId — the task-augmented response was not handled' + ); + } else if (!this.quickCompletedDelivered) { + errs.push( + 'client stopped polling quick_task before tasks/get returned status "completed"' + ); + } + if (this.syncEchoCalledAt === null) { + errs.push('client never issued tools/call for sync_echo'); + } else if (this.requestCounter <= this.syncEchoCalledAt) { + errs.push( + 'client stopped after receiving the plain CallToolResult for sync_echo — both result shapes must be handled on a negotiated session' + ); + } + checks.push( + this.check('sep-2663-client-handles-polymorphic-result', errs, { + quickTaskGets: this.quickTaskGets, + quickCompletedDelivered: this.quickCompletedDelivered, + syncEchoCalled: this.syncEchoCalledAt !== null + }) + ); + } + + // 3. Mcp-Name routing header on tasks/* (MUST → FAILURE). + if (this.tasksMethodRequests === 0) { + const meta = + CHECK_META['sep-2663-client-emits-mcp-name-on-tasks-methods']; + checks.push( + untestableCheck( + 'sep-2663-client-emits-mcp-name-on-tasks-methods', + meta.name, + meta.description, + 'client never sent a tasks/get, tasks/update, or tasks/cancel request', + meta.specReferences + ) + ); + } else { + checks.push( + this.check( + 'sep-2663-client-emits-mcp-name-on-tasks-methods', + this.mcpNameViolations, + { tasksMethodRequests: this.tasksMethodRequests } + ) + ); + } + + // 4. pollIntervalMs cadence (SHOULD → WARNING), early side only. + if (this.measuredPollGapsMs.length === 0) { + const meta = CHECK_META['sep-2663-client-honors-poll-interval']; + checks.push( + untestableCheck( + 'sep-2663-client-honors-poll-interval', + meta.name, + meta.description, + 'no consecutive tasks/get polls of the same task were observed, so polling cadence could not be measured', + meta.specReferences, + 'WARNING' + ) + ); + } else { + const meta = CHECK_META['sep-2663-client-honors-poll-interval']; + checks.push({ + id: 'sep-2663-client-honors-poll-interval', + name: meta.name, + description: meta.description, + status: this.earlyPolls.length === 0 ? 'SUCCESS' : 'WARNING', + timestamp: new Date().toISOString(), + errorMessage: + this.earlyPolls.length > 0 ? this.earlyPolls.join('; ') : undefined, + specReferences: meta.specReferences, + details: { + pollIntervalMs: TASKS_CLIENT_POLL_INTERVAL_MS, + toleranceMs: TASKS_CLIENT_POLL_TOLERANCE_MS, + measuredGapsMs: this.measuredPollGapsMs + } + }); + } + + // 5. Terminal (failed) task surfaced, flow continues (flow gate). + { + const errs: string[] = []; + if (!this.failTaskCreated) { + errs.push('client never issued tools/call for failing_task'); + } else if (this.failedDeliveredAt === null) { + errs.push( + 'client never polled failing_task to its terminal "failed" status' + ); + } else { + if (this.postFailedTerminalGets >= 3) { + errs.push( + `client kept polling the failed task (${this.postFailedTerminalGets} tasks/get requests after the terminal status was delivered) — terminal tasks must be surfaced, not polled indefinitely` + ); + } + if (this.requestCounter <= this.failedDeliveredAt) { + errs.push( + 'client did not continue the script after the task failed — the failure was not surfaced' + ); + } + } + checks.push( + this.check('tasks-client-terminal-failed-surfaced', errs, { + postFailedTerminalGets: this.postFailedTerminalGets + }) + ); + } + + // 6. Cancellation channel (MUST NOT → FAILURE). + { + const errs: string[] = []; + if (this.cancelledNotifications.length > 0) { + errs.push( + `client sent notifications/cancelled (${this.cancelledNotifications.join(', ')}) — the notifications/cancelled notification MUST NOT be used for task cancellation; use tasks/cancel` + ); + } + if (!this.cancelTaskCreated) { + errs.push('client never issued tools/call for cancel_task'); + } else if (!this.tasksCancelObserved) { + errs.push( + 'client never issued tasks/cancel for the running cancel_task task' + ); + } + checks.push( + this.check('sep-2663-cancel-not-via-cancelled-notification', errs, { + tasksCancelObserved: this.tasksCancelObserved, + cancelledNotifications: this.cancelledNotifications.length + }) + ); + } + + // 7. Invalid CreateTaskResult on an unsupported request type + // (MUST → FAILURE). + if (!this.pingObserved) { + const meta = + CHECK_META['sep-2663-client-rejects-task-result-on-unsupported']; + checks.push( + untestableCheck( + 'sep-2663-client-rejects-task-result-on-unsupported', + meta.name, + meta.description, + 'client never sent the ping request that the scenario answers with an invalid CreateTaskResult', + meta.specReferences + ) + ); + } else { + checks.push( + this.check( + 'sep-2663-client-rejects-task-result-on-unsupported', + this.bogusTaskRequests.length > 0 + ? [ + `client issued ${this.bogusTaskRequests.join(', ')} for the taskId returned to ping — a CreateTaskResult on an unsupported request type MUST be interpreted as an invalid response, not driven as a real task` + ] + : [] + ) + ); + } + + return checks; + } +} diff --git a/src/scenarios/index.ts b/src/scenarios/index.ts index db1584ea..65c85699 100644 --- a/src/scenarios/index.ts +++ b/src/scenarios/index.ts @@ -118,6 +118,7 @@ import { HttpInvalidToolHeadersScenario } from './client/http-custom-headers'; import { JsonSchemaRefDerefScenario } from './client/json-schema-ref-deref'; +import { TasksClientScenario } from './client/tasks-client'; // Pending client scenarios (not yet fully tested/implemented) const pendingClientScenariosList: ClientScenario[] = [ @@ -311,7 +312,10 @@ const scenariosList: Scenario[] = [ new HttpInvalidToolHeadersScenario(), // JSON Schema network $ref dereferencing (SEP-2106) - new JsonSchemaRefDerefScenario() + new JsonSchemaRefDerefScenario(), + + // SEP-2663 Tasks extension — client-side (issue #374) + new TasksClientScenario() ]; // Core scenarios (tier 1 requirements) @@ -370,8 +374,15 @@ export function listCoreScenarios(): string[] { return coreScenariosList.map((scenario) => scenario.name); } +// All client-testing scenarios sourced from a protocol extension, derived +// from the declared `source.extensionId` rather than a hand-maintained list +// (covers the auth extension scenarios and e.g. the SEP-2663 tasks one). +const extensionSpecScenariosList: Scenario[] = scenariosList.filter( + (scenario) => 'extensionId' in scenario.source +); + export function listExtensionScenarios(): string[] { - return extensionScenariosList.map((scenario) => scenario.name); + return extensionSpecScenariosList.map((scenario) => scenario.name); } export function listBackcompatScenarios(): string[] { diff --git a/src/seps/sep-2663.yaml b/src/seps/sep-2663.yaml index dec9dd93..674ea6fb 100644 --- a/src/seps/sep-2663.yaml +++ b/src/seps/sep-2663.yaml @@ -99,6 +99,16 @@ requirements: - check: sep-2663-no-progress-or-message-on-task-stream text: 'notifications/progress and notifications/message notifications MUST NOT be sent on the subscriptions/listen stream for a task, and are not supported on tasks in general in this specification.' + # ── Task Polling (client) ───────────────────────────────────────────────── + # Previously excluded as "not a wire-level conformance signal"; issue #374 + # asked for client-side pollIntervalMs coverage, and the interval between + # consecutive tasks/get POSTs for the same task is observable on the wire. + # The tasks-client-lifecycle scenario gates it on the early side only (a + # poll arriving before pollIntervalMs has elapsed), so slow CI cannot + # flake it — same approach as the sse-retry timing check. + - check: sep-2663-client-honors-poll-interval + text: 'Clients SHOULD respect the pollIntervalMs provided in responses when determining polling frequency.' + # ── Streamable HTTP: Routing Headers ────────────────────────────────────── - check: sep-2663-client-emits-mcp-name-on-tasks-methods text: 'When tasks/get, tasks/update, or tasks/cancel is sent over the Streamable HTTP transport, the client MUST set the Mcp-Name header (defined by SEP-2243) to the value of params.taskId.' @@ -155,6 +165,9 @@ requirements: # tasks-cancel-terminal-idempotent-ack, tasks-mrtr-tasks-update-resumes, # tasks-mrtr-partial-fulfillment, tasks-server-directed-creation-no-hint, # tasks-result-type-complete-on-non-task-responses, tasks-removed-tasks-list, + # the client-suite flow-gates tasks-client-extension-declared and + # tasks-client-terminal-failed-surfaced in + # src/scenarios/client/tasks-client.ts, # plus the ephemeral mrtr-* flow-gates in src/scenarios/server/mrtr/). # These verify end-to-end flow invariants rather than a specific normative # sentence, so they surface in the traceability manifest's `untracked` @@ -166,9 +179,6 @@ requirements: - text: 'implementations SHOULD be designed to accommodate additional request types in future revisions of this specification.' excluded: 'Architectural design guidance, not observable at the protocol level' - - text: 'Clients SHOULD respect the pollIntervalMs provided in responses when determining polling frequency.' - excluded: 'Client-internal polling cadence; the rate at which a client polls is not a wire-level conformance signal' - - text: 'Clients SHOULD continue polling until the task reaches a terminal status or until invoking tasks/cancel.' excluded: 'Internal client behavior with no observable violation at the protocol level' From f682a6f0cd456f2c0bfab9a5a7d7d3adf47de1a4 Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 2 Jul 2026 19:15:16 +0000 Subject: [PATCH 2/2] fix(tasks-client): review fixes for the tasks-extension client scenario - Drop the poll-cadence anchor when tasks/cancel is observed so a prompt confirming tasks/get after cancellation is no longer measured against the last working poll (false early-poll WARNING); regression-tested with a poll -> cancel -> immediate-confirm conforming client, and the everything-client now exercises that exact sequence. - Only flag notifications/cancelled that actually target a task (by the originating tools/call request id or a reason referencing a known taskId); unrelated request cancellation is legal and recorded in details only, with a test proving it stays SUCCESS. - Move the 'client never issued tasks/cancel' arm off the SEP-traced sep-2663-cancel-not-via-cancelled-notification id into a dedicated flow gate, tasks-client-cancel-flow-completed (registered in the declared-ID list and the yaml flow-gate comment block); the untouched MUST NOT id now reports untestable when cancellation was never attempted. - initialize reply carries resultType:'complete' and clamps protocolVersion to NEGOTIABLE_PROTOCOL_VERSIONS instead of echoing arbitrary values. - tasks/update rejects unknown taskIds with -32602, matching tasks/cancel. - FAILURE-path tests for both remaining flow gates: a client that keeps polling the failed task, one that stops dead after it, and one that never cancels. - everything-client _meta merge spreads caller _meta first so the tasks capability entry stays authoritative. - Reword the timing-comparison comments: this check is the early-side half of the sse-retry timing approach (sse-retry gates both sides); document the cadence anchor (response-send to next-request-arrival) and why 50ms tolerance suffices in-host. --- .../clients/typescript/everything-client.ts | 13 +- src/scenarios/client/tasks-client.test.ts | 113 +++++++++++++- src/scenarios/client/tasks-client.ts | 145 ++++++++++++++++-- src/seps/sep-2663.yaml | 8 +- 4 files changed, 253 insertions(+), 26 deletions(-) diff --git a/examples/clients/typescript/everything-client.ts b/examples/clients/typescript/everything-client.ts index 9a0e746c..814f570a 100644 --- a/examples/clients/typescript/everything-client.ts +++ b/examples/clients/typescript/everything-client.ts @@ -1016,11 +1016,13 @@ async function runTasksClient(serverUrl: string): Promise { error?: { code: number; message: string }; }> { // Declare the tasks extension per-request (stateless-style negotiation). + // Caller _meta is spread first so the capability entry stays + // authoritative and cannot be silently dropped by a caller override. const _meta = { + ...((params._meta as object | undefined) ?? {}), 'io.modelcontextprotocol/clientCapabilities': { extensions: { [TASKS_EXTENSION_ID]: {} } - }, - ...((params._meta as object | undefined) ?? {}) + } }; const headers: Record = { 'Content-Type': 'application/json', @@ -1106,12 +1108,15 @@ async function runTasksClient(serverUrl: string): Promise { ); } - // Step 6: cancel_task — cancel the running task via tasks/cancel, then - // observe the cancelled status with one confirming tasks/get. + // Step 6: cancel_task — poll the running task once, cancel it via + // tasks/cancel, then observe the cancelled status with a prompt + // confirming tasks/get (pollIntervalMs governs polling of a running + // task, not the post-cancel confirmation). const cancel = (await sendRpc('tools/call', { name: 'cancel_task' })).result!; if (cancel.resultType === 'task') { const taskId = cancel.taskId as string; await sleep((cancel.pollIntervalMs as number) ?? 500); + await sendRpc('tasks/get', { taskId }); await sendRpc('tasks/cancel', { taskId }); const confirmed = await sendRpc('tasks/get', { taskId }); logger.debug('cancel_task status:', confirmed.result?.status); diff --git a/src/scenarios/client/tasks-client.test.ts b/src/scenarios/client/tasks-client.test.ts index 908e4e5b..90fd29a1 100644 --- a/src/scenarios/client/tasks-client.test.ts +++ b/src/scenarios/client/tasks-client.test.ts @@ -86,6 +86,14 @@ async function runConformingScript( omitMcpName?: boolean; /** Cancel via notifications/cancelled instead of tasks/cancel. */ cancelViaNotification?: boolean; + /** Create cancel_task but never cancel it (flow-gate negative). */ + skipTasksCancel?: boolean; + /** Send a notifications/cancelled unrelated to any task (legal). */ + sendUnrelatedCancelledNotification?: boolean; + /** Keep polling failing_task after its terminal status was delivered. */ + hammerFailedTask?: boolean; + /** Stop the script right after failing_task fails (nothing surfaced). */ + stopAfterFailedTask?: boolean; /** Treat CreateTaskResult as a plain result: never poll quick_task. */ ignoreCreateTaskResult?: boolean; /** Poll the bogus task returned to ping. */ @@ -118,14 +126,31 @@ async function runConformingScript( // sync_echo await rpc(serverUrl, 'tools/call', { name: 'sync_echo' }); + // Legal, non-task-targeted request cancellation must not trip the + // task-cancellation-channel check. + if (overrides.sendUnrelatedCancelledNotification) { + await rpc(serverUrl, 'notifications/cancelled', { + requestId: 424242, + reason: 'user aborted an unrelated request' + }); + } + // failing_task const failing = await rpc(serverUrl, 'tools/call', { name: 'failing_task' }); if (failing.resultType === 'task') { await wait(failing.pollIntervalMs ?? TASKS_CLIENT_POLL_INTERVAL_MS); await rpc(serverUrl, 'tasks/get', { taskId: failing.taskId }, taskOpts); + if (overrides.stopAfterFailedTask) return; + if (overrides.hammerFailedTask) { + for (let i = 0; i < 3; i++) { + await rpc(serverUrl, 'tasks/get', { taskId: failing.taskId }, taskOpts); + } + } } - // cancel_task + // cancel_task: poll the running task once, cancel it, then promptly + // confirm the cancelled status (the confirmation is not a poll of a + // running task and must not be measured against pollIntervalMs). const cancel = await rpc(serverUrl, 'tools/call', { name: 'cancel_task' }); if (cancel.resultType === 'task') { await wait(cancel.pollIntervalMs ?? TASKS_CLIENT_POLL_INTERVAL_MS); @@ -134,7 +159,8 @@ async function runConformingScript( requestId: 999, reason: `cancel task ${cancel.taskId}` }); - } else { + } else if (!overrides.skipTasksCancel) { + await rpc(serverUrl, 'tasks/get', { taskId: cancel.taskId }, taskOpts); await rpc(serverUrl, 'tasks/cancel', { taskId: cancel.taskId }, taskOpts); await rpc(serverUrl, 'tasks/get', { taskId: cancel.taskId }, taskOpts); } @@ -271,6 +297,89 @@ describe('tasks-client-lifecycle scenario (SEP-2663, issue #374)', () => { } }, 30000); + it('does not flag poll cadence for the prompt confirming tasks/get after tasks/cancel', async () => { + // Regression: poll (working) → tasks/cancel → immediate confirming + // tasks/get. The confirmation must not be measured against the last + // working poll's timestamp. + const scenario = new TasksClientScenario(); + const { serverUrl } = await scenario.start(testScenarioContext()); + try { + await runConformingScript(serverUrl); + const checks = scenario.getChecks(); + expect( + statusOf(checks, 'sep-2663-client-honors-poll-interval'), + checks.find((c) => c.id === 'sep-2663-client-honors-poll-interval') + ?.errorMessage + ).toBe('SUCCESS'); + } finally { + await scenario.stop(); + } + }, 30000); + + it('FAILs the terminal flow gate when the client keeps polling the failed task', async () => { + const scenario = new TasksClientScenario(); + const { serverUrl } = await scenario.start(testScenarioContext()); + try { + await runConformingScript(serverUrl, { hammerFailedTask: true }); + const checks = scenario.getChecks(); + expect(statusOf(checks, 'tasks-client-terminal-failed-surfaced')).toBe( + 'FAILURE' + ); + } finally { + await scenario.stop(); + } + }, 30000); + + it('FAILs the terminal flow gate when the client stops after the failed task', async () => { + const scenario = new TasksClientScenario(); + const { serverUrl } = await scenario.start(testScenarioContext()); + try { + await runConformingScript(serverUrl, { stopAfterFailedTask: true }); + const checks = scenario.getChecks(); + expect(statusOf(checks, 'tasks-client-terminal-failed-surfaced')).toBe( + 'FAILURE' + ); + } finally { + await scenario.stop(); + } + }, 30000); + + it('FAILs the cancel flow gate when the client never issues tasks/cancel', async () => { + const scenario = new TasksClientScenario(); + const { serverUrl } = await scenario.start(testScenarioContext()); + try { + await runConformingScript(serverUrl, { skipTasksCancel: true }); + const checks = scenario.getChecks(); + expect(statusOf(checks, 'tasks-client-cancel-flow-completed')).toBe( + 'FAILURE' + ); + // The unexercised MUST NOT reports untestable under its own id. + const cancelCheck = checks.find( + (c) => c.id === 'sep-2663-cancel-not-via-cancelled-notification' + ); + expect(cancelCheck?.status).toBe('FAILURE'); + expect(cancelCheck?.details?.untestable).toBe(true); + } finally { + await scenario.stop(); + } + }, 30000); + + it('does not flag a notifications/cancelled unrelated to any task', async () => { + const scenario = new TasksClientScenario(); + const { serverUrl } = await scenario.start(testScenarioContext()); + try { + await runConformingScript(serverUrl, { + sendUnrelatedCancelledNotification: true + }); + const checks = scenario.getChecks(); + expect( + statusOf(checks, 'sep-2663-cancel-not-via-cancelled-notification') + ).toBe('SUCCESS'); + } finally { + await scenario.stop(); + } + }, 30000); + it('FAILs the cancellation-channel check when the client uses notifications/cancelled', async () => { const scenario = new TasksClientScenario(); const { serverUrl } = await scenario.start(testScenarioContext()); diff --git a/src/scenarios/client/tasks-client.ts b/src/scenarios/client/tasks-client.ts index 1a1aac35..de069417 100644 --- a/src/scenarios/client/tasks-client.ts +++ b/src/scenarios/client/tasks-client.ts @@ -42,6 +42,8 @@ import http from 'http'; import { ConformanceCheck, + LATEST_SPEC_VERSION, + NEGOTIABLE_PROTOCOL_VERSIONS, ScenarioSource, SpecReference } from '../../types.js'; @@ -59,8 +61,8 @@ export const TASKS_CLIENT_POLL_INTERVAL_MS = 300; /** * Early-side tolerance: a poll is only flagged when it arrives more than * this many milliseconds before pollIntervalMs has elapsed. Late polls are - * never flagged (slow CI must not flake the check), mirroring the sse-retry - * scenario's early-side-only timing gate. + * never flagged (slow CI must not flake the check) — the early-side half of + * the sse-retry scenario's timing approach. */ export const TASKS_CLIENT_POLL_TOLERANCE_MS = 50; @@ -79,6 +81,7 @@ export const TASKS_CLIENT_DECLARED_CHECK_IDS = [ 'sep-2663-client-emits-mcp-name-on-tasks-methods', 'sep-2663-client-honors-poll-interval', 'tasks-client-terminal-failed-surfaced', + 'tasks-client-cancel-flow-completed', 'sep-2663-cancel-not-via-cancelled-notification', 'sep-2663-client-rejects-task-result-on-unsupported' ] as const; @@ -117,6 +120,12 @@ const CHECK_META: Record< 'Flow gate: a task that reaches status "failed" (inlined JSON-RPC error) is surfaced — the client continues the script instead of polling the terminal task indefinitely', specReferences: [SEP_2663_REF] }, + 'tasks-client-cancel-flow-completed': { + name: 'TasksClientCancelFlowCompleted', + description: + 'Flow gate: the client creates the never-completing cancel_task task and cancels it via tasks/cancel', + specReferences: [SEP_2663_REF] + }, 'sep-2663-cancel-not-via-cancelled-notification': { name: 'TasksClientCancelsViaTasksCancel', description: @@ -240,7 +249,16 @@ apart (only early polls are flagged).`; private cancelTaskCreated = false; private cancelTaskCancelled = false; private tasksCancelObserved = false; - private cancelledNotifications: string[] = []; + /** JSON-RPC ids of the task-creating tools/call requests → their taskId, + * so a notifications/cancelled targeting one of them can be recognized as + * an (illegal) task-cancellation attempt. */ + private taskOriginRequestIds = new Map(); + /** notifications/cancelled that target a task (by originating requestId or + * by referencing a known taskId in `reason`) — spec violations. */ + private taskCancellationNotifications: string[] = []; + /** notifications/cancelled unrelated to any task — legal request + * cancellation, recorded for diagnostics only. */ + private unrelatedCancelledNotifications: string[] = []; private pingObserved = false; private bogusTaskRequests: string[] = []; @@ -268,7 +286,9 @@ apart (only early polls are flagged).`; this.cancelTaskCreated = false; this.cancelTaskCancelled = false; this.tasksCancelObserved = false; - this.cancelledNotifications = []; + this.taskOriginRequestIds.clear(); + this.taskCancellationNotifications = []; + this.unrelatedCancelledNotifications = []; this.pingObserved = false; this.bogusTaskRequests = []; this.tasksMethodRequests = 0; @@ -368,6 +388,12 @@ apart (only early polls are flagged).`; this.bogusTaskRequests.push(method); } if (method === 'tasks/get') { + // Cadence anchor: the gap runs from the moment the harness SENT + // the previous non-terminal tasks/get response to the ARRIVAL of + // this poll. Both endpoints are measured in the same process and + // the transport is a loopback socket, so sub-millisecond skew is + // the norm and the 50ms tolerance comfortably absorbs response + // write + parse overhead without masking a real early poll. const last = this.lastPollRespondedAt.get(taskId); if (last !== undefined) { const gap = Date.now() - last; @@ -395,10 +421,18 @@ apart (only early polls are flagged).`; if (caps?.extensions?.[TASKS_EXTENSION_ID]) { this.extensionDeclared = true; } + // Echo the requested version only when it is one the harness + // negotiates; otherwise clamp to the latest dated release — same + // policy as the shared mock server. const requested = params.protocolVersion; + const negotiated = + typeof requested === 'string' && + NEGOTIABLE_PROTOCOL_VERSIONS.includes(requested) + ? requested + : LATEST_SPEC_VERSION; this.sendResult(res, request, { - protocolVersion: - typeof requested === 'string' ? requested : '2025-11-25', + resultType: 'complete', + protocolVersion: negotiated, serverInfo: { name: this.name + '-server', version: '1.0.0' }, capabilities: this.discoverCapabilities() }); @@ -406,7 +440,23 @@ apart (only early polls are flagged).`; } case 'notifications/cancelled': { - this.cancelledNotifications.push(JSON.stringify(params)); + // The spec sentence only forbids notifications/cancelled FOR TASK + // CANCELLATION. Classify: a notification is task-targeted when its + // requestId is the JSON-RPC id of a task-creating tools/call, or + // when its reason references a known taskId. Anything else is legal + // request cancellation and only recorded for diagnostics. + const requestId = params.requestId as string | number | undefined; + const reason = typeof params.reason === 'string' ? params.reason : ''; + const knownTaskIds = [TASK_QUICK, TASK_FAIL, TASK_CANCEL, TASK_BOGUS]; + const targetsTask = + (requestId !== undefined && + this.taskOriginRequestIds.has(requestId)) || + knownTaskIds.some((id) => reason.includes(id)); + if (targetsTask) { + this.taskCancellationNotifications.push(JSON.stringify(params)); + } else { + this.unrelatedCancelledNotifications.push(JSON.stringify(params)); + } this.sendNotificationAck(res); return; } @@ -461,12 +511,26 @@ apart (only early polls are flagged).`; this.tasksCancelObserved = true; this.cancelTaskCancelled = true; } + // The task is terminal now: drop the cadence anchor so a prompt + // confirming tasks/get after the cancel ack is not measured against + // the last working poll (that would be a false early-poll flag — + // pollIntervalMs governs polling of a running task, not the + // post-cancel status confirmation). + this.lastPollRespondedAt.delete(taskId); // Empty ack (idempotent for terminal tasks). this.sendResult(res, request, { resultType: 'complete' }); return; } case 'tasks/update': { + const taskId = params.taskId as string | undefined; + if ( + taskId === undefined || + ![TASK_QUICK, TASK_FAIL, TASK_CANCEL].includes(taskId) + ) { + this.sendError(res, request, -32602, `Unknown task: ${taskId}`); + return; + } // No MRTR flow in this scenario; acknowledge with an empty result. this.sendResult(res, request, { resultType: 'complete' }); return; @@ -483,6 +547,15 @@ apart (only early polls are flagged).`; } } + /** Remember which JSON-RPC request id produced which task, so a later + * notifications/cancelled targeting that id is recognizable as an + * (illegal) task-cancellation attempt. */ + private recordTaskOrigin(request: JsonRpcRequest, taskId: string): void { + if (request.id !== undefined && request.id !== null) { + this.taskOriginRequestIds.set(request.id, taskId); + } + } + private handleToolsCall( res: http.ServerResponse, request: JsonRpcRequest @@ -513,6 +586,7 @@ apart (only early polls are flagged).`; return; } this.quickTaskCreated = true; + this.recordTaskOrigin(request, TASK_QUICK); this.sendResult(res, request, { resultType: 'task', ...this.taskEnvelope(TASK_QUICK, 'working') @@ -529,6 +603,7 @@ apart (only early polls are flagged).`; return; } this.failTaskCreated = true; + this.recordTaskOrigin(request, TASK_FAIL); this.sendResult(res, request, { resultType: 'task', ...this.taskEnvelope(TASK_FAIL, 'working') @@ -544,6 +619,7 @@ apart (only early polls are flagged).`; return; } this.cancelTaskCreated = true; + this.recordTaskOrigin(request, TASK_CANCEL); this.sendResult(res, request, { resultType: 'task', ...this.taskEnvelope(TASK_CANCEL, 'working') @@ -802,14 +878,9 @@ apart (only early polls are flagged).`; ); } - // 6. Cancellation channel (MUST NOT → FAILURE). + // 6. Flow gate: the cancellation flow was exercised at all. { const errs: string[] = []; - if (this.cancelledNotifications.length > 0) { - errs.push( - `client sent notifications/cancelled (${this.cancelledNotifications.join(', ')}) — the notifications/cancelled notification MUST NOT be used for task cancellation; use tasks/cancel` - ); - } if (!this.cancelTaskCreated) { errs.push('client never issued tools/call for cancel_task'); } else if (!this.tasksCancelObserved) { @@ -818,14 +889,54 @@ apart (only early polls are flagged).`; ); } checks.push( - this.check('sep-2663-cancel-not-via-cancelled-notification', errs, { - tasksCancelObserved: this.tasksCancelObserved, - cancelledNotifications: this.cancelledNotifications.length + this.check('tasks-client-cancel-flow-completed', errs, { + cancelTaskCreated: this.cancelTaskCreated, + tasksCancelObserved: this.tasksCancelObserved }) ); } - // 7. Invalid CreateTaskResult on an unsupported request type + // 7. Cancellation channel (MUST NOT → FAILURE). Only task-targeted + // notifications/cancelled violate the sentence; unrelated request + // cancellation is legal and surfaces in details only. + { + const meta = CHECK_META['sep-2663-cancel-not-via-cancelled-notification']; + if (this.taskCancellationNotifications.length > 0) { + checks.push( + this.check( + 'sep-2663-cancel-not-via-cancelled-notification', + [ + `client sent notifications/cancelled targeting a task (${this.taskCancellationNotifications.join(', ')}) — the notifications/cancelled notification MUST NOT be used for task cancellation; use tasks/cancel` + ], + { + taskCancellationNotifications: this.taskCancellationNotifications, + unrelatedCancelledNotifications: + this.unrelatedCancelledNotifications + } + ) + ); + } else if (this.tasksCancelObserved) { + checks.push( + this.check('sep-2663-cancel-not-via-cancelled-notification', [], { + tasksCancelObserved: true, + unrelatedCancelledNotifications: + this.unrelatedCancelledNotifications + }) + ); + } else { + checks.push( + untestableCheck( + 'sep-2663-cancel-not-via-cancelled-notification', + meta.name, + meta.description, + 'client never attempted task cancellation (no tasks/cancel for the running cancel_task task), so the cancellation channel could not be observed', + meta.specReferences + ) + ); + } + } + + // 8. Invalid CreateTaskResult on an unsupported request type // (MUST → FAILURE). if (!this.pingObserved) { const meta = diff --git a/src/seps/sep-2663.yaml b/src/seps/sep-2663.yaml index 674ea6fb..b70236c7 100644 --- a/src/seps/sep-2663.yaml +++ b/src/seps/sep-2663.yaml @@ -105,7 +105,8 @@ requirements: # consecutive tasks/get POSTs for the same task is observable on the wire. # The tasks-client-lifecycle scenario gates it on the early side only (a # poll arriving before pollIntervalMs has elapsed), so slow CI cannot - # flake it — same approach as the sse-retry timing check. + # flake it — the early-side half of the sse-retry timing approach; late + # polls are never flagged. - check: sep-2663-client-honors-poll-interval text: 'Clients SHOULD respect the pollIntervalMs provided in responses when determining polling frequency.' @@ -165,8 +166,9 @@ requirements: # tasks-cancel-terminal-idempotent-ack, tasks-mrtr-tasks-update-resumes, # tasks-mrtr-partial-fulfillment, tasks-server-directed-creation-no-hint, # tasks-result-type-complete-on-non-task-responses, tasks-removed-tasks-list, - # the client-suite flow-gates tasks-client-extension-declared and - # tasks-client-terminal-failed-surfaced in + # the client-suite flow-gates tasks-client-extension-declared, + # tasks-client-terminal-failed-surfaced, and + # tasks-client-cancel-flow-completed in # src/scenarios/client/tasks-client.ts, # plus the ephemeral mrtr-* flow-gates in src/scenarios/server/mrtr/). # These verify end-to-end flow invariants rather than a specific normative