From 1dd2b7646f961b5fc153d6f75d70694dccdd83de Mon Sep 17 00:00:00 2001 From: Nick Nisi Date: Tue, 14 Apr 2026 14:05:09 -0500 Subject: [PATCH 01/15] feat: enrich telemetry event types with env fingerprint, start timestamps, and new event types Add environment fingerprint fields (OS, Node version, CI detection, shell) to session.start and session.end events. Add startTimestamp to step and agent.tool events for span reconstruction. Define command and crash event types with stub emission methods. Add discriminated union Zod schema validation tests mirroring the API schema. --- src/utils/analytics.spec.ts | 141 ++++++++++++++++ src/utils/analytics.ts | 97 +++++++++++ src/utils/telemetry-schema.spec.ts | 258 +++++++++++++++++++++++++++++ src/utils/telemetry-types.ts | 45 ++++- 4 files changed, 540 insertions(+), 1 deletion(-) create mode 100644 src/utils/telemetry-schema.spec.ts diff --git a/src/utils/analytics.spec.ts b/src/utils/analytics.spec.ts index 97dc2a4b..8c0f1bfe 100644 --- a/src/utils/analytics.spec.ts +++ b/src/utils/analytics.spec.ts @@ -206,6 +206,17 @@ describe('Analytics', () => { }), ); }); + + it('includes environment fingerprint fields', () => { + analytics.sessionStart('cli', '1.0.0'); + + const event = mockQueueEvent.mock.calls.find((c) => c[0].type === 'session.start')[0]; + expect(event.attributes).toHaveProperty('env.os'); + expect(event.attributes).toHaveProperty('env.os_version'); + expect(event.attributes).toHaveProperty('env.node_version'); + expect(event.attributes).toHaveProperty('env.shell'); + expect(typeof event.attributes['env.ci']).toBe('boolean'); + }); }); describe('shutdown', () => { @@ -260,6 +271,21 @@ describe('Analytics', () => { }), ); }); + + it('includes env fingerprint and installer.mode', async () => { + analytics.sessionStart('tui', '1.0.0'); + mockQueueEvent.mockClear(); + + await analytics.shutdown('success'); + + const event = mockQueueEvent.mock.calls.find((c) => c[0].type === 'session.end')[0]; + expect(event.attributes).toHaveProperty('env.os'); + expect(event.attributes).toHaveProperty('env.os_version'); + expect(event.attributes).toHaveProperty('env.node_version'); + expect(event.attributes).toHaveProperty('env.shell'); + expect(typeof event.attributes['env.ci']).toBe('boolean'); + expect(event.attributes['installer.mode']).toBe('tui'); + }); }); describe('getFeatureFlag', () => { @@ -306,6 +332,14 @@ describe('Analytics', () => { const event = mockQueueEvent.mock.calls.find((c) => c[0].type === 'step')[0]; expect(event.error).toBeUndefined(); }); + + it('includes startTimestamp as valid ISO 8601', () => { + analytics.stepCompleted('detect_framework', 150, true); + + const event = mockQueueEvent.mock.calls.find((c) => c[0].type === 'step')[0]; + expect(event.startTimestamp).toBeDefined(); + expect(new Date(event.startTimestamp).toISOString()).toBe(event.startTimestamp); + }); }); describe('toolCalled', () => { @@ -334,6 +368,14 @@ describe('Analytics', () => { }), ); }); + + it('includes startTimestamp as valid ISO 8601', () => { + analytics.toolCalled('Write', 50, true); + + const event = mockQueueEvent.mock.calls.find((c) => c[0].type === 'agent.tool')[0]; + expect(event.startTimestamp).toBeDefined(); + expect(new Date(event.startTimestamp).toISOString()).toBe(event.startTimestamp); + }); }); describe('llmRequest', () => { @@ -351,6 +393,13 @@ describe('Analytics', () => { ); }); + it('does NOT include startTimestamp (point-in-time marker)', () => { + analytics.llmRequest('claude-sonnet-4-20250514', 1000, 500); + + const event = mockQueueEvent.mock.calls.find((c) => c[0].type === 'agent.llm')[0]; + expect(event.startTimestamp).toBeUndefined(); + }); + it('accumulates tokens for session.end', async () => { analytics.llmRequest('claude-sonnet-4-20250514', 1000, 500); analytics.llmRequest('claude-sonnet-4-20250514', 800, 300); @@ -375,6 +424,80 @@ describe('Analytics', () => { expect(sessionEnd.attributes['installer.agent.iterations']).toBe(3); }); }); + + describe('commandExecuted', () => { + it('queues a command event with correct attributes', () => { + analytics.commandExecuted('org.list', 200, true); + + expect(mockQueueEvent).toHaveBeenCalledWith( + expect.objectContaining({ + type: 'command', + attributes: expect.objectContaining({ + 'command.name': 'org.list', + 'command.duration_ms': 200, + 'command.success': true, + 'env.os': expect.any(String), + 'env.node_version': expect.any(String), + }), + }), + ); + }); + + it('includes error info when provided', () => { + const error = new TypeError('Not found'); + analytics.commandExecuted('org.get', 50, false, { error }); + + const event = mockQueueEvent.mock.calls.find((c) => c[0].type === 'command')[0]; + expect(event.attributes['command.error_type']).toBe('TypeError'); + expect(event.attributes['command.error_message']).toBe('Not found'); + }); + + it('includes flags as comma-separated names', () => { + analytics.commandExecuted('org.list', 100, true, { flags: ['json', 'limit'] }); + + const event = mockQueueEvent.mock.calls.find((c) => c[0].type === 'command')[0]; + expect(event.attributes['command.flags']).toBe('json,limit'); + }); + }); + + describe('captureUnhandledCrash', () => { + it('queues a crash event with error details', () => { + const error = new Error('Unexpected failure'); + error.stack = 'Error: Unexpected failure\n at foo.ts:1'; + analytics.captureUnhandledCrash(error, { command: 'install', version: '1.0.0' }); + + expect(mockQueueEvent).toHaveBeenCalledWith( + expect.objectContaining({ + type: 'crash', + attributes: expect.objectContaining({ + 'crash.error_type': 'Error', + 'crash.error_message': 'Unexpected failure', + 'crash.stack': 'Error: Unexpected failure\n at foo.ts:1', + 'crash.command': 'install', + 'installer.version': '1.0.0', + 'env.os': expect.any(String), + 'env.node_version': expect.any(String), + }), + }), + ); + }); + + it('truncates stack traces to 4KB', () => { + const error = new Error('Big stack'); + error.stack = 'x'.repeat(5000); + analytics.captureUnhandledCrash(error); + + const event = mockQueueEvent.mock.calls.find((c) => c[0].type === 'crash')[0]; + expect(event.attributes['crash.stack'].length).toBe(4096); + }); + + it('defaults version to unknown when not provided', () => { + analytics.captureUnhandledCrash(new Error('test')); + + const event = mockQueueEvent.mock.calls.find((c) => c[0].type === 'crash')[0]; + expect(event.attributes['installer.version']).toBe('unknown'); + }); + }); }); describe('with telemetry disabled', () => { @@ -454,5 +577,23 @@ describe('Analytics', () => { expect(mockQueueEvent).not.toHaveBeenCalled(); }); + + it('commandExecuted does nothing', async () => { + const { Analytics } = await import('./analytics.js'); + const analytics = new Analytics(); + + analytics.commandExecuted('org.list', 100, true); + + expect(mockQueueEvent).not.toHaveBeenCalled(); + }); + + it('captureUnhandledCrash does nothing', async () => { + const { Analytics } = await import('./analytics.js'); + const analytics = new Analytics(); + + analytics.captureUnhandledCrash(new Error('test')); + + expect(mockQueueEvent).not.toHaveBeenCalled(); + }); }); }); diff --git a/src/utils/analytics.ts b/src/utils/analytics.ts index c0f348ea..1c9242bb 100644 --- a/src/utils/analytics.ts +++ b/src/utils/analytics.ts @@ -1,3 +1,4 @@ +import os from 'node:os'; import { v4 as uuidv4 } from 'uuid'; import { debug } from './debug.js'; import { telemetryClient } from './telemetry-client.js'; @@ -7,6 +8,8 @@ import type { StepEvent, AgentToolEvent, AgentLLMEvent, + CommandEvent, + CrashEvent, } from './telemetry-types.js'; import { WORKOS_TELEMETRY_ENABLED } from '../lib/constants.js'; @@ -15,6 +18,7 @@ export class Analytics { private sessionId: string; private sessionStartTime: Date; private distinctId?: string; + private mode?: 'cli' | 'tui' | 'headless'; // Agent metrics tracking private totalInputTokens = 0; @@ -71,9 +75,40 @@ export class Analytics { return undefined; } + private detectCiProvider(): string | undefined { + if (process.env.GITHUB_ACTIONS) return 'github-actions'; + if (process.env.BUILDKITE) return 'buildkite'; + if (process.env.CIRCLECI) return 'circleci'; + if (process.env.GITLAB_CI) return 'gitlab-ci'; + if (process.env.JENKINS_URL) return 'jenkins'; + return undefined; + } + + private getEnvFingerprint() { + let osVersion: string; + try { + osVersion = os.release(); + } catch { + osVersion = 'unknown'; + } + + const ciProvider = this.detectCiProvider(); + + return { + 'env.os': process.platform, + 'env.os_version': osVersion, + 'env.node_version': process.version, + 'env.shell': process.env.SHELL ?? process.env.COMSPEC ?? 'unknown', + 'env.ci': Boolean(process.env.CI || process.env.GITHUB_ACTIONS || process.env.BUILDKITE), + ...(ciProvider ? { 'env.ci_provider': ciProvider } : {}), + }; + } + sessionStart(mode: 'cli' | 'tui' | 'headless', version: string) { if (!WORKOS_TELEMETRY_ENABLED) return; + this.mode = mode; + const event: SessionStartEvent = { type: 'session.start', sessionId: this.sessionId, @@ -82,6 +117,7 @@ export class Analytics { 'installer.version': version, 'installer.mode': mode, 'workos.user_id': this.distinctId, + ...this.getEnvFingerprint(), }, }; @@ -96,6 +132,7 @@ export class Analytics { sessionId: this.sessionId, timestamp: new Date().toISOString(), name, + startTimestamp: new Date(Date.now() - durationMs).toISOString(), durationMs, success, error: error ? { type: error.name, message: error.message } : undefined, @@ -112,6 +149,7 @@ export class Analytics { sessionId: this.sessionId, timestamp: new Date().toISOString(), toolName, + startTimestamp: new Date(Date.now() - durationMs).toISOString(), durationMs, success, }; @@ -141,6 +179,61 @@ export class Analytics { this.agentIterations++; } + commandExecuted( + name: string, + durationMs: number, + success: boolean, + options?: { error?: Error; flags?: string[] }, + ) { + if (!WORKOS_TELEMETRY_ENABLED) return; + + const event: CommandEvent = { + type: 'command', + sessionId: this.sessionId, + timestamp: new Date().toISOString(), + attributes: { + 'command.name': name, + 'command.duration_ms': durationMs, + 'command.success': success, + ...(options?.error + ? { + 'command.error_type': options.error.name, + 'command.error_message': options.error.message, + } + : {}), + ...(options?.flags?.length + ? { 'command.flags': options.flags.join(',') } + : {}), + ...this.getEnvFingerprint(), + }, + }; + + telemetryClient.queueEvent(event); + } + + captureUnhandledCrash(error: Error, options?: { command?: string; version?: string }) { + if (!WORKOS_TELEMETRY_ENABLED) return; + + const stack = error.stack ?? ''; + const truncatedStack = stack.length > 4096 ? stack.slice(0, 4096) : stack; + + const event: CrashEvent = { + type: 'crash', + sessionId: this.sessionId, + timestamp: new Date().toISOString(), + attributes: { + 'crash.error_type': error.name, + 'crash.error_message': error.message, + 'crash.stack': truncatedStack, + ...(options?.command ? { 'crash.command': options.command } : {}), + 'installer.version': options?.version ?? 'unknown', + ...this.getEnvFingerprint(), + }, + }; + + telemetryClient.queueEvent(event); + } + async shutdown(status: 'success' | 'error' | 'cancelled') { if (!WORKOS_TELEMETRY_ENABLED) return; @@ -152,6 +245,8 @@ export class Analytics { string | number | boolean >; + const envFingerprint = this.getEnvFingerprint(); + const event: SessionEndEvent = { type: 'session.end', sessionId: this.sessionId, @@ -162,6 +257,8 @@ export class Analytics { 'installer.agent.iterations': this.agentIterations, 'installer.agent.tokens.input': this.totalInputTokens, 'installer.agent.tokens.output': this.totalOutputTokens, + ...envFingerprint, + ...(this.mode ? { 'installer.mode': this.mode } : {}), ...extraAttributes, }, }; diff --git a/src/utils/telemetry-schema.spec.ts b/src/utils/telemetry-schema.spec.ts new file mode 100644 index 00000000..45c27d3f --- /dev/null +++ b/src/utils/telemetry-schema.spec.ts @@ -0,0 +1,258 @@ +import { describe, it, expect } from 'vitest'; +import { z } from 'zod'; + +/** + * Mirror of the API's Zod discriminated union schema. + * These tests validate that the schema shape preserves top-level fields + * (not stripped by safeParse) and accepts all 7 event types. + * This ensures CLI and API stay in sync. + */ + +const attributesSchema = z + .record(z.string(), z.union([z.string(), z.number(), z.boolean()])) + .optional(); + +const baseFields = { + sessionId: z.string(), + timestamp: z.string(), + attributes: attributesSchema, +}; + +const SessionStartSchema = z + .object({ type: z.literal('session.start'), ...baseFields }) + .passthrough(); + +const SessionEndSchema = z + .object({ type: z.literal('session.end'), ...baseFields }) + .passthrough(); + +const StepSchema = z + .object({ + type: z.literal('step'), + ...baseFields, + name: z.string(), + startTimestamp: z.string().optional(), + durationMs: z.number(), + success: z.boolean(), + error: z.object({ type: z.string(), message: z.string() }).optional(), + }) + .passthrough(); + +const AgentToolSchema = z + .object({ + type: z.literal('agent.tool'), + ...baseFields, + toolName: z.string(), + startTimestamp: z.string().optional(), + durationMs: z.number(), + success: z.boolean(), + }) + .passthrough(); + +const AgentLlmSchema = z + .object({ + type: z.literal('agent.llm'), + ...baseFields, + model: z.string(), + inputTokens: z.number(), + outputTokens: z.number(), + }) + .passthrough(); + +const CommandSchema = z + .object({ type: z.literal('command'), ...baseFields }) + .passthrough(); + +const CrashSchema = z + .object({ type: z.literal('crash'), ...baseFields }) + .passthrough(); + +const TelemetryEventSchema = z.discriminatedUnion('type', [ + SessionStartSchema, + SessionEndSchema, + StepSchema, + AgentToolSchema, + AgentLlmSchema, + CommandSchema, + CrashSchema, +]); + +describe('TelemetryEventSchema (discriminated union)', () => { + const base = { sessionId: 'sess-1', timestamp: '2024-01-01T00:00:00Z' }; + + describe('accepts all 7 event types', () => { + it('accepts session.start', () => { + const result = TelemetryEventSchema.safeParse({ + type: 'session.start', + ...base, + attributes: { 'installer.version': '1.0.0' }, + }); + expect(result.success).toBe(true); + }); + + it('accepts session.end', () => { + const result = TelemetryEventSchema.safeParse({ + type: 'session.end', + ...base, + attributes: { 'installer.outcome': 'success' }, + }); + expect(result.success).toBe(true); + }); + + it('accepts step', () => { + const result = TelemetryEventSchema.safeParse({ + type: 'step', + ...base, + name: 'detect', + startTimestamp: '2024-01-01T00:00:00Z', + durationMs: 100, + success: true, + }); + expect(result.success).toBe(true); + }); + + it('accepts agent.tool', () => { + const result = TelemetryEventSchema.safeParse({ + type: 'agent.tool', + ...base, + toolName: 'Write', + startTimestamp: '2024-01-01T00:00:00Z', + durationMs: 50, + success: true, + }); + expect(result.success).toBe(true); + }); + + it('accepts agent.llm', () => { + const result = TelemetryEventSchema.safeParse({ + type: 'agent.llm', + ...base, + model: 'claude', + inputTokens: 100, + outputTokens: 50, + }); + expect(result.success).toBe(true); + }); + + it('accepts command', () => { + const result = TelemetryEventSchema.safeParse({ + type: 'command', + ...base, + attributes: { 'command.name': 'org.list' }, + }); + expect(result.success).toBe(true); + }); + + it('accepts crash', () => { + const result = TelemetryEventSchema.safeParse({ + type: 'crash', + ...base, + attributes: { 'crash.error_type': 'Error' }, + }); + expect(result.success).toBe(true); + }); + }); + + describe('preserves top-level fields via .passthrough()', () => { + it('preserves name and durationMs on step events', () => { + const result = TelemetryEventSchema.safeParse({ + type: 'step', + ...base, + name: 'detect', + durationMs: 100, + success: true, + }); + expect(result.success).toBe(true); + if (result.success) { + expect(result.data).toHaveProperty('name', 'detect'); + expect(result.data).toHaveProperty('durationMs', 100); + } + }); + + it('preserves toolName on agent.tool events', () => { + const result = TelemetryEventSchema.safeParse({ + type: 'agent.tool', + ...base, + toolName: 'Write', + durationMs: 50, + success: true, + }); + expect(result.success).toBe(true); + if (result.success) { + expect(result.data).toHaveProperty('toolName', 'Write'); + } + }); + + it('preserves model, inputTokens, outputTokens on agent.llm events', () => { + const result = TelemetryEventSchema.safeParse({ + type: 'agent.llm', + ...base, + model: 'claude', + inputTokens: 100, + outputTokens: 50, + }); + expect(result.success).toBe(true); + if (result.success) { + expect(result.data).toHaveProperty('model', 'claude'); + expect(result.data).toHaveProperty('inputTokens', 100); + expect(result.data).toHaveProperty('outputTokens', 50); + } + }); + + it('preserves startTimestamp on step events', () => { + const result = TelemetryEventSchema.safeParse({ + type: 'step', + ...base, + name: 'detect', + startTimestamp: '2024-01-01T00:00:00Z', + durationMs: 100, + success: true, + }); + expect(result.success).toBe(true); + if (result.success) { + expect(result.data).toHaveProperty('startTimestamp', '2024-01-01T00:00:00Z'); + } + }); + }); + + describe('backward compatibility', () => { + it('accepts step event without startTimestamp', () => { + const result = TelemetryEventSchema.safeParse({ + type: 'step', + ...base, + name: 'detect', + durationMs: 100, + success: true, + }); + expect(result.success).toBe(true); + }); + + it('accepts agent.tool event without startTimestamp', () => { + const result = TelemetryEventSchema.safeParse({ + type: 'agent.tool', + ...base, + toolName: 'Write', + durationMs: 50, + success: true, + }); + expect(result.success).toBe(true); + }); + }); + + describe('rejects invalid events', () => { + it('rejects events with unknown type', () => { + const result = TelemetryEventSchema.safeParse({ + type: 'unknown', + ...base, + }); + expect(result.success).toBe(false); + }); + + it('rejects events without type', () => { + const result = TelemetryEventSchema.safeParse({ + ...base, + }); + expect(result.success).toBe(false); + }); + }); +}); diff --git a/src/utils/telemetry-types.ts b/src/utils/telemetry-types.ts index b9a66798..93faa117 100644 --- a/src/utils/telemetry-types.ts +++ b/src/utils/telemetry-types.ts @@ -4,7 +4,7 @@ */ export interface TelemetryEvent { - type: 'session.start' | 'session.end' | 'step' | 'agent.tool' | 'agent.llm'; + type: 'session.start' | 'session.end' | 'step' | 'agent.tool' | 'agent.llm' | 'command' | 'crash'; sessionId: string; timestamp: string; attributes?: Record; @@ -17,6 +17,12 @@ export interface SessionStartEvent extends TelemetryEvent { 'installer.mode': 'cli' | 'tui' | 'headless'; 'workos.user_id'?: string; 'workos.org_id'?: string; + 'env.os': string; + 'env.os_version': string; + 'env.node_version': string; + 'env.shell': string; + 'env.ci': boolean; + 'env.ci_provider'?: string; }; } @@ -31,6 +37,7 @@ export interface SessionEndEvent extends TelemetryEvent { export interface StepEvent extends TelemetryEvent { type: 'step'; name: string; + startTimestamp: string; durationMs: number; success: boolean; error?: { @@ -42,6 +49,7 @@ export interface StepEvent extends TelemetryEvent { export interface AgentToolEvent extends TelemetryEvent { type: 'agent.tool'; toolName: string; + startTimestamp: string; durationMs: number; success: boolean; } @@ -53,6 +61,41 @@ export interface AgentLLMEvent extends TelemetryEvent { outputTokens: number; } +export interface CommandEvent extends TelemetryEvent { + type: 'command'; + attributes: { + 'command.name': string; + 'command.duration_ms': number; + 'command.success': boolean; + 'command.error_type'?: string; + 'command.error_message'?: string; + 'command.flags'?: string; + 'env.os': string; + 'env.os_version': string; + 'env.node_version': string; + 'env.shell': string; + 'env.ci': boolean; + 'env.ci_provider'?: string; + }; +} + +export interface CrashEvent extends TelemetryEvent { + type: 'crash'; + attributes: { + 'crash.error_type': string; + 'crash.error_message': string; + 'crash.stack': string; + 'crash.command'?: string; + 'installer.version': string; + 'env.os': string; + 'env.os_version': string; + 'env.node_version': string; + 'env.shell': string; + 'env.ci': boolean; + 'env.ci_provider'?: string; + }; +} + export interface TelemetryRequest { events: TelemetryEvent[]; } From 27b31ef7d9d37b92f7a24b9c7df94d1a0f01d0ba Mon Sep 17 00:00:00 2001 From: Nick Nisi Date: Tue, 14 Apr 2026 14:25:12 -0500 Subject: [PATCH 02/15] feat: add command-level telemetry, crash reporting, and store-forward persistence Wire up yargs middleware that emits a provisional command event before each handler runs, then replaces it with actual duration/success on completion. This covers the ~25 process.exit() call sites without modifying them. - Command telemetry middleware with canonical name resolution and flag extraction - Crash reporter with sanitized stack traces (sync handlers, no async) - Store-forward: persist unsent events to temp file on exit, recover on next run - Fix flush() to retain events until HTTP success (was clearing before fetch) - Auto-wrap handlers in registerSubcommand() (single change point) - Shared COMMAND_ALIASES map for telemetry and help-json - analytics.initForNonInstaller() sets gatewayUrl + JWT from stored creds --- src/bin.ts | 15 ++ src/lib/command-aliases.ts | 10 ++ src/utils/analytics.spec.ts | 113 +++++++++++++++ src/utils/analytics.ts | 38 ++++++ src/utils/command-telemetry.spec.ts | 159 ++++++++++++++++++++++ src/utils/command-telemetry.ts | 86 ++++++++++++ src/utils/crash-reporter.spec.ts | 149 ++++++++++++++++++++ src/utils/crash-reporter.ts | 55 ++++++++ src/utils/register-subcommand.ts | 5 +- src/utils/telemetry-client.spec.ts | 106 ++++++++++++++- src/utils/telemetry-client.ts | 45 +++++- src/utils/telemetry-store-forward.spec.ts | 159 ++++++++++++++++++++++ src/utils/telemetry-store-forward.ts | 57 ++++++++ 13 files changed, 990 insertions(+), 7 deletions(-) create mode 100644 src/lib/command-aliases.ts create mode 100644 src/utils/command-telemetry.spec.ts create mode 100644 src/utils/command-telemetry.ts create mode 100644 src/utils/crash-reporter.spec.ts create mode 100644 src/utils/crash-reporter.ts create mode 100644 src/utils/telemetry-store-forward.spec.ts create mode 100644 src/utils/telemetry-store-forward.ts diff --git a/src/bin.ts b/src/bin.ts index 213a4379..ba9f345a 100644 --- a/src/bin.ts +++ b/src/bin.ts @@ -43,6 +43,20 @@ import { } from './utils/output.js'; import clack from './utils/clack.js'; import { registerSubcommand } from './utils/register-subcommand.js'; +import { COMMAND_ALIASES } from './lib/command-aliases.js'; +import { installCrashReporter } from './utils/crash-reporter.js'; +import { installStoreForward, recoverPendingEvents } from './utils/telemetry-store-forward.js'; +import { commandTelemetryMiddleware } from './utils/command-telemetry.js'; +import { analytics } from './utils/analytics.js'; + +// Telemetry infrastructure: crash reporter, store-forward, and gateway init. +// Must be before yargs so crashes during startup are captured. +installCrashReporter(); +installStoreForward(); +analytics.initForNonInstaller(); +// Fire-and-forget: recover events from previous crashes/exits. +// NO await — must not block startup (flush timeout is 3s). +recoverPendingEvents(); // Resolve output mode early from raw argv (before yargs parses) const rawArgs = hideBin(process.argv); @@ -210,6 +224,7 @@ yargs(rawArgs) describe: 'Interaction mode: human, coding agent, or CI automation', global: true, }) + .middleware(commandTelemetryMiddleware(rawArgs)) .middleware(async (argv) => { // Warn about unclaimed environments before management commands. // Excluded: auth/claim/install/dashboard handle their own credential flows; diff --git a/src/lib/command-aliases.ts b/src/lib/command-aliases.ts new file mode 100644 index 00000000..8f515ae3 --- /dev/null +++ b/src/lib/command-aliases.ts @@ -0,0 +1,10 @@ +/** + * Shared canonical command alias map. + * Single source of truth for both telemetry and help-json. + * + * Keys are user-facing aliases, values are canonical command names. + * Adding an alias here updates both metrics aggregation and --help --json output. + */ +export const COMMAND_ALIASES: Record = { + org: 'organization', +}; diff --git a/src/utils/analytics.spec.ts b/src/utils/analytics.spec.ts index 8c0f1bfe..c98531b8 100644 --- a/src/utils/analytics.spec.ts +++ b/src/utils/analytics.spec.ts @@ -5,6 +5,7 @@ const mockSetGatewayUrl = vi.fn(); const mockSetAccessToken = vi.fn(); const mockQueueEvent = vi.fn(); const mockFlush = vi.fn().mockResolvedValue(undefined); +const mockReplaceLastEventOfType = vi.fn(); vi.mock('./telemetry-client.js', () => ({ telemetryClient: { @@ -12,6 +13,7 @@ vi.mock('./telemetry-client.js', () => ({ setAccessToken: mockSetAccessToken, queueEvent: mockQueueEvent, flush: mockFlush, + replaceLastEventOfType: (...args: unknown[]) => mockReplaceLastEventOfType(...args), }, })); @@ -25,6 +27,27 @@ vi.mock('uuid', () => ({ v4: () => 'test-session-id-123', })); +// Mock settings for initForNonInstaller +const mockGetLlmGatewayUrl = vi.fn(() => 'https://api.workos.com/llm-gateway'); +const mockSettingsConfig = { + nodeVersion: '>=18', + logging: { debugMode: false }, + telemetry: { enabled: true, eventName: 'installer_interaction' }, + documentation: { workosDocsUrl: 'https://workos.com/docs', dashboardUrl: 'https://dashboard.workos.com', issuesUrl: 'https://github.com' }, + legacy: { oauthPort: 3000 }, +}; +vi.mock('../lib/settings.js', () => ({ + getLlmGatewayUrl: () => mockGetLlmGatewayUrl(), + getConfig: () => mockSettingsConfig, + getVersion: () => '0.12.1', +})); + +// Mock credentials for initForNonInstaller +const mockGetCredentials = vi.fn(); +vi.mock('../lib/credentials.js', () => ({ + getCredentials: () => mockGetCredentials(), +})); + describe('Analytics', () => { // Need to handle WORKOS_TELEMETRY_ENABLED which is evaluated at import time const originalEnv = process.env.WORKOS_TELEMETRY; @@ -56,8 +79,17 @@ describe('Analytics', () => { setAccessToken: mockSetAccessToken, queueEvent: mockQueueEvent, flush: mockFlush, + replaceLastEventOfType: (...args: unknown[]) => mockReplaceLastEventOfType(...args), }, })); + vi.doMock('../lib/settings.js', () => ({ + getLlmGatewayUrl: () => mockGetLlmGatewayUrl(), + getConfig: () => mockSettingsConfig, + getVersion: () => '0.12.1', + })); + vi.doMock('../lib/credentials.js', () => ({ + getCredentials: () => mockGetCredentials(), + })); const module = await import('./analytics.js'); Analytics = module.Analytics; analytics = new Analytics(); @@ -498,6 +530,59 @@ describe('Analytics', () => { expect(event.attributes['installer.version']).toBe('unknown'); }); }); + + describe('initForNonInstaller', () => { + it('sets gatewayUrl from default config', () => { + mockGetLlmGatewayUrl.mockReturnValue('https://api.workos.com/llm-gateway'); + analytics.initForNonInstaller(); + + expect(mockSetGatewayUrl).toHaveBeenCalledWith('https://api.workos.com/llm-gateway'); + }); + + it('sets access token from stored credentials', () => { + mockGetCredentials.mockReturnValue({ accessToken: 'stored-jwt-token' }); + analytics.initForNonInstaller(); + + expect(mockSetAccessToken).toHaveBeenCalledWith('stored-jwt-token'); + }); + + it('skips access token when no credentials stored', () => { + mockGetCredentials.mockReturnValue(null); + analytics.initForNonInstaller(); + + expect(mockSetAccessToken).not.toHaveBeenCalled(); + }); + }); + + describe('replaceLastCommandEvent', () => { + it('removes last command event and queues a new one', () => { + analytics.replaceLastCommandEvent('organization.list', 150, true, { flags: ['json'] }); + + expect(mockReplaceLastEventOfType).toHaveBeenCalledWith('command'); + expect(mockQueueEvent).toHaveBeenCalledWith( + expect.objectContaining({ + type: 'command', + attributes: expect.objectContaining({ + 'command.name': 'organization.list', + 'command.duration_ms': 150, + 'command.success': true, + 'command.flags': 'json', + }), + }), + ); + }); + + it('includes error info on failure', () => { + const error = new Error('oops'); + error.name = 'CommandError'; + analytics.replaceLastCommandEvent('auth.login', 50, false, { error }); + + const event = mockQueueEvent.mock.calls[0][0]; + expect(event.attributes['command.success']).toBe(false); + expect(event.attributes['command.error_type']).toBe('CommandError'); + expect(event.attributes['command.error_message']).toBe('oops'); + }); + }); }); describe('with telemetry disabled', () => { @@ -510,8 +595,17 @@ describe('Analytics', () => { setAccessToken: mockSetAccessToken, queueEvent: mockQueueEvent, flush: mockFlush, + replaceLastEventOfType: (...args: unknown[]) => mockReplaceLastEventOfType(...args), }, })); + vi.doMock('../lib/settings.js', () => ({ + getLlmGatewayUrl: () => mockGetLlmGatewayUrl(), + getConfig: () => mockSettingsConfig, + getVersion: () => '0.12.1', + })); + vi.doMock('../lib/credentials.js', () => ({ + getCredentials: () => mockGetCredentials(), + })); }); it('capture does nothing', async () => { @@ -595,5 +689,24 @@ describe('Analytics', () => { expect(mockQueueEvent).not.toHaveBeenCalled(); }); + + it('initForNonInstaller does nothing', async () => { + const { Analytics } = await import('./analytics.js'); + const analytics = new Analytics(); + + analytics.initForNonInstaller(); + + expect(mockSetGatewayUrl).not.toHaveBeenCalled(); + }); + + it('replaceLastCommandEvent does nothing', async () => { + const { Analytics } = await import('./analytics.js'); + const analytics = new Analytics(); + + analytics.replaceLastCommandEvent('org.list', 100, true); + + expect(mockReplaceLastEventOfType).not.toHaveBeenCalled(); + expect(mockQueueEvent).not.toHaveBeenCalled(); + }); }); }); diff --git a/src/utils/analytics.ts b/src/utils/analytics.ts index 1c9242bb..9253b6e6 100644 --- a/src/utils/analytics.ts +++ b/src/utils/analytics.ts @@ -12,6 +12,8 @@ import type { CrashEvent, } from './telemetry-types.js'; import { WORKOS_TELEMETRY_ENABLED } from '../lib/constants.js'; +import { getLlmGatewayUrl } from '../lib/settings.js'; +import { getCredentials } from '../lib/credentials.js'; export class Analytics { private tags: Record = {}; @@ -43,6 +45,24 @@ export class Analytics { telemetryClient.setGatewayUrl(url); } + /** + * Initialize telemetry for non-installer commands. + * Sets gatewayUrl from default config and loads stored JWT if available. + * The installer flow sets these itself in run-with-core.ts; this covers + * management commands like `org list`, `auth login`, etc. + */ + initForNonInstaller(): void { + if (!WORKOS_TELEMETRY_ENABLED) return; + + const gatewayUrl = getLlmGatewayUrl(); + telemetryClient.setGatewayUrl(gatewayUrl); + + const creds = getCredentials(); + if (creds?.accessToken) { + telemetryClient.setAccessToken(creds.accessToken); + } + } + setTag(key: string, value: string | boolean | number | null | undefined) { this.tags[key] = value; } @@ -211,6 +231,24 @@ export class Analytics { telemetryClient.queueEvent(event); } + /** + * Replace the last queued command event with updated data. + * Used by the command handler wrapper to swap the provisional event + * (queued by middleware) with the real one after the handler completes. + */ + replaceLastCommandEvent( + name: string, + durationMs: number, + success: boolean, + options?: { error?: Error; flags?: string[] }, + ) { + if (!WORKOS_TELEMETRY_ENABLED) return; + + telemetryClient.replaceLastEventOfType('command'); + + this.commandExecuted(name, durationMs, success, options); + } + captureUnhandledCrash(error: Error, options?: { command?: string; version?: string }) { if (!WORKOS_TELEMETRY_ENABLED) return; diff --git a/src/utils/command-telemetry.spec.ts b/src/utils/command-telemetry.spec.ts new file mode 100644 index 00000000..d0095fe0 --- /dev/null +++ b/src/utils/command-telemetry.spec.ts @@ -0,0 +1,159 @@ +import { describe, it, expect, beforeEach, vi } from 'vitest'; +import { resolveCanonicalName, extractUserFlags, commandTelemetryMiddleware, wrapCommandHandler } from './command-telemetry.js'; + +const mockCommandExecuted = vi.fn(); +const mockReplaceLastCommandEvent = vi.fn(); + +vi.mock('./analytics.js', () => ({ + analytics: { + commandExecuted: (...args: unknown[]) => mockCommandExecuted(...args), + replaceLastCommandEvent: (...args: unknown[]) => mockReplaceLastCommandEvent(...args), + }, +})); + +vi.mock('../lib/constants.js', () => ({ + WORKOS_TELEMETRY_ENABLED: true, +})); + +describe('command-telemetry', () => { + beforeEach(() => { + vi.clearAllMocks(); + }); + + describe('resolveCanonicalName', () => { + it('resolves aliased commands', () => { + expect(resolveCanonicalName(['org', 'list'])).toBe('organization.list'); + }); + + it('passes through non-aliased commands', () => { + expect(resolveCanonicalName(['auth', 'login'])).toBe('auth.login'); + }); + + it('returns root for empty parts', () => { + expect(resolveCanonicalName([])).toBe('root'); + }); + + it('handles single-part commands', () => { + expect(resolveCanonicalName(['install'])).toBe('install'); + }); + + it('only aliases the first part', () => { + expect(resolveCanonicalName(['org', 'org'])).toBe('organization.org'); + }); + }); + + describe('extractUserFlags', () => { + it('extracts long flags', () => { + expect(extractUserFlags(['org', 'list', '--json'])).toEqual(['json']); + }); + + it('extracts short flags', () => { + expect(extractUserFlags(['-v'])).toEqual(['v']); + }); + + it('handles flags with values', () => { + expect(extractUserFlags(['--env=staging'])).toEqual(['env']); + }); + + it('deduplicates flags', () => { + expect(extractUserFlags(['--json', '--json'])).toEqual(['json']); + }); + + it('ignores positionals', () => { + expect(extractUserFlags(['org', 'list', 'my-org'])).toEqual([]); + }); + + it('ignores multi-char short flags (not real flags)', () => { + expect(extractUserFlags(['-abc'])).toEqual([]); + }); + }); + + describe('commandTelemetryMiddleware', () => { + it('queues provisional event with duration=0', async () => { + const middleware = commandTelemetryMiddleware(['org', 'list', '--json']); + const argv: Record = { _: ['org', 'list'] }; + + await middleware(argv); + + expect(mockCommandExecuted).toHaveBeenCalledWith('organization.list', 0, true, { + flags: ['json'], + }); + }); + + it('stores telemetry metadata on argv', async () => { + const middleware = commandTelemetryMiddleware(['auth', 'login']); + const argv: Record = { _: ['auth', 'login'] }; + + await middleware(argv); + + expect(argv.__telemetryCommandName).toBe('auth.login'); + expect(argv.__telemetryStartTime).toBeTypeOf('number'); + expect(argv.__telemetryFlags).toEqual([]); + }); + }); + + describe('wrapCommandHandler', () => { + it('replaces provisional event on success', async () => { + const handler = vi.fn().mockResolvedValue(undefined); + const wrapped = wrapCommandHandler(handler); + const argv = { + __telemetryCommandName: 'organization.list', + __telemetryStartTime: Date.now() - 100, + __telemetryFlags: ['json'], + }; + + await wrapped(argv); + + expect(handler).toHaveBeenCalledWith(argv); + expect(mockReplaceLastCommandEvent).toHaveBeenCalledWith( + 'organization.list', + expect.any(Number), + true, + { flags: ['json'] }, + ); + const duration = mockReplaceLastCommandEvent.mock.calls[0][1] as number; + expect(duration).toBeGreaterThanOrEqual(100); + }); + + it('replaces provisional event on failure with error', async () => { + const error = new Error('command failed'); + const handler = vi.fn().mockRejectedValue(error); + const wrapped = wrapCommandHandler(handler); + const argv = { + __telemetryCommandName: 'organization.list', + __telemetryStartTime: Date.now(), + __telemetryFlags: [], + }; + + await expect(wrapped(argv)).rejects.toThrow('command failed'); + + expect(mockReplaceLastCommandEvent).toHaveBeenCalledWith( + 'organization.list', + expect.any(Number), + false, + { error, flags: [] }, + ); + }); + + it('re-throws the original error', async () => { + const error = new Error('original'); + const handler = vi.fn().mockRejectedValue(error); + const wrapped = wrapCommandHandler(handler); + + await expect(wrapped({ __telemetryStartTime: Date.now() })).rejects.toBe(error); + }); + + it('handles non-Error throws', async () => { + const handler = vi.fn().mockRejectedValue('string error'); + const wrapped = wrapCommandHandler(handler); + + await expect( + wrapped({ __telemetryCommandName: 'test', __telemetryStartTime: Date.now(), __telemetryFlags: [] }), + ).rejects.toBe('string error'); + + const errorArg = mockReplaceLastCommandEvent.mock.calls[0][3].error; + expect(errorArg).toBeInstanceOf(Error); + expect(errorArg.message).toBe('string error'); + }); + }); +}); diff --git a/src/utils/command-telemetry.ts b/src/utils/command-telemetry.ts new file mode 100644 index 00000000..57603b19 --- /dev/null +++ b/src/utils/command-telemetry.ts @@ -0,0 +1,86 @@ +import { analytics } from './analytics.js'; +import { COMMAND_ALIASES } from '../lib/command-aliases.js'; +import { WORKOS_TELEMETRY_ENABLED } from '../lib/constants.js'; + +/** + * Resolve user-typed command parts to their canonical name. + * Applies alias mapping to the top-level command only. + * + * Examples: + * ['org', 'list'] -> 'organization.list' + * ['auth', 'login'] -> 'auth.login' + * [] -> 'root' + */ +export function resolveCanonicalName(parts: string[]): string { + if (parts.length === 0) return 'root'; + const resolved = [...parts]; + resolved[0] = COMMAND_ALIASES[resolved[0]] ?? resolved[0]; + return resolved.join('.'); +} + +/** + * Extract only user-supplied flags (not positionals, not defaults). + * Parses rawArgs directly instead of argv to avoid positionals and camelCase dupes. + */ +export function extractUserFlags(rawArgs: string[]): string[] { + const passedFlags = rawArgs + .filter((arg) => arg.startsWith('--') || (arg.startsWith('-') && arg.length === 2)) + .map((arg) => arg.replace(/^-+/, '').split('=')[0]); + return [...new Set(passedFlags)]; +} + +/** + * Yargs middleware that queues a PROVISIONAL command event immediately. + * This ensures there's always an event to persist via store-forward, + * even if the handler calls process.exit() before returning. + * + * The provisional event has success=true and duration=0. It gets + * updated by the handler wrapper on normal completion/failure. + */ +export function commandTelemetryMiddleware(rawArgs: string[]) { + return async (argv: Record) => { + if (!WORKOS_TELEMETRY_ENABLED) return; + + const commandParts = (argv._ as string[]) || []; + const commandName = resolveCanonicalName(commandParts); + const flags = extractUserFlags(rawArgs); + const startTime = Date.now(); + + // Store metadata for the handler wrapper to update later + argv.__telemetryCommandName = commandName; + argv.__telemetryStartTime = startTime; + argv.__telemetryFlags = flags; + + // Queue provisional event NOW, before the handler runs. + // If the handler calls process.exit(), store-forward persists this. + analytics.commandExecuted(commandName, 0, true, { flags }); + }; +} + +/** + * Wraps a yargs command handler to UPDATE the provisional event + * with actual duration and success/failure on completion. + * Designed to be called inside registerSubcommand(), not at each call site. + */ +export function wrapCommandHandler( + handler: (argv: any) => Promise, +): (argv: any) => Promise { + return async (argv) => { + const commandName = String(argv.__telemetryCommandName ?? 'unknown'); + const startTime = Number(argv.__telemetryStartTime ?? Date.now()); + const flags = (argv.__telemetryFlags as string[]) ?? []; + + try { + await handler(argv); + // Replace the provisional event with the real one + analytics.replaceLastCommandEvent(commandName, Date.now() - startTime, true, { flags }); + } catch (error) { + const err = error instanceof Error ? error : new Error(String(error)); + analytics.replaceLastCommandEvent(commandName, Date.now() - startTime, false, { + error: err, + flags, + }); + throw error; + } + }; +} diff --git a/src/utils/crash-reporter.spec.ts b/src/utils/crash-reporter.spec.ts new file mode 100644 index 00000000..a36b918d --- /dev/null +++ b/src/utils/crash-reporter.spec.ts @@ -0,0 +1,149 @@ +import { describe, it, expect, beforeEach, vi, afterEach } from 'vitest'; +import os from 'node:os'; + +const mockCaptureUnhandledCrash = vi.fn(); + +vi.mock('./analytics.js', () => ({ + analytics: { + captureUnhandledCrash: (...args: unknown[]) => mockCaptureUnhandledCrash(...args), + }, +})); + +describe('crash-reporter', () => { + beforeEach(() => { + vi.clearAllMocks(); + vi.resetModules(); + }); + + describe('sanitizeStack', () => { + let sanitizeStack: typeof import('./crash-reporter.js').sanitizeStack; + + beforeEach(async () => { + const mod = await import('./crash-reporter.js'); + sanitizeStack = mod.sanitizeStack; + }); + + it('returns empty string for undefined', () => { + expect(sanitizeStack(undefined)).toBe(''); + }); + + it('replaces home directory with ~', () => { + const home = os.homedir(); + const stack = `Error: test\n at ${home}/project/src/index.ts:1:1`; + expect(sanitizeStack(stack)).toContain('~'); + expect(sanitizeStack(stack)).not.toContain(home); + }); + + it('strips absolute paths to node_modules/dist/src', () => { + const stack = 'Error\n at /long/absolute/path/to/src/file.ts:1:1'; + const result = sanitizeStack(stack); + expect(result).toContain('src/'); + expect(result).not.toContain('/long/absolute/path/to/'); + }); + + it('truncates stacks longer than 4096 chars', () => { + const longStack = 'Error: test\n' + 'x'.repeat(5000); + const result = sanitizeStack(longStack); + expect(result.length).toBeLessThanOrEqual(4096 + '\n...[truncated]'.length); + expect(result).toContain('...[truncated]'); + }); + + it('does not truncate short stacks', () => { + const shortStack = 'Error: test\n at file.ts:1:1'; + const result = sanitizeStack(shortStack); + expect(result).toBe(shortStack); + expect(result).not.toContain('truncated'); + }); + }); + + describe('installCrashReporter', () => { + let processOnSpy: ReturnType; + let processExitSpy: ReturnType; + + beforeEach(() => { + processOnSpy = vi.spyOn(process, 'on'); + processExitSpy = vi.spyOn(process, 'exit').mockImplementation((() => {}) as never); + }); + + afterEach(() => { + processOnSpy.mockRestore(); + processExitSpy.mockRestore(); + }); + + it('registers uncaughtException and unhandledRejection handlers', async () => { + const { installCrashReporter } = await import('./crash-reporter.js'); + installCrashReporter(); + + const eventNames = processOnSpy.mock.calls.map((call) => call[0]); + expect(eventNames).toContain('uncaughtException'); + expect(eventNames).toContain('unhandledRejection'); + }); + + it('uncaughtException handler queues crash event and exits', async () => { + const { installCrashReporter } = await import('./crash-reporter.js'); + installCrashReporter(); + + const uncaughtHandler = processOnSpy.mock.calls.find((c) => c[0] === 'uncaughtException')?.[1] as ( + err: Error, + ) => void; + + const error = new Error('boom'); + uncaughtHandler(error); + + expect(mockCaptureUnhandledCrash).toHaveBeenCalledTimes(1); + const capturedError = mockCaptureUnhandledCrash.mock.calls[0][0]; + expect(capturedError.message).toBe('boom'); + expect(processExitSpy).toHaveBeenCalledWith(1); + }); + + it('unhandledRejection handler wraps non-Error reasons', async () => { + const { installCrashReporter } = await import('./crash-reporter.js'); + installCrashReporter(); + + const rejectionHandler = processOnSpy.mock.calls.find((c) => c[0] === 'unhandledRejection')?.[1] as ( + reason: unknown, + ) => void; + + rejectionHandler('string reason'); + + expect(mockCaptureUnhandledCrash).toHaveBeenCalledTimes(1); + const capturedError = mockCaptureUnhandledCrash.mock.calls[0][0]; + expect(capturedError.message).toBe('string reason'); + }); + + it('isCrashing guard prevents recursive handling', async () => { + // Simulate the crash handler being called, then itself crashing + mockCaptureUnhandledCrash.mockImplementationOnce(() => { + // First call succeeds + }); + + const { installCrashReporter } = await import('./crash-reporter.js'); + installCrashReporter(); + + const uncaughtHandler = processOnSpy.mock.calls.find((c) => c[0] === 'uncaughtException')?.[1] as ( + err: Error, + ) => void; + + // First call sets isCrashing + uncaughtHandler(new Error('first')); + // Second call should be guarded (module-level isCrashing = true) + uncaughtHandler(new Error('second')); + + // Only first call should have reached analytics + expect(mockCaptureUnhandledCrash).toHaveBeenCalledTimes(1); + }); + + it('handlers are synchronous (no async in the critical path)', async () => { + const { installCrashReporter } = await import('./crash-reporter.js'); + installCrashReporter(); + + const uncaughtHandler = processOnSpy.mock.calls.find((c) => c[0] === 'uncaughtException')?.[1] as ( + err: Error, + ) => void; + + // Verify the handler returns void, not a Promise + const result = uncaughtHandler(new Error('sync test')); + expect(result).toBeUndefined(); + }); + }); +}); diff --git a/src/utils/crash-reporter.ts b/src/utils/crash-reporter.ts new file mode 100644 index 00000000..14207793 --- /dev/null +++ b/src/utils/crash-reporter.ts @@ -0,0 +1,55 @@ +import { analytics } from './analytics.js'; +import { homedir } from 'node:os'; + +const MAX_STACK_LENGTH = 4096; +let isCrashing = false; + +/** + * Sanitize stack trace: strip absolute paths to relative, remove home dir. + * Prevents leaking file system layout in telemetry events. + */ +export function sanitizeStack(stack: string | undefined): string { + if (!stack) return ''; + const home = homedir(); + let sanitized = stack; + sanitized = sanitized.replaceAll(home, '~'); + sanitized = sanitized.replace(/\/[^\s:]+\/(node_modules|dist|src)\//g, '$1/'); + return sanitized.length > MAX_STACK_LENGTH + ? sanitized.slice(0, MAX_STACK_LENGTH) + '\n...[truncated]' + : sanitized; +} + +/** + * Register global handlers for uncaughtException and unhandledRejection + * that capture crash details before the process exits. + * + * Handlers are SYNCHRONOUS. Node does NOT await async uncaughtException handlers. + * We queue the event synchronously; store-forward's process.on('exit') handler + * persists it to disk. The next CLI invocation recovers and sends. + */ +export function installCrashReporter(): void { + process.on('uncaughtException', (error) => { + reportCrashSync(error); + process.exit(1); + }); + + process.on('unhandledRejection', (reason) => { + const error = reason instanceof Error ? reason : new Error(String(reason)); + reportCrashSync(error); + process.exit(1); + }); +} + +function reportCrashSync(error: Error): void { + if (isCrashing) return; + isCrashing = true; + try { + // Sanitize the stack before passing to analytics + const sanitized = new Error(error.message); + sanitized.name = error.name; + sanitized.stack = sanitizeStack(error.stack); + analytics.captureUnhandledCrash(sanitized); + } catch { + // Telemetry must never prevent exit + } +} diff --git a/src/utils/register-subcommand.ts b/src/utils/register-subcommand.ts index 39cb8b2f..610f4f35 100644 --- a/src/utils/register-subcommand.ts +++ b/src/utils/register-subcommand.ts @@ -1,5 +1,7 @@ import yargs from 'yargs'; import type { Argv } from 'yargs'; +import { wrapCommandHandler } from './command-telemetry.js'; +import { WORKOS_TELEMETRY_ENABLED } from '../lib/constants.js'; interface YargsOptions { demandedOptions: Record; @@ -43,5 +45,6 @@ export function registerSubcommand( // Builder threw during probe — fall back to unenriched description } - return parentYargs.command(usage, enrichedDescription, builder, handler); + const telemetryHandler = WORKOS_TELEMETRY_ENABLED ? wrapCommandHandler(handler) : handler; + return parentYargs.command(usage, enrichedDescription, builder, telemetryHandler); } diff --git a/src/utils/telemetry-client.spec.ts b/src/utils/telemetry-client.spec.ts index c4781e5a..23609263 100644 --- a/src/utils/telemetry-client.spec.ts +++ b/src/utils/telemetry-client.spec.ts @@ -15,6 +15,18 @@ vi.mock('../lib/credentials.js', () => ({ getCredentials: () => mockGetCredentials(), })); +// Mock fs for persistToFile tests +const mockMkdirSync = vi.fn(); +const mockWriteFileSync = vi.fn(); +vi.mock('node:fs', async (importOriginal) => { + const actual = await importOriginal(); + return { + ...actual, + mkdirSync: (...args: unknown[]) => mockMkdirSync(...args), + writeFileSync: (...args: unknown[]) => mockWriteFileSync(...args), + }; +}); + // Import after mocks are set up const { TelemetryClient } = await import('./telemetry-client.js'); @@ -136,15 +148,26 @@ describe('TelemetryClient', () => { expect(mockFetch).toHaveBeenCalledTimes(1); }); - it('clears events even if flush fails', async () => { + it('retains events when flush fails (for store-forward)', async () => { mockFetch.mockRejectedValueOnce(new Error('Network error')); client.setGatewayUrl('http://localhost:8000'); client.queueEvent({ type: 'session.start', sessionId: '123', timestamp: new Date().toISOString() }); - await client.flush(); // Should not throw - await client.flush(); // Should be no-op + await client.flush(); // Should not throw, events retained + await client.flush(); // Should retry since events are still queued - expect(mockFetch).toHaveBeenCalledTimes(1); + expect(mockFetch).toHaveBeenCalledTimes(2); + }); + + it('retains events on non-ok response (for store-forward)', async () => { + mockFetch.mockResolvedValueOnce({ ok: false, status: 500 }); + client.setGatewayUrl('http://localhost:8000'); + client.queueEvent({ type: 'session.start', sessionId: '123', timestamp: new Date().toISOString() }); + + await client.flush(); // Events retained on 500 + await client.flush(); // Should retry + + expect(mockFetch).toHaveBeenCalledTimes(2); }); it('handles network errors silently', async () => { @@ -182,4 +205,79 @@ describe('TelemetryClient', () => { ); }); }); + + describe('replaceLastEventOfType', () => { + it('removes the last event of the specified type', async () => { + client.setGatewayUrl('http://localhost:8000'); + client.queueEvent({ type: 'command', sessionId: '1', timestamp: '2024-01-01T00:00:00Z' }); + client.queueEvent({ type: 'session.start', sessionId: '1', timestamp: '2024-01-01T00:00:01Z' }); + client.queueEvent({ type: 'command', sessionId: '1', timestamp: '2024-01-01T00:00:02Z' }); + + client.replaceLastEventOfType('command'); + + await client.flush(); + const body = JSON.parse(mockFetch.mock.calls[0][1].body); + expect(body.events).toHaveLength(2); + expect(body.events[0].type).toBe('command'); + expect(body.events[1].type).toBe('session.start'); + }); + + it('does nothing if no event of that type exists', () => { + client.queueEvent({ type: 'session.start', sessionId: '1', timestamp: '2024-01-01T00:00:00Z' }); + // Should not throw + client.replaceLastEventOfType('command'); + }); + }); + + describe('queueEvents', () => { + it('queues multiple events at once', async () => { + client.setGatewayUrl('http://localhost:8000'); + client.queueEvents([ + { type: 'command', sessionId: '1', timestamp: '2024-01-01T00:00:00Z' }, + { type: 'crash', sessionId: '1', timestamp: '2024-01-01T00:00:01Z' }, + ]); + + await client.flush(); + const body = JSON.parse(mockFetch.mock.calls[0][1].body); + expect(body.events).toHaveLength(2); + }); + }); + + describe('persistToFile', () => { + beforeEach(() => { + mockMkdirSync.mockReset(); + mockWriteFileSync.mockReset(); + }); + + it('writes events to file and clears queue', async () => { + client.queueEvent({ type: 'session.start', sessionId: '1', timestamp: '2024-01-01T00:00:00Z' }); + client.persistToFile('/tmp/test-persist.json'); + + expect(mockMkdirSync).toHaveBeenCalledWith('/tmp', { recursive: true }); + expect(mockWriteFileSync).toHaveBeenCalledWith( + '/tmp/test-persist.json', + expect.stringContaining('session.start'), + 'utf-8', + ); + + // Queue should be empty after persist + client.setGatewayUrl('http://localhost:8000'); + await client.flush(); + expect(mockFetch).not.toHaveBeenCalled(); + }); + + it('does nothing when no events queued', () => { + client.persistToFile('/tmp/test-persist.json'); + expect(mockWriteFileSync).not.toHaveBeenCalled(); + }); + + it('fails silently on write error', () => { + mockMkdirSync.mockImplementation(() => { + throw new Error('EACCES'); + }); + + client.queueEvent({ type: 'session.start', sessionId: '1', timestamp: '2024-01-01T00:00:00Z' }); + expect(() => client.persistToFile('/tmp/test-persist.json')).not.toThrow(); + }); + }); }); diff --git a/src/utils/telemetry-client.ts b/src/utils/telemetry-client.ts index f85aea1d..bf20dcc1 100644 --- a/src/utils/telemetry-client.ts +++ b/src/utils/telemetry-client.ts @@ -1,3 +1,5 @@ +import { mkdirSync, writeFileSync } from 'node:fs'; +import { dirname } from 'node:path'; import { debug } from './debug.js'; import type { TelemetryEvent, TelemetryRequest } from './telemetry-types.js'; import { getCredentials } from '../lib/credentials.js'; @@ -23,6 +25,26 @@ export class TelemetryClient { this.events.push(event); } + /** + * Remove the last queued event of a given type. + * Used to swap a provisional event with an updated one. + */ + replaceLastEventOfType(type: TelemetryEvent['type']): void { + for (let i = this.events.length - 1; i >= 0; i--) { + if (this.events[i].type === type) { + this.events.splice(i, 1); + return; + } + } + } + + /** + * Queue multiple pre-formed events (used by store-forward recovery). + */ + queueEvents(events: TelemetryEvent[]): void { + this.events.push(...events); + } + async flush(): Promise { if (this.events.length === 0) return; if (!this.gatewayUrl) { @@ -31,7 +53,7 @@ export class TelemetryClient { } const payload: TelemetryRequest = { events: [...this.events] }; - this.events = []; + // DO NOT clear this.events yet — retain until success const headers: Record = { 'Content-Type': 'application/json', @@ -56,15 +78,34 @@ export class TelemetryClient { signal: controller.signal, }); - if (!response.ok) { + if (response.ok) { + this.events = []; + } else { debug(`[Telemetry] Failed to send: ${response.status}`); + // Events remain in queue for store-forward to persist } } catch (error) { debug(`[Telemetry] Error sending events: ${error}`); + // Events remain in queue for store-forward to persist } finally { clearTimeout(timeout); } } + + /** + * Synchronously write pending events to a file. + * Used as last resort in process.on('exit') handler. + */ + persistToFile(filePath: string): void { + if (this.events.length === 0) return; + try { + mkdirSync(dirname(filePath), { recursive: true }); + writeFileSync(filePath, JSON.stringify(this.events), 'utf-8'); + this.events = []; + } catch { + // Silent failure — telemetry must never block exit + } + } } export const telemetryClient = new TelemetryClient(); diff --git a/src/utils/telemetry-store-forward.spec.ts b/src/utils/telemetry-store-forward.spec.ts new file mode 100644 index 00000000..28367b42 --- /dev/null +++ b/src/utils/telemetry-store-forward.spec.ts @@ -0,0 +1,159 @@ +import { describe, it, expect, beforeEach, vi } from 'vitest'; + +const mockPersistToFile = vi.fn(); +const mockQueueEvents = vi.fn(); +const mockFlush = vi.fn().mockResolvedValue(undefined); + +vi.mock('./telemetry-client.js', () => ({ + telemetryClient: { + persistToFile: (...args: unknown[]) => mockPersistToFile(...args), + queueEvents: (...args: unknown[]) => mockQueueEvents(...args), + flush: () => mockFlush(), + }, +})); + +vi.mock('./debug.js', () => ({ + debug: vi.fn(), +})); + +const mockExistsSync = vi.fn(); +const mockReaddirSync = vi.fn(); +const mockReadFileSync = vi.fn(); +const mockUnlinkSync = vi.fn(); + +vi.mock('node:fs', async (importOriginal) => { + const actual = await importOriginal(); + return { + ...actual, + existsSync: (...args: unknown[]) => mockExistsSync(...args), + readdirSync: (...args: unknown[]) => mockReaddirSync(...args), + readFileSync: (...args: unknown[]) => mockReadFileSync(...args), + unlinkSync: (...args: unknown[]) => mockUnlinkSync(...args), + }; +}); + +describe('telemetry-store-forward', () => { + beforeEach(() => { + vi.clearAllMocks(); + }); + + describe('installStoreForward', () => { + it('registers a process exit handler', async () => { + const onSpy = vi.spyOn(process, 'on'); + vi.resetModules(); + + const { installStoreForward } = await import('./telemetry-store-forward.js'); + installStoreForward(); + + const exitHandlers = onSpy.mock.calls.filter((c) => c[0] === 'exit'); + expect(exitHandlers.length).toBeGreaterThanOrEqual(1); + + onSpy.mockRestore(); + }); + + it('exit handler calls persistToFile with PID-based path', async () => { + const onSpy = vi.spyOn(process, 'on'); + vi.resetModules(); + + const { installStoreForward } = await import('./telemetry-store-forward.js'); + installStoreForward(); + + const exitHandler = onSpy.mock.calls.find((c) => c[0] === 'exit')?.[1] as () => void; + exitHandler(); + + expect(mockPersistToFile).toHaveBeenCalledTimes(1); + const filePath = mockPersistToFile.mock.calls[0][0] as string; + expect(filePath).toContain('workos-cli-telemetry'); + expect(filePath).toContain(`pending-${process.pid}`); + + onSpy.mockRestore(); + }); + }); + + describe('recoverPendingEvents', () => { + it('does nothing if pending dir does not exist', async () => { + mockExistsSync.mockReturnValue(false); + vi.resetModules(); + + const { recoverPendingEvents } = await import('./telemetry-store-forward.js'); + await recoverPendingEvents(); + + expect(mockReaddirSync).not.toHaveBeenCalled(); + expect(mockQueueEvents).not.toHaveBeenCalled(); + }); + + it('reads and queues events from pending files', async () => { + mockExistsSync.mockReturnValue(true); + mockReaddirSync.mockReturnValue(['pending-1234.json', 'pending-5678.json']); + const events1 = [{ type: 'command', sessionId: '1', timestamp: '2024-01-01T00:00:00Z' }]; + const events2 = [{ type: 'crash', sessionId: '2', timestamp: '2024-01-01T00:00:01Z' }]; + mockReadFileSync + .mockReturnValueOnce(JSON.stringify(events1)) + .mockReturnValueOnce(JSON.stringify(events2)); + + vi.resetModules(); + const { recoverPendingEvents } = await import('./telemetry-store-forward.js'); + await recoverPendingEvents(); + + expect(mockQueueEvents).toHaveBeenCalledTimes(2); + expect(mockQueueEvents).toHaveBeenCalledWith(events1); + expect(mockQueueEvents).toHaveBeenCalledWith(events2); + expect(mockFlush).toHaveBeenCalledTimes(1); + }); + + it('deletes files immediately after reading (before send)', async () => { + mockExistsSync.mockReturnValue(true); + mockReaddirSync.mockReturnValue(['pending-1234.json']); + mockReadFileSync.mockReturnValue(JSON.stringify([{ type: 'command', sessionId: '1', timestamp: 'x' }])); + + vi.resetModules(); + const { recoverPendingEvents } = await import('./telemetry-store-forward.js'); + await recoverPendingEvents(); + + // unlinkSync should be called before flush + expect(mockUnlinkSync).toHaveBeenCalledTimes(1); + const unlinkPath = mockUnlinkSync.mock.calls[0][0] as string; + expect(unlinkPath).toContain('pending-1234.json'); + }); + + it('handles corrupted files gracefully', async () => { + mockExistsSync.mockReturnValue(true); + mockReaddirSync.mockReturnValue(['pending-bad.json']); + mockReadFileSync.mockReturnValue('not valid json{{{'); + + vi.resetModules(); + const { recoverPendingEvents } = await import('./telemetry-store-forward.js'); + + // Should not throw + await expect(recoverPendingEvents()).resolves.toBeUndefined(); + // Should try to delete the corrupted file + expect(mockUnlinkSync).toHaveBeenCalled(); + }); + + it('skips non-pending files', async () => { + mockExistsSync.mockReturnValue(true); + mockReaddirSync.mockReturnValue(['pending-1234.json', 'other-file.txt', 'readme.md']); + + const events = [{ type: 'command', sessionId: '1', timestamp: 'x' }]; + mockReadFileSync.mockReturnValue(JSON.stringify(events)); + + vi.resetModules(); + const { recoverPendingEvents } = await import('./telemetry-store-forward.js'); + await recoverPendingEvents(); + + expect(mockReadFileSync).toHaveBeenCalledTimes(1); + }); + + it('skips empty event arrays', async () => { + mockExistsSync.mockReturnValue(true); + mockReaddirSync.mockReturnValue(['pending-1234.json']); + mockReadFileSync.mockReturnValue('[]'); + + vi.resetModules(); + const { recoverPendingEvents } = await import('./telemetry-store-forward.js'); + await recoverPendingEvents(); + + expect(mockQueueEvents).not.toHaveBeenCalled(); + }); + }); +}); diff --git a/src/utils/telemetry-store-forward.ts b/src/utils/telemetry-store-forward.ts new file mode 100644 index 00000000..1339086f --- /dev/null +++ b/src/utils/telemetry-store-forward.ts @@ -0,0 +1,57 @@ +import { readFileSync, readdirSync, unlinkSync, existsSync } from 'node:fs'; +import { join } from 'node:path'; +import { tmpdir } from 'node:os'; +import { telemetryClient } from './telemetry-client.js'; +import { debug } from './debug.js'; + +const PENDING_DIR = join(tmpdir(), 'workos-cli-telemetry'); +const PENDING_FILE = join(PENDING_DIR, `pending-${process.pid}.json`); + +/** + * Register a sync exit handler that persists unsent events to disk. + * Called once at startup. Uses PID in filename to prevent concurrent + * CLI invocations from colliding. + */ +export function installStoreForward(): void { + process.on('exit', () => { + telemetryClient.persistToFile(PENDING_FILE); + }); +} + +/** + * On startup, check for ANY pending files from previous invocations + * (could be from different PIDs) and send them. Non-blocking, fire-and-forget. + */ +export async function recoverPendingEvents(): Promise { + try { + if (!existsSync(PENDING_DIR)) return; + const files = readdirSync(PENDING_DIR).filter( + (f) => f.startsWith('pending-') && f.endsWith('.json'), + ); + + for (const file of files) { + const filePath = join(PENDING_DIR, file); + try { + const raw = readFileSync(filePath, 'utf-8'); + unlinkSync(filePath); // Delete immediately to avoid double-send + + const events = JSON.parse(raw); + if (Array.isArray(events) && events.length > 0) { + telemetryClient.queueEvents(events); + } + } catch { + // Corrupted file — delete and move on + try { + unlinkSync(filePath); + } catch { + /* ignore */ + } + } + } + + // Flush all recovered events in one batch + await telemetryClient.flush(); + } catch { + debug('[Telemetry] Store-forward recovery failed silently'); + } +} From ff929a8045c9481b46508107ecdac1ab223905fc Mon Sep 17 00:00:00 2001 From: Nick Nisi Date: Tue, 14 Apr 2026 14:42:17 -0500 Subject: [PATCH 03/15] feat: add WORKOS_DEBUG=1 env var for verbose logging on all commands - Enable debug output for non-installer commands via env var - Log telemetry event details (type, name, duration, attributes) on flush - Register in debug command's env var catalog --- src/bin.ts | 7 +++++++ src/commands/debug.ts | 1 + src/utils/telemetry-client.ts | 15 ++++++++++++++- 3 files changed, 22 insertions(+), 1 deletion(-) diff --git a/src/bin.ts b/src/bin.ts index ba9f345a..0a5acc2b 100644 --- a/src/bin.ts +++ b/src/bin.ts @@ -49,6 +49,13 @@ import { installStoreForward, recoverPendingEvents } from './utils/telemetry-sto import { commandTelemetryMiddleware } from './utils/command-telemetry.js'; import { analytics } from './utils/analytics.js'; +// Enable debug logging for all commands via env var. +// Subsumes the installer's --debug flag for non-installer commands. +if (process.env.WORKOS_DEBUG === '1') { + const { enableDebugLogs } = await import('./utils/debug.js'); + enableDebugLogs(); +} + // Telemetry infrastructure: crash reporter, store-forward, and gateway init. // Must be before yargs so crashes during startup are captured. installCrashReporter(); diff --git a/src/commands/debug.ts b/src/commands/debug.ts index 1ca9bc6b..0b23560a 100644 --- a/src/commands/debug.ts +++ b/src/commands/debug.ts @@ -321,6 +321,7 @@ interface EnvVarInfo { } const ENV_VAR_CATALOG: { name: string; effect: string }[] = [ + { name: 'WORKOS_DEBUG', effect: 'Set to "1" to enable verbose debug logging for all commands' }, { name: 'WORKOS_API_KEY', effect: 'Bypasses credential resolution — used directly for API calls' }, { name: 'WORKOS_MODE', effect: 'Controls interaction behavior: human, agent, or CI' }, { name: 'WORKOS_FORCE_TTY', effect: 'Forces human (non-JSON) output mode, even when piped' }, diff --git a/src/utils/telemetry-client.ts b/src/utils/telemetry-client.ts index bf20dcc1..57f442f7 100644 --- a/src/utils/telemetry-client.ts +++ b/src/utils/telemetry-client.ts @@ -69,7 +69,20 @@ export class TelemetryClient { const timeout = setTimeout(() => controller.abort(), 3000); try { - debug(`[Telemetry] Sending ${payload.events.length} events to ${this.gatewayUrl}/telemetry`); + const eventSummary = payload.events.map((e) => { + const attrs = e.attributes ?? {}; + switch (e.type) { + case 'session.start': return `session.start(mode=${attrs['installer.mode']}, os=${attrs['env.os']})`; + case 'session.end': return `session.end(outcome=${attrs['installer.outcome']}, duration=${attrs['installer.duration_ms']}ms)`; + case 'step': return `step(${(e as any).name}, ${(e as any).durationMs}ms, success=${(e as any).success})`; + case 'agent.tool': return `agent.tool(${(e as any).toolName}, ${(e as any).durationMs}ms)`; + case 'agent.llm': return `agent.llm(${(e as any).model}, in=${(e as any).inputTokens}, out=${(e as any).outputTokens})`; + case 'command': return `command(${attrs['command.name']}, ${attrs['command.duration_ms']}ms, success=${attrs['command.success']})`; + case 'crash': return `crash(${attrs['crash.error_type']}: ${attrs['crash.error_message']})`; + default: return e.type; + } + }).join('\n '); + debug(`[Telemetry] Sending ${payload.events.length} events to ${this.gatewayUrl}/telemetry:\n ${eventSummary}`); const response = await fetch(`${this.gatewayUrl}/telemetry`, { method: 'POST', From fd325f9526a6197283208d36f90f8319409eff9c Mon Sep 17 00:00:00 2001 From: Nick Nisi Date: Tue, 14 Apr 2026 15:31:03 -0500 Subject: [PATCH 04/15] fix: address review findings for telemetry implementation - Wrap inline command handlers (seed, setup-org, doctor, etc.) with wrapCommandHandler so they report real duration/success - Skip provisional telemetry event for install command (has own session telemetry) - Add claim -> env.claim to canonical alias map - Defer store-forward file deletion until after successful flush --- src/bin.ts | 38 ++++++++++++++-------------- src/lib/command-aliases.ts | 1 + src/utils/command-telemetry.ts | 7 +++++ src/utils/telemetry-store-forward.ts | 20 ++++++++++----- 4 files changed, 40 insertions(+), 26 deletions(-) diff --git a/src/bin.ts b/src/bin.ts index 0a5acc2b..fc8fc724 100644 --- a/src/bin.ts +++ b/src/bin.ts @@ -46,7 +46,7 @@ import { registerSubcommand } from './utils/register-subcommand.js'; import { COMMAND_ALIASES } from './lib/command-aliases.js'; import { installCrashReporter } from './utils/crash-reporter.js'; import { installStoreForward, recoverPendingEvents } from './utils/telemetry-store-forward.js'; -import { commandTelemetryMiddleware } from './utils/command-telemetry.js'; +import { commandTelemetryMiddleware, wrapCommandHandler } from './utils/command-telemetry.js'; import { analytics } from './utils/analytics.js'; // Enable debug logging for all commands via env var. @@ -398,10 +398,10 @@ yargs(rawArgs) description: 'Auto-update stale WorkOS skills (writes to /skills/workos/ and workos-widgets/ only)', }, }), - async (argv) => { + wrapCommandHandler(async (argv) => { const { handleDoctor } = await import('./commands/doctor.js'); await handleDoctor(argv); - }, + }), ) // NOTE: When adding commands here, also update src/utils/help-json.ts .command('env', 'Manage environment configurations (API keys, endpoints, active environment)', (yargs) => { @@ -2155,7 +2155,7 @@ yargs(rawArgs) clean: { type: 'boolean', default: false, describe: 'Tear down seeded resources' }, init: { type: 'boolean', default: false, describe: 'Create an example workos-seed.yml file' }, }), - async (argv) => { + wrapCommandHandler(async (argv) => { await applyInsecureStorage(argv.insecureStorage); const { resolveApiKey, resolveApiBaseUrl } = await import('./lib/api-key.js'); const { runSeed } = await import('./commands/seed.js'); @@ -2164,7 +2164,7 @@ yargs(rawArgs) resolveApiKey({ apiKey: argv.apiKey }), resolveApiBaseUrl(), ); - }, + }), ) .command( 'setup-org ', @@ -2176,7 +2176,7 @@ yargs(rawArgs) domain: { type: 'string', describe: 'Domain to add and verify' }, roles: { type: 'string', describe: 'Comma-separated role slugs to create' }, }), - async (argv) => { + wrapCommandHandler(async (argv) => { await applyInsecureStorage(argv.insecureStorage); const { resolveApiKey, resolveApiBaseUrl } = await import('./lib/api-key.js'); const { runSetupOrg } = await import('./commands/setup-org.js'); @@ -2185,7 +2185,7 @@ yargs(rawArgs) resolveApiKey({ apiKey: argv.apiKey }), resolveApiBaseUrl(), ); - }, + }), ) .command( 'onboard-user ', @@ -2198,7 +2198,7 @@ yargs(rawArgs) role: { type: 'string', describe: 'Role slug to assign' }, wait: { type: 'boolean', default: false, describe: 'Wait for invitation acceptance' }, }), - async (argv) => { + wrapCommandHandler(async (argv) => { await applyInsecureStorage(argv.insecureStorage); const { resolveApiKey, resolveApiBaseUrl } = await import('./lib/api-key.js'); const { runOnboardUser } = await import('./commands/onboard-user.js'); @@ -2207,7 +2207,7 @@ yargs(rawArgs) resolveApiKey({ apiKey: argv.apiKey }), resolveApiBaseUrl(), ); - }, + }), ) .command( 'debug-sso ', @@ -2217,12 +2217,12 @@ yargs(rawArgs) ...insecureStorageOption, 'api-key': { type: 'string' as const, describe: 'WorkOS API key' }, }), - async (argv) => { + wrapCommandHandler(async (argv) => { await applyInsecureStorage(argv.insecureStorage); const { resolveApiKey, resolveApiBaseUrl } = await import('./lib/api-key.js'); const { runDebugSso } = await import('./commands/debug-sso.js'); await runDebugSso(argv.connectionId, resolveApiKey({ apiKey: argv.apiKey }), resolveApiBaseUrl()); - }, + }), ) .command( 'debug-sync ', @@ -2232,12 +2232,12 @@ yargs(rawArgs) ...insecureStorageOption, 'api-key': { type: 'string' as const, describe: 'WorkOS API key' }, }), - async (argv) => { + wrapCommandHandler(async (argv) => { await applyInsecureStorage(argv.insecureStorage); const { resolveApiKey, resolveApiBaseUrl } = await import('./lib/api-key.js'); const { runDebugSync } = await import('./commands/debug-sync.js'); await runDebugSync(argv.directoryId, resolveApiKey({ apiKey: argv.apiKey }), resolveApiBaseUrl()); - }, + }), ) // Alias — canonical command is `workos env claim` .command( @@ -2247,11 +2247,11 @@ yargs(rawArgs) yargs.options({ ...insecureStorageOption, }), - async (argv) => { + wrapCommandHandler(async (argv) => { await applyInsecureStorage(argv.insecureStorage); const { runClaim } = await import('./commands/claim.js'); await runClaim(); - }, + }), ) .command( 'install', @@ -2272,10 +2272,10 @@ yargs(rawArgs) port: { type: 'number', default: 4100, describe: 'Port to listen on' }, seed: { type: 'string', describe: 'Path to seed config file (YAML or JSON)' }, }), - async (argv) => { + wrapCommandHandler(async (argv) => { const { runEmulate } = await import('./commands/emulate.js'); await runEmulate({ port: argv.port, seed: argv.seed, json: argv.json as boolean }); - }, + }), ) .command( 'dev', @@ -2285,14 +2285,14 @@ yargs(rawArgs) port: { type: 'number', default: 4100, describe: 'Emulator port' }, seed: { type: 'string', describe: 'Path to seed config file' }, }), - async (argv) => { + wrapCommandHandler(async (argv) => { const { runDev } = await import('./commands/dev.js'); await runDev({ port: argv.port, seed: argv.seed, '--': argv['--'] as string[] | undefined, }); - }, + }), ) .command('debug', false, (yargs) => { yargs.options(insecureStorageOption); diff --git a/src/lib/command-aliases.ts b/src/lib/command-aliases.ts index 8f515ae3..7a8632ef 100644 --- a/src/lib/command-aliases.ts +++ b/src/lib/command-aliases.ts @@ -7,4 +7,5 @@ */ export const COMMAND_ALIASES: Record = { org: 'organization', + claim: 'env.claim', }; diff --git a/src/utils/command-telemetry.ts b/src/utils/command-telemetry.ts index 57603b19..887ba472 100644 --- a/src/utils/command-telemetry.ts +++ b/src/utils/command-telemetry.ts @@ -2,6 +2,9 @@ import { analytics } from './analytics.js'; import { COMMAND_ALIASES } from '../lib/command-aliases.js'; import { WORKOS_TELEMETRY_ENABLED } from '../lib/constants.js'; +/** Commands that have their own telemetry (e.g., installer session events). */ +const SKIP_TELEMETRY_COMMANDS = new Set(['install']); + /** * Resolve user-typed command parts to their canonical name. * Applies alias mapping to the top-level command only. @@ -51,6 +54,10 @@ export function commandTelemetryMiddleware(rawArgs: string[]) { argv.__telemetryStartTime = startTime; argv.__telemetryFlags = flags; + // Skip provisional event for commands with their own telemetry (e.g., install) + const topLevelCommand = commandParts[0] ?? ''; + if (SKIP_TELEMETRY_COMMANDS.has(topLevelCommand)) return; + // Queue provisional event NOW, before the handler runs. // If the handler calls process.exit(), store-forward persists this. analytics.commandExecuted(commandName, 0, true, { flags }); diff --git a/src/utils/telemetry-store-forward.ts b/src/utils/telemetry-store-forward.ts index 1339086f..1ed7a70d 100644 --- a/src/utils/telemetry-store-forward.ts +++ b/src/utils/telemetry-store-forward.ts @@ -29,28 +29,34 @@ export async function recoverPendingEvents(): Promise { (f) => f.startsWith('pending-') && f.endsWith('.json'), ); + const recoveredFiles: string[] = []; for (const file of files) { const filePath = join(PENDING_DIR, file); try { const raw = readFileSync(filePath, 'utf-8'); - unlinkSync(filePath); // Delete immediately to avoid double-send - const events = JSON.parse(raw); if (Array.isArray(events) && events.length > 0) { telemetryClient.queueEvents(events); + recoveredFiles.push(filePath); + } else { + // Empty file — delete immediately + try { unlinkSync(filePath); } catch { /* ignore */ } } } catch { // Corrupted file — delete and move on - try { - unlinkSync(filePath); - } catch { - /* ignore */ - } + try { unlinkSync(filePath); } catch { /* ignore */ } } } // Flush all recovered events in one batch await telemetryClient.flush(); + + // Only delete files AFTER successful flush. + // If flush fails, events are retained in memory and will be + // re-persisted by the exit handler (store-forward). + for (const filePath of recoveredFiles) { + try { unlinkSync(filePath); } catch { /* ignore */ } + } } catch { debug('[Telemetry] Store-forward recovery failed silently'); } From 9ddf5b722f3feb6d1753f007cb4e42b655d147dc Mon Sep 17 00:00:00 2001 From: Nick Nisi Date: Tue, 14 Apr 2026 18:12:04 -0500 Subject: [PATCH 05/15] fix: drop telemetry events on 4xx responses to prevent accumulation Client errors (401, 403) are permanent failures that won't succeed on retry. Only retain events for 5xx (transient server errors) and network failures where store-forward retry is meaningful. --- src/utils/telemetry-client.ts | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/utils/telemetry-client.ts b/src/utils/telemetry-client.ts index 57f442f7..877cdd81 100644 --- a/src/utils/telemetry-client.ts +++ b/src/utils/telemetry-client.ts @@ -95,7 +95,11 @@ export class TelemetryClient { this.events = []; } else { debug(`[Telemetry] Failed to send: ${response.status}`); - // Events remain in queue for store-forward to persist + // Clear on 4xx (permanent failures like 401/403 that won't succeed on retry). + // Retain only on 5xx (server errors that may be transient). + if (response.status >= 400 && response.status < 500) { + this.events = []; + } } } catch (error) { debug(`[Telemetry] Error sending events: ${error}`); From 20303e2555cd965c04ce3482e62e6794f2164eaa Mon Sep 17 00:00:00 2001 From: Nick Nisi Date: Tue, 14 Apr 2026 18:21:33 -0500 Subject: [PATCH 06/15] fix: flush returns boolean, splice prevents race, in-process delivery - flush() returns true (sent/dropped) or false (retryable) so callers can act on the result - Use splice(0, count) instead of clearing all events, protecting events queued concurrently during the fetch - wrapCommandHandler flushes in-process so events are sent immediately instead of always deferring to next invocation via store-forward - Store-forward recovery deletes files after loading into memory (events are re-persisted by exit handler if flush fails) - Skip provisional events for dashboard and $0 (installer entry points) - Add 4xx drop test coverage --- src/utils/command-telemetry.ts | 10 ++++++++-- src/utils/telemetry-client.spec.ts | 23 +++++++++++++++++------ src/utils/telemetry-client.ts | 28 +++++++++++++++++++--------- src/utils/telemetry-store-forward.ts | 12 ++++++------ 4 files changed, 50 insertions(+), 23 deletions(-) diff --git a/src/utils/command-telemetry.ts b/src/utils/command-telemetry.ts index 887ba472..facd812c 100644 --- a/src/utils/command-telemetry.ts +++ b/src/utils/command-telemetry.ts @@ -1,9 +1,11 @@ import { analytics } from './analytics.js'; +import { telemetryClient } from './telemetry-client.js'; import { COMMAND_ALIASES } from '../lib/command-aliases.js'; import { WORKOS_TELEMETRY_ENABLED } from '../lib/constants.js'; -/** Commands that have their own telemetry (e.g., installer session events). */ -const SKIP_TELEMETRY_COMMANDS = new Set(['install']); +/** Commands that have their own telemetry (e.g., installer session events). + * 'root' is the default $0 handler which prompts to run the installer. */ +const SKIP_TELEMETRY_COMMANDS = new Set(['install', 'dashboard', 'root']); /** * Resolve user-typed command parts to their canonical name. @@ -88,6 +90,10 @@ export function wrapCommandHandler( flags, }); throw error; + } finally { + // Flush in-process so events are sent immediately, not deferred to next invocation. + // If flush fails, store-forward persists on exit. + await telemetryClient.flush().catch(() => {}); } }; } diff --git a/src/utils/telemetry-client.spec.ts b/src/utils/telemetry-client.spec.ts index 23609263..4746f598 100644 --- a/src/utils/telemetry-client.spec.ts +++ b/src/utils/telemetry-client.spec.ts @@ -170,22 +170,33 @@ describe('TelemetryClient', () => { expect(mockFetch).toHaveBeenCalledTimes(2); }); - it('handles network errors silently', async () => { + it('returns false on network errors (retryable)', async () => { mockFetch.mockRejectedValueOnce(new Error('Network error')); client.setGatewayUrl('http://localhost:8000'); client.queueEvent({ type: 'session.start', sessionId: '123', timestamp: new Date().toISOString() }); - // Should not throw - await expect(client.flush()).resolves.toBeUndefined(); + await expect(client.flush()).resolves.toBe(false); }); - it('handles non-ok responses silently', async () => { + it('returns false on 5xx (retryable, events retained)', async () => { mockFetch.mockResolvedValueOnce({ ok: false, status: 500 }); client.setGatewayUrl('http://localhost:8000'); client.queueEvent({ type: 'session.start', sessionId: '123', timestamp: new Date().toISOString() }); - // Should not throw - await expect(client.flush()).resolves.toBeUndefined(); + await expect(client.flush()).resolves.toBe(false); + }); + + it('drops events on 4xx and returns true (permanent failure)', async () => { + mockFetch.mockResolvedValueOnce({ ok: false, status: 401 }); + client.setGatewayUrl('http://localhost:8000'); + client.queueEvent({ type: 'session.start', sessionId: '123', timestamp: new Date().toISOString() }); + + const result = await client.flush(); + expect(result).toBe(true); + // Verify events were cleared — second flush should be a no-op + mockFetch.mockClear(); + await client.flush(); + expect(mockFetch).not.toHaveBeenCalled(); }); it('sends correct Content-Type header', async () => { diff --git a/src/utils/telemetry-client.ts b/src/utils/telemetry-client.ts index 877cdd81..5d20cf3d 100644 --- a/src/utils/telemetry-client.ts +++ b/src/utils/telemetry-client.ts @@ -45,15 +45,21 @@ export class TelemetryClient { this.events.push(...events); } - async flush(): Promise { - if (this.events.length === 0) return; + /** + * Flush queued events. Returns true if events were sent or intentionally + * dropped (4xx), false if they should be retried (5xx/network error). + * Uses splice to only remove the events that were in the snapshot, + * protecting any events queued concurrently during the fetch. + */ + async flush(): Promise { + if (this.events.length === 0) return true; if (!this.gatewayUrl) { debug('[Telemetry] No gateway URL configured, skipping flush'); - return; + return false; } - const payload: TelemetryRequest = { events: [...this.events] }; - // DO NOT clear this.events yet — retain until success + const count = this.events.length; + const payload: TelemetryRequest = { events: this.events.slice(0, count) }; const headers: Record = { 'Content-Type': 'application/json', @@ -92,18 +98,22 @@ export class TelemetryClient { }); if (response.ok) { - this.events = []; + this.events.splice(0, count); + return true; } else { debug(`[Telemetry] Failed to send: ${response.status}`); - // Clear on 4xx (permanent failures like 401/403 that won't succeed on retry). - // Retain only on 5xx (server errors that may be transient). + // Drop on 4xx (permanent failures like 401/403 won't succeed on retry). + // Retain on 5xx (transient server errors) for store-forward. if (response.status >= 400 && response.status < 500) { - this.events = []; + this.events.splice(0, count); + return true; // intentionally dropped } + return false; } } catch (error) { debug(`[Telemetry] Error sending events: ${error}`); // Events remain in queue for store-forward to persist + return false; } finally { clearTimeout(timeout); } diff --git a/src/utils/telemetry-store-forward.ts b/src/utils/telemetry-store-forward.ts index 1ed7a70d..bda57b4b 100644 --- a/src/utils/telemetry-store-forward.ts +++ b/src/utils/telemetry-store-forward.ts @@ -48,15 +48,15 @@ export async function recoverPendingEvents(): Promise { } } - // Flush all recovered events in one batch - await telemetryClient.flush(); - - // Only delete files AFTER successful flush. - // If flush fails, events are retained in memory and will be - // re-persisted by the exit handler (store-forward). + // Delete source files — events are now in memory regardless of flush outcome. + // If flush succeeds: events sent, done. + // If flush fails: events stay in memory, exit handler re-persists to new PID file. for (const filePath of recoveredFiles) { try { unlinkSync(filePath); } catch { /* ignore */ } } + + // Flush all recovered events in one batch + await telemetryClient.flush(); } catch { debug('[Telemetry] Store-forward recovery failed silently'); } From fd5c50e2f70d2a92d0d3d4ba15c14160184575c1 Mon Sep 17 00:00:00 2001 From: Nick Nisi Date: Wed, 15 Apr 2026 09:27:42 -0500 Subject: [PATCH 07/15] docs: document telemetry wiring requirements for new commands Add a section to CLAUDE.md explaining which commands auto-emit telemetry (registerSubcommand) versus which need manual wrapCommandHandler wrapping (inline top-level .command() calls). Add a pointer comment in bin.ts near the workflow commands block. Prevents new top-level commands from silently emitting duration=0 telemetry. --- CLAUDE.md | 33 +++++++++++++++++++++++++++++++++ src/bin.ts | 4 ++++ 2 files changed, 37 insertions(+) diff --git a/CLAUDE.md b/CLAUDE.md index c97a1678..0afd3004 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -48,6 +48,39 @@ pnpm typecheck # Type check 2. Register in `src/bin.ts` and update `src/utils/help-json.ts` command registry 3. Include JSON mode tests in spec file +## Telemetry Wiring for New Commands + +All commands auto-emit a `command` telemetry event with name, duration, and success/failure. How you register the command determines whether this is automatic: + +**Subcommands via `registerSubcommand()`** → auto-wired. Telemetry happens for free. + +```typescript +.command('user', 'Manage users', (yargs) => { + registerSubcommand(yargs, 'reset-password', '...', (y) => y, + async (argv) => { await runResetPassword(argv); }, // auto-wrapped + ); +}) +``` + +**Top-level `.command()` with inline handler** → MUST manually wrap with `wrapCommandHandler()`: + +```typescript +.command( + 'migrate', + 'Migrate from another provider', + (yargs) => yargs.options({...}), + wrapCommandHandler(async (argv) => { // <-- REQUIRED + await runMigrate(argv); + }), +) +``` + +If you forget `wrapCommandHandler`, the command still emits a telemetry event (queued by middleware), but duration will be `0` and success will always be `true` -- misleading data in dashboards. + +**Skip list**: commands in `SKIP_TELEMETRY_COMMANDS` (`command-telemetry.ts`) are excluded from command-level telemetry because they have their own session-based telemetry. Currently: `install`, `dashboard`, `root` (the default `$0` handler). Add to this set if you're building another installer entry point. + +**Aliases**: if you register a command with multiple names (e.g., `['organization', 'org']`), add the alias to `src/lib/command-aliases.ts` so metrics don't fragment across `org.list` and `organization.list`. + ## Do / Don't **Do:** diff --git a/src/bin.ts b/src/bin.ts index fc8fc724..1e8b6217 100644 --- a/src/bin.ts +++ b/src/bin.ts @@ -2144,6 +2144,10 @@ yargs(rawArgs) return yargs.demandCommand(1, 'Please specify an org-domain subcommand').strict(); }) // --- Workflow Commands --- + // NOTE: Top-level `.command()` registrations with inline handlers MUST wrap + // the handler with `wrapCommandHandler()` for correct command telemetry. + // Subcommands registered via `registerSubcommand()` are auto-wrapped. + // See CLAUDE.md "Telemetry Wiring for New Commands". .command( 'seed', 'Seed WorkOS environment from a YAML config file', From 1123e0e18e45bbea4914b08d3f1c2038c74f2af9 Mon Sep 17 00:00:00 2001 From: Nick Nisi Date: Wed, 15 Apr 2026 09:39:26 -0500 Subject: [PATCH 08/15] feat: add user identification and unclaimed env support to telemetry - Add workos.user_id to command and crash events (from stored credentials or unclaimed environment clientId) so dashboards can count unique users - Add cli.version to command and crash events for release adoption tracking - Support claim-token auth path on the telemetry client, so unclaimed environments' telemetry reaches the API (guard accepts this path too) - Rename CrashEvent's installer.version to cli.version (crashes happen outside the installer too) - initForNonInstaller() now wires up user_id and claim-token auth --- src/utils/analytics.spec.ts | 8 +++++--- src/utils/analytics.ts | 33 ++++++++++++++++++++++++++++++--- src/utils/telemetry-client.ts | 16 ++++++++++++++++ src/utils/telemetry-types.ts | 5 ++++- 4 files changed, 55 insertions(+), 7 deletions(-) diff --git a/src/utils/analytics.spec.ts b/src/utils/analytics.spec.ts index c98531b8..f354e135 100644 --- a/src/utils/analytics.spec.ts +++ b/src/utils/analytics.spec.ts @@ -506,7 +506,7 @@ describe('Analytics', () => { 'crash.error_message': 'Unexpected failure', 'crash.stack': 'Error: Unexpected failure\n at foo.ts:1', 'crash.command': 'install', - 'installer.version': '1.0.0', + 'cli.version': '1.0.0', 'env.os': expect.any(String), 'env.node_version': expect.any(String), }), @@ -523,11 +523,13 @@ describe('Analytics', () => { expect(event.attributes['crash.stack'].length).toBe(4096); }); - it('defaults version to unknown when not provided', () => { + it('falls back to package version when not explicitly provided', () => { analytics.captureUnhandledCrash(new Error('test')); const event = mockQueueEvent.mock.calls.find((c) => c[0].type === 'crash')[0]; - expect(event.attributes['installer.version']).toBe('unknown'); + // Falls back to getVersion() which reads from package.json — any real version string + expect(event.attributes['cli.version']).toEqual(expect.any(String)); + expect(event.attributes['cli.version']).not.toBe(''); }); }); diff --git a/src/utils/analytics.ts b/src/utils/analytics.ts index 9253b6e6..14931df6 100644 --- a/src/utils/analytics.ts +++ b/src/utils/analytics.ts @@ -12,8 +12,9 @@ import type { CrashEvent, } from './telemetry-types.js'; import { WORKOS_TELEMETRY_ENABLED } from '../lib/constants.js'; -import { getLlmGatewayUrl } from '../lib/settings.js'; +import { getLlmGatewayUrl, getVersion } from '../lib/settings.js'; import { getCredentials } from '../lib/credentials.js'; +import { getActiveEnvironment, isUnclaimedEnvironment } from '../lib/config-store.js'; export class Analytics { private tags: Record = {}; @@ -47,7 +48,14 @@ export class Analytics { /** * Initialize telemetry for non-installer commands. - * Sets gatewayUrl from default config and loads stored JWT if available. + * Sets gatewayUrl from default config and loads auth credentials. + * + * Auth priority: + * 1. Stored JWT (Bearer) — logged-in users + * 2. Claim token — unclaimed environments + * 3. None — API-key-only users (telemetry silently dropped by API guard) + * + * Also captures the user/environment identifier for per-user analytics. * The installer flow sets these itself in run-with-core.ts; this covers * management commands like `org list`, `auth login`, etc. */ @@ -61,6 +69,22 @@ export class Analytics { if (creds?.accessToken) { telemetryClient.setAccessToken(creds.accessToken); } + if (creds?.userId) { + this.distinctId = creds.userId; + } + + // Check for unclaimed environment — fall back to claim-token auth + // so unclaimed users' telemetry still reaches the backend. + try { + const env = getActiveEnvironment(); + if (env && isUnclaimedEnvironment(env)) { + telemetryClient.setClaimTokenAuth(env.clientId, env.claimToken); + // Tag distinctId so unclaimed sessions are identifiable in analytics + this.distinctId = this.distinctId ?? `unclaimed:${env.clientId}`; + } + } catch { + // Config-store failure is non-fatal for telemetry + } } setTag(key: string, value: string | boolean | number | null | undefined) { @@ -215,6 +239,8 @@ export class Analytics { 'command.name': name, 'command.duration_ms': durationMs, 'command.success': success, + 'cli.version': getVersion(), + ...(this.distinctId ? { 'workos.user_id': this.distinctId } : {}), ...(options?.error ? { 'command.error_type': options.error.name, @@ -264,7 +290,8 @@ export class Analytics { 'crash.error_message': error.message, 'crash.stack': truncatedStack, ...(options?.command ? { 'crash.command': options.command } : {}), - 'installer.version': options?.version ?? 'unknown', + 'cli.version': options?.version ?? getVersion(), + ...(this.distinctId ? { 'workos.user_id': this.distinctId } : {}), ...this.getEnvFingerprint(), }, }; diff --git a/src/utils/telemetry-client.ts b/src/utils/telemetry-client.ts index 5d20cf3d..3df750c3 100644 --- a/src/utils/telemetry-client.ts +++ b/src/utils/telemetry-client.ts @@ -11,6 +11,8 @@ import { getCredentials } from '../lib/credentials.js'; export class TelemetryClient { private events: TelemetryEvent[] = []; private accessToken: string | null = null; + private claimToken: string | null = null; + private clientId: string | null = null; private gatewayUrl: string | null = null; setGatewayUrl(url: string) { @@ -21,6 +23,16 @@ export class TelemetryClient { this.accessToken = token; } + /** + * Set claim-token auth for unclaimed environments. + * The API's LlmGatewayGuard accepts either a JWT (Bearer) or claim token + * (x-workos-claim-token + x-workos-client-id headers). + */ + setClaimTokenAuth(clientId: string, claimToken: string) { + this.clientId = clientId; + this.claimToken = claimToken; + } + queueEvent(event: TelemetryEvent) { this.events.push(event); } @@ -69,6 +81,10 @@ export class TelemetryClient { const token = freshCreds?.accessToken ?? this.accessToken; if (token) { headers['Authorization'] = `Bearer ${token}`; + } else if (this.claimToken && this.clientId) { + // Unclaimed environment auth path — guard accepts this instead of JWT + headers['x-workos-claim-token'] = this.claimToken; + headers['x-workos-client-id'] = this.clientId; } const controller = new AbortController(); diff --git a/src/utils/telemetry-types.ts b/src/utils/telemetry-types.ts index 93faa117..7d84eb99 100644 --- a/src/utils/telemetry-types.ts +++ b/src/utils/telemetry-types.ts @@ -70,6 +70,8 @@ export interface CommandEvent extends TelemetryEvent { 'command.error_type'?: string; 'command.error_message'?: string; 'command.flags'?: string; + 'cli.version': string; + 'workos.user_id'?: string; 'env.os': string; 'env.os_version': string; 'env.node_version': string; @@ -86,7 +88,8 @@ export interface CrashEvent extends TelemetryEvent { 'crash.error_message': string; 'crash.stack': string; 'crash.command'?: string; - 'installer.version': string; + 'cli.version': string; + 'workos.user_id'?: string; 'env.os': string; 'env.os_version': string; 'env.node_version': string; From 5174fad56516349a63848c4365e41bfc94e67b73 Mon Sep 17 00:00:00 2001 From: Nick Nisi Date: Wed, 15 Apr 2026 14:03:37 -0500 Subject: [PATCH 09/15] fix: sanitize error.message and error.stack in telemetry events Closes security-audit finding #1 on PR #122 (telemetry message sanitization). `error.message` was flowing into 4 capture sites unsanitized, leaking homedir paths (and rarely, credentials) to the WorkOS gateway. - Add `sanitizeMessage()` in crash-reporter.ts: homedir strip + Bearer/ sk_*/JWT redaction + 1KB truncation. - Factor secret redaction into shared `redactSecrets()` used by both `sanitizeMessage` and `sanitizeStack` (Node echoes `.message` into the leading `Error.stack` line, so message-only sanitization was insufficient). - Add private `extractErrorFields()` chokepoint on `Analytics`; route all 4 capture sites through it (`captureException`, `stepCompleted`, `commandExecuted`, `captureUnhandledCrash`). `replaceLastCommandEvent` inherits sanitization via its delegation to `commandExecuted`. - `captureUnhandledCrash` now uses `sanitizeStack` instead of inline truncation, providing defense-in-depth for callers that bypass the crash-reporter wrapper. - Add regression guard test (`telemetry-sanitize.spec.ts`): poisons every capture method with homedir + Bearer + sk_live_ + JWT, asserts no marker reaches the serialized queue. Reviewed: ideation:reviewer cycle 1 PASS (0 critical, 0 high). --- src/utils/analytics.spec.ts | 6 +- src/utils/analytics.ts | 37 +++-- src/utils/crash-reporter.ts | 36 +++- src/utils/telemetry-sanitize.spec.ts | 235 +++++++++++++++++++++++++++ 4 files changed, 299 insertions(+), 15 deletions(-) create mode 100644 src/utils/telemetry-sanitize.spec.ts diff --git a/src/utils/analytics.spec.ts b/src/utils/analytics.spec.ts index f354e135..24bf8632 100644 --- a/src/utils/analytics.spec.ts +++ b/src/utils/analytics.spec.ts @@ -514,13 +514,15 @@ describe('Analytics', () => { ); }); - it('truncates stack traces to 4KB', () => { + it('truncates stack traces to 4KB with a truncation marker', () => { const error = new Error('Big stack'); error.stack = 'x'.repeat(5000); analytics.captureUnhandledCrash(error); const event = mockQueueEvent.mock.calls.find((c) => c[0].type === 'crash')[0]; - expect(event.attributes['crash.stack'].length).toBe(4096); + // sanitizeStack truncates at 4096 and appends '\n...[truncated]' + expect(event.attributes['crash.stack']).toMatch(/\n\.\.\.\[truncated\]$/); + expect(event.attributes['crash.stack'].startsWith('x'.repeat(4096))).toBe(true); }); it('falls back to package version when not explicitly provided', () => { diff --git a/src/utils/analytics.ts b/src/utils/analytics.ts index 14931df6..52a525ad 100644 --- a/src/utils/analytics.ts +++ b/src/utils/analytics.ts @@ -15,6 +15,7 @@ import { WORKOS_TELEMETRY_ENABLED } from '../lib/constants.js'; import { getLlmGatewayUrl, getVersion } from '../lib/settings.js'; import { getCredentials } from '../lib/credentials.js'; import { getActiveEnvironment, isUnclaimedEnvironment } from '../lib/config-store.js'; +import { sanitizeMessage, sanitizeStack } from './crash-reporter.js'; export class Analytics { private tags: Record = {}; @@ -110,8 +111,9 @@ export class Analytics { if (!WORKOS_TELEMETRY_ENABLED) return; debug('[Analytics] captureException:', error.message, properties); - this.tags['error.type'] = error.name; - this.tags['error.message'] = error.message; + const { type, message } = this.extractErrorFields(error); + this.tags['error.type'] = type; + this.tags['error.message'] = message; } async getFeatureFlag(_flagKey: string): Promise { @@ -119,6 +121,18 @@ export class Analytics { return undefined; } + /** + * Single chokepoint for converting an Error into telemetry-safe fields. + * All capture methods that record error details MUST go through this. + * Sanitizes the message via sanitizeMessage (homedir + secret patterns + truncation). + */ + private extractErrorFields(error: Error): { type: string; message: string } { + return { + type: error.name, + message: sanitizeMessage(error.message), + }; + } + private detectCiProvider(): string | undefined { if (process.env.GITHUB_ACTIONS) return 'github-actions'; if (process.env.BUILDKITE) return 'buildkite'; @@ -179,7 +193,7 @@ export class Analytics { startTimestamp: new Date(Date.now() - durationMs).toISOString(), durationMs, success, - error: error ? { type: error.name, message: error.message } : undefined, + error: error ? this.extractErrorFields(error) : undefined, }; telemetryClient.queueEvent(event); @@ -231,6 +245,8 @@ export class Analytics { ) { if (!WORKOS_TELEMETRY_ENABLED) return; + const errorFields = options?.error ? this.extractErrorFields(options.error) : undefined; + const event: CommandEvent = { type: 'command', sessionId: this.sessionId, @@ -241,10 +257,10 @@ export class Analytics { 'command.success': success, 'cli.version': getVersion(), ...(this.distinctId ? { 'workos.user_id': this.distinctId } : {}), - ...(options?.error + ...(errorFields ? { - 'command.error_type': options.error.name, - 'command.error_message': options.error.message, + 'command.error_type': errorFields.type, + 'command.error_message': errorFields.message, } : {}), ...(options?.flags?.length @@ -278,17 +294,16 @@ export class Analytics { captureUnhandledCrash(error: Error, options?: { command?: string; version?: string }) { if (!WORKOS_TELEMETRY_ENABLED) return; - const stack = error.stack ?? ''; - const truncatedStack = stack.length > 4096 ? stack.slice(0, 4096) : stack; + const { type, message } = this.extractErrorFields(error); const event: CrashEvent = { type: 'crash', sessionId: this.sessionId, timestamp: new Date().toISOString(), attributes: { - 'crash.error_type': error.name, - 'crash.error_message': error.message, - 'crash.stack': truncatedStack, + 'crash.error_type': type, + 'crash.error_message': message, + 'crash.stack': sanitizeStack(error.stack), ...(options?.command ? { 'crash.command': options.command } : {}), 'cli.version': options?.version ?? getVersion(), ...(this.distinctId ? { 'workos.user_id': this.distinctId } : {}), diff --git a/src/utils/crash-reporter.ts b/src/utils/crash-reporter.ts index 14207793..299b56c1 100644 --- a/src/utils/crash-reporter.ts +++ b/src/utils/crash-reporter.ts @@ -2,11 +2,26 @@ import { analytics } from './analytics.js'; import { homedir } from 'node:os'; const MAX_STACK_LENGTH = 4096; +const MAX_MESSAGE_LENGTH = 1024; let isCrashing = false; /** - * Sanitize stack trace: strip absolute paths to relative, remove home dir. - * Prevents leaking file system layout in telemetry events. + * Redact known credential patterns from a string: + * Bearer tokens, sk_test_/sk_live_ keys, raw JWTs. + * Used by both sanitizeStack and sanitizeMessage so secrets that appear + * in error messages (which Node echoes into stack first lines) are removed + * regardless of which path captures them. + */ +function redactSecrets(s: string): string { + return s + .replace(/Bearer\s+[A-Za-z0-9._-]+/g, 'Bearer ') + .replace(/\bsk_(test|live)_[A-Za-z0-9]+/g, 'sk_') + .replace(/\beyJ[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+/g, ''); +} + +/** + * Sanitize stack trace: strip absolute paths to relative, remove home dir, + * redact credentials that may appear via the leading "Error: " line. */ export function sanitizeStack(stack: string | undefined): string { if (!stack) return ''; @@ -14,11 +29,28 @@ export function sanitizeStack(stack: string | undefined): string { let sanitized = stack; sanitized = sanitized.replaceAll(home, '~'); sanitized = sanitized.replace(/\/[^\s:]+\/(node_modules|dist|src)\//g, '$1/'); + sanitized = redactSecrets(sanitized); return sanitized.length > MAX_STACK_LENGTH ? sanitized.slice(0, MAX_STACK_LENGTH) + '\n...[truncated]' : sanitized; } +/** + * Sanitize an error message before sending to telemetry. + * Strips home directory and redacts known credential patterns + * (Bearer tokens, sk_test_/sk_live_ keys, raw JWTs). + * Truncates to MAX_MESSAGE_LENGTH chars. + */ +export function sanitizeMessage(msg: string | undefined): string { + if (!msg) return ''; + const home = homedir(); + let sanitized = msg.replaceAll(home, '~'); + sanitized = redactSecrets(sanitized); + return sanitized.length > MAX_MESSAGE_LENGTH + ? sanitized.slice(0, MAX_MESSAGE_LENGTH) + '...[truncated]' + : sanitized; +} + /** * Register global handlers for uncaughtException and unhandledRejection * that capture crash details before the process exits. diff --git a/src/utils/telemetry-sanitize.spec.ts b/src/utils/telemetry-sanitize.spec.ts new file mode 100644 index 00000000..9724626e --- /dev/null +++ b/src/utils/telemetry-sanitize.spec.ts @@ -0,0 +1,235 @@ +import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest'; +import { homedir } from 'node:os'; +import { sanitizeMessage } from './crash-reporter.js'; + +// Mock telemetry client so we can inspect queued events without HTTP. +// Use vi.hoisted so these are available when the hoisted vi.mock factory runs +// (importing sanitizeMessage transitively loads analytics.ts which loads telemetry-client.ts). +const { + mockSetGatewayUrl, + mockSetAccessToken, + mockQueueEvent, + mockFlush, + mockReplaceLastEventOfType, +} = vi.hoisted(() => ({ + mockSetGatewayUrl: vi.fn(), + mockSetAccessToken: vi.fn(), + mockQueueEvent: vi.fn(), + mockFlush: vi.fn().mockResolvedValue(undefined), + mockReplaceLastEventOfType: vi.fn(), +})); + +vi.mock('./telemetry-client.js', () => ({ + telemetryClient: { + setGatewayUrl: mockSetGatewayUrl, + setAccessToken: mockSetAccessToken, + queueEvent: mockQueueEvent, + flush: mockFlush, + replaceLastEventOfType: (...args: unknown[]) => mockReplaceLastEventOfType(...args), + }, +})); + +vi.mock('./debug.js', () => ({ + debug: vi.fn(), +})); + +vi.mock('uuid', () => ({ + v4: () => 'test-session-id-sanitize', +})); + +vi.mock('../lib/settings.js', () => ({ + getLlmGatewayUrl: () => 'https://api.workos.com/llm-gateway', + getConfig: () => ({ + nodeVersion: '>=18', + logging: { debugMode: false }, + telemetry: { enabled: true, eventName: 'installer_interaction' }, + documentation: { + workosDocsUrl: 'https://workos.com/docs', + dashboardUrl: 'https://dashboard.workos.com', + issuesUrl: 'https://github.com', + }, + legacy: { oauthPort: 3000 }, + }), + getVersion: () => '0.0.0-test', +})); + +vi.mock('../lib/credentials.js', () => ({ + getCredentials: vi.fn(), +})); + +describe('sanitizeMessage', () => { + it('strips the home directory', () => { + const home = homedir(); + const input = `ENOENT: no such file or directory, open '${home}/.workos/credentials.json'`; + const out = sanitizeMessage(input); + expect(out).not.toContain(home); + expect(out).toContain('~/.workos/credentials.json'); + }); + + it('redacts Bearer tokens', () => { + const out = sanitizeMessage('401 Unauthorized: Bearer abc123.def456.ghi789 invalid'); + expect(out).not.toContain('abc123.def456.ghi789'); + expect(out).toContain('Bearer '); + }); + + it('redacts sk_live_ keys', () => { + const out = sanitizeMessage('Authentication failed for sk_live_xyzABC123'); + expect(out).not.toContain('sk_live_xyzABC123'); + expect(out).toContain('sk_'); + }); + + it('redacts sk_test_ keys', () => { + const out = sanitizeMessage('Bad key sk_test_qrsTUV456 in request'); + expect(out).not.toContain('sk_test_qrsTUV456'); + expect(out).toContain('sk_'); + }); + + it('redacts raw JWTs', () => { + const jwt = 'eyJhbGciOiJIUzI1NiJ9.eyJzdWIiOiIxMjMifQ.signature_value'; + const out = sanitizeMessage(`Token ${jwt} expired`); + expect(out).not.toContain(jwt); + expect(out).toContain(''); + }); + + it('truncates messages longer than 1024 chars with marker', () => { + const long = 'a'.repeat(2000); + const out = sanitizeMessage(long); + expect(out.length).toBe(1024 + '...[truncated]'.length); + expect(out.endsWith('...[truncated]')).toBe(true); + }); + + it('redacts before truncating so secrets near the boundary are not partially preserved', () => { + // Place a JWT at position 1010 so its tail would fall past the 1024 cap. + const padding = 'x'.repeat(1010); + const jwt = 'eyJhbGciOiJIUzI1NiJ9.eyJzdWIiOiIxMjMifQ.signature_value'; + const out = sanitizeMessage(padding + jwt); + expect(out).not.toContain('signature_value'); + expect(out).not.toContain('eyJhbGciOiJIUzI1NiJ9'); + }); + + it('returns empty string for undefined or empty input', () => { + expect(sanitizeMessage(undefined)).toBe(''); + expect(sanitizeMessage('')).toBe(''); + }); + + it('redacts all marker types in a single string', () => { + const home = homedir(); + const input = `${home}/x Bearer abc.def.ghi sk_live_ABC123 eyJhbGciOiJIUzI1NiJ9.eyJzdWIiOiIxMjMifQ.signature_value`; + const out = sanitizeMessage(input); + expect(out).not.toContain(home); + expect(out).not.toContain('abc.def.ghi'); + expect(out).not.toContain('sk_live_ABC123'); + expect(out).not.toContain('eyJhbGciOiJIUzI1NiJ9'); + expect(out).toContain('~/x'); + expect(out).toContain('Bearer '); + expect(out).toContain('sk_'); + expect(out).toContain(''); + }); +}); + +describe('Analytics: no PII or secrets in queued events', () => { + const home = homedir(); + const POISON_BEARER = 'abc123.def456.ghi789'; + const POISON_SK = 'sk_live_xyzABC123'; + const POISON_JWT = 'eyJhbGciOiJIUzI1NiJ9.eyJzdWIiOiIxMjMifQ.signature_value'; + const POISON_PATH = `${home}/.workos/credentials.json`; + const POISON_MESSAGE = `ENOENT at ${POISON_PATH} Bearer ${POISON_BEARER} key=${POISON_SK} jwt=${POISON_JWT}`; + + let Analytics: typeof import('./analytics.js').Analytics; + let analytics: InstanceType; + const originalEnv = process.env.WORKOS_TELEMETRY; + + beforeEach(async () => { + vi.clearAllMocks(); + delete process.env.WORKOS_TELEMETRY; + vi.resetModules(); + vi.doMock('./telemetry-client.js', () => ({ + telemetryClient: { + setGatewayUrl: mockSetGatewayUrl, + setAccessToken: mockSetAccessToken, + queueEvent: mockQueueEvent, + flush: mockFlush, + replaceLastEventOfType: (...args: unknown[]) => mockReplaceLastEventOfType(...args), + }, + })); + vi.doMock('../lib/settings.js', () => ({ + getLlmGatewayUrl: () => 'https://api.workos.com/llm-gateway', + getConfig: () => ({ + nodeVersion: '>=18', + logging: { debugMode: false }, + telemetry: { enabled: true, eventName: 'installer_interaction' }, + documentation: { + workosDocsUrl: 'https://workos.com/docs', + dashboardUrl: 'https://dashboard.workos.com', + issuesUrl: 'https://github.com', + }, + legacy: { oauthPort: 3000 }, + }), + getVersion: () => '0.0.0-test', + })); + vi.doMock('../lib/credentials.js', () => ({ + getCredentials: vi.fn(), + })); + const module = await import('./analytics.js'); + Analytics = module.Analytics; + analytics = new Analytics(); + }); + + afterEach(() => { + if (originalEnv !== undefined) { + process.env.WORKOS_TELEMETRY = originalEnv; + } else { + delete process.env.WORKOS_TELEMETRY; + } + }); + + function assertCleanQueue() { + const serialized = JSON.stringify(mockQueueEvent.mock.calls); + expect(serialized).not.toContain(home); + expect(serialized).not.toContain(POISON_BEARER); + expect(serialized).not.toContain(POISON_SK); + expect(serialized).not.toContain(POISON_JWT); + } + + it('stepCompleted: poisoned error.message does not leak markers', () => { + const err = new Error(POISON_MESSAGE); + analytics.stepCompleted('test-step', 100, false, err); + expect(mockQueueEvent).toHaveBeenCalled(); + assertCleanQueue(); + }); + + it('commandExecuted: poisoned error.message does not leak markers', () => { + const err = new Error(POISON_MESSAGE); + analytics.commandExecuted('test-command', 100, false, { error: err }); + expect(mockQueueEvent).toHaveBeenCalled(); + assertCleanQueue(); + }); + + it('captureUnhandledCrash: poisoned error.message does not leak markers', () => { + const err = new Error(POISON_MESSAGE); + analytics.captureUnhandledCrash(err); + expect(mockQueueEvent).toHaveBeenCalled(); + assertCleanQueue(); + }); + + it('captureException: poisoned error.message does not leak via session.end tags', async () => { + const err = new Error(POISON_MESSAGE); + analytics.captureException(err); + // captureException stores into tags; tags flow into session.end at shutdown. + await analytics.shutdown('error'); + expect(mockQueueEvent).toHaveBeenCalled(); + assertCleanQueue(); + }); + + it('replaceLastCommandEvent: inherits sanitization via commandExecuted', () => { + const err = new Error(POISON_MESSAGE); + // First queue a provisional event (would normally happen in middleware). + analytics.commandExecuted('test-command', 0, true); + mockQueueEvent.mockClear(); + // Then replace it with the real one carrying the poisoned error. + analytics.replaceLastCommandEvent('test-command', 100, false, { error: err }); + expect(mockReplaceLastEventOfType).toHaveBeenCalledWith('command'); + expect(mockQueueEvent).toHaveBeenCalled(); + assertCleanQueue(); + }); +}); From aa7baf68adff6a72b7759570e68d4ae4b6b85bcb Mon Sep 17 00:00:00 2001 From: Nick Nisi Date: Wed, 15 Apr 2026 15:38:05 -0500 Subject: [PATCH 10/15] chore: de-slop --- src/utils/analytics.ts | 6 +----- src/utils/crash-reporter.ts | 37 +++++++++++-------------------------- 2 files changed, 12 insertions(+), 31 deletions(-) diff --git a/src/utils/analytics.ts b/src/utils/analytics.ts index 52a525ad..a66a61e0 100644 --- a/src/utils/analytics.ts +++ b/src/utils/analytics.ts @@ -121,11 +121,7 @@ export class Analytics { return undefined; } - /** - * Single chokepoint for converting an Error into telemetry-safe fields. - * All capture methods that record error details MUST go through this. - * Sanitizes the message via sanitizeMessage (homedir + secret patterns + truncation). - */ + /** All capture methods that record error details MUST go through this. */ private extractErrorFields(error: Error): { type: string; message: string } { return { type: error.name, diff --git a/src/utils/crash-reporter.ts b/src/utils/crash-reporter.ts index 299b56c1..36381fd7 100644 --- a/src/utils/crash-reporter.ts +++ b/src/utils/crash-reporter.ts @@ -3,14 +3,14 @@ import { homedir } from 'node:os'; const MAX_STACK_LENGTH = 4096; const MAX_MESSAGE_LENGTH = 1024; +const HOME = homedir(); let isCrashing = false; /** - * Redact known credential patterns from a string: - * Bearer tokens, sk_test_/sk_live_ keys, raw JWTs. - * Used by both sanitizeStack and sanitizeMessage so secrets that appear - * in error messages (which Node echoes into stack first lines) are removed - * regardless of which path captures them. + * Redact known credential patterns (Bearer tokens, sk_test_/sk_live_ keys, + * raw JWTs). Shared by sanitizeStack and sanitizeMessage because Node echoes + * `.message` into the leading `Error.stack` line, so secrets in messages also + * surface in stacks. */ function redactSecrets(s: string): string { return s @@ -19,15 +19,10 @@ function redactSecrets(s: string): string { .replace(/\beyJ[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+/g, ''); } -/** - * Sanitize stack trace: strip absolute paths to relative, remove home dir, - * redact credentials that may appear via the leading "Error: " line. - */ +/** Sanitize stack trace for telemetry: homedir, absolute-path collapse, secrets, truncation. */ export function sanitizeStack(stack: string | undefined): string { if (!stack) return ''; - const home = homedir(); - let sanitized = stack; - sanitized = sanitized.replaceAll(home, '~'); + let sanitized = stack.replaceAll(HOME, '~'); sanitized = sanitized.replace(/\/[^\s:]+\/(node_modules|dist|src)\//g, '$1/'); sanitized = redactSecrets(sanitized); return sanitized.length > MAX_STACK_LENGTH @@ -35,17 +30,10 @@ export function sanitizeStack(stack: string | undefined): string { : sanitized; } -/** - * Sanitize an error message before sending to telemetry. - * Strips home directory and redacts known credential patterns - * (Bearer tokens, sk_test_/sk_live_ keys, raw JWTs). - * Truncates to MAX_MESSAGE_LENGTH chars. - */ +/** Sanitize an error message for telemetry (homedir, secrets, truncation). */ export function sanitizeMessage(msg: string | undefined): string { if (!msg) return ''; - const home = homedir(); - let sanitized = msg.replaceAll(home, '~'); - sanitized = redactSecrets(sanitized); + const sanitized = redactSecrets(msg.replaceAll(HOME, '~')); return sanitized.length > MAX_MESSAGE_LENGTH ? sanitized.slice(0, MAX_MESSAGE_LENGTH) + '...[truncated]' : sanitized; @@ -76,11 +64,8 @@ function reportCrashSync(error: Error): void { if (isCrashing) return; isCrashing = true; try { - // Sanitize the stack before passing to analytics - const sanitized = new Error(error.message); - sanitized.name = error.name; - sanitized.stack = sanitizeStack(error.stack); - analytics.captureUnhandledCrash(sanitized); + // captureUnhandledCrash sanitizes both message and stack at the analytics boundary. + analytics.captureUnhandledCrash(error); } catch { // Telemetry must never prevent exit } From 0899f8b0226ec7d36b990f5b1e165a70ec8e96e3 Mon Sep 17 00:00:00 2001 From: Nick Nisi Date: Wed, 15 Apr 2026 16:20:58 -0500 Subject: [PATCH 11/15] feat: add device.id and auth.mode attributes to telemetry events MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduces two additive telemetry signals requested by the signals spec: - device.id — persistent per-install UUID stored at ~/.workos/device-id. File IO failures fall through to a one-shot session UUID; never throws. - auth.mode — 4-state enum ('jwt' | 'claim_token' | 'api_key' | 'none') derived during initForNonInstaller with JWT > claim_token > api_key priority. Installer flow sets it in run-with-core after credentials resolve. Both fields are injected via getEnvFingerprint so they appear on every event that already carries env context (session.start, session.end, command, crash). No change to downstream gateway — existing deliveries accept the new attrs as optional. Implements Phase 1 of docs/ideation/telemetry-signals/spec-phase-1.md. Gateway-side type mirroring is tracked separately in the gateway repo. --- src/lib/device-id.spec.ts | 102 ++++++++++++++++++++++++ src/lib/device-id.ts | 57 +++++++++++++ src/lib/run-with-core.ts | 5 ++ src/utils/analytics.spec.ts | 150 +++++++++++++++++++++++++++++++++++ src/utils/analytics.ts | 23 ++++++ src/utils/telemetry-types.ts | 8 ++ 6 files changed, 345 insertions(+) create mode 100644 src/lib/device-id.spec.ts create mode 100644 src/lib/device-id.ts diff --git a/src/lib/device-id.spec.ts b/src/lib/device-id.spec.ts new file mode 100644 index 00000000..6c65cfec --- /dev/null +++ b/src/lib/device-id.spec.ts @@ -0,0 +1,102 @@ +import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest'; +import { + existsSync, + readFileSync, + writeFileSync, + mkdtempSync, + rmSync, + chmodSync, + mkdirSync, +} from 'node:fs'; +import { join } from 'node:path'; +import { tmpdir } from 'node:os'; + +// Mutable testDir rebound in beforeEach; mock closes over it. +let testDir: string; + +vi.mock('node:os', async (importOriginal) => { + const original = await importOriginal(); + return { + ...original, + default: { + ...original, + homedir: () => testDir, + }, + homedir: () => testDir, + }; +}); + +const { getDeviceId, __resetDeviceIdCache } = await import('./device-id.js'); + +const UUID_REGEX = /^[0-9a-f-]{36}$/i; + +describe('device-id', () => { + beforeEach(() => { + testDir = mkdtempSync(join(tmpdir(), 'device-id-test-')); + __resetDeviceIdCache(); + }); + + afterEach(() => { + try { + chmodSync(join(testDir, '.workos'), 0o700); + } catch { + // ignore — dir may not exist or already writable + } + rmSync(testDir, { recursive: true, force: true }); + }); + + it('creates the file on first call and returns a UUID', () => { + const id = getDeviceId(); + + expect(id).toMatch(UUID_REGEX); + const filePath = join(testDir, '.workos', 'device-id'); + expect(existsSync(filePath)).toBe(true); + expect(readFileSync(filePath, 'utf8')).toBe(id); + }); + + it('returns the same cached UUID on subsequent calls', () => { + const first = getDeviceId(); + const second = getDeviceId(); + expect(first).toBe(second); + }); + + it('reads existing UUID from disk (persists across process restarts)', () => { + // First process writes the ID. + const first = getDeviceId(); + + // Simulate process restart by clearing in-memory cache. + __resetDeviceIdCache(); + + const second = getDeviceId(); + expect(second).toBe(first); + }); + + it('regenerates when the file contains a non-UUID', () => { + const workosDir = join(testDir, '.workos'); + mkdirSync(workosDir, { recursive: true }); + writeFileSync(join(workosDir, 'device-id'), 'not-a-uuid', 'utf8'); + + const id = getDeviceId(); + expect(id).toMatch(UUID_REGEX); + expect(id).not.toBe('not-a-uuid'); + // File should be rewritten with the new UUID. + expect(readFileSync(join(workosDir, 'device-id'), 'utf8')).toBe(id); + }); + + it('falls back to a one-shot UUID when the filesystem is readonly', () => { + // Make the home directory non-writable so mkdirSync throws. + chmodSync(testDir, 0o500); + + const id = getDeviceId(); + expect(id).toMatch(UUID_REGEX); + // File was never created. + expect(existsSync(join(testDir, '.workos', 'device-id'))).toBe(false); + }); + + it('persists fallback UUID across calls within the same process', () => { + chmodSync(testDir, 0o500); + const first = getDeviceId(); + const second = getDeviceId(); + expect(first).toBe(second); + }); +}); diff --git a/src/lib/device-id.ts b/src/lib/device-id.ts new file mode 100644 index 00000000..61ef74aa --- /dev/null +++ b/src/lib/device-id.ts @@ -0,0 +1,57 @@ +/** + * Persistent device identifier for telemetry correlation. + * + * Stored at ~/.workos/device-id as a plain UTF-8 UUID string. Not a secret + * — this is a convenience identifier that survives keyring unavailability. + * Any IO failure falls through to a one-shot UUID for the current session. + */ + +import fs from 'node:fs'; +import path from 'node:path'; +import os from 'node:os'; +import crypto from 'node:crypto'; + +const UUID_REGEX = /^[0-9a-f-]{36}$/i; + +let cached: string | undefined; + +function getDeviceIdPath(): string { + return path.join(os.homedir(), '.workos', 'device-id'); +} + +/** + * Returns a stable UUID for this device. Lazily creates the file on first + * call. On any IO failure, returns a one-shot UUID scoped to the current + * process — never throws. + */ +export function getDeviceId(): string { + if (cached) return cached; + + const filePath = getDeviceIdPath(); + try { + if (fs.existsSync(filePath)) { + const raw = fs.readFileSync(filePath, 'utf8').trim(); + if (UUID_REGEX.test(raw)) { + cached = raw; + return raw; + } + } + + const id = crypto.randomUUID(); + fs.mkdirSync(path.dirname(filePath), { recursive: true, mode: 0o700 }); + fs.writeFileSync(filePath, id, { encoding: 'utf8', mode: 0o600 }); + cached = id; + return id; + } catch { + // IO failure (readonly FS, permission denied, etc.) — fall through to + // a session-scoped UUID. Cache it so subsequent calls in this process + // return the same value; the next process run will retry IO. + cached = crypto.randomUUID(); + return cached; + } +} + +/** Test seam — resets the in-memory cache between test cases. */ +export function __resetDeviceIdCache(): void { + cached = undefined; +} diff --git a/src/lib/run-with-core.ts b/src/lib/run-with-core.ts index 69971fad..8be93c98 100644 --- a/src/lib/run-with-core.ts +++ b/src/lib/run-with-core.ts @@ -242,6 +242,10 @@ export async function runWithCore(options: InstallerOptions): Promise { // Check for active environment with credentials (covers unclaimed environments) const activeEnv = getActiveEnvironment(); if (activeEnv?.clientId && activeEnv?.apiKey) { + // Classify auth.mode: unclaimed envs deliver telemetry via claim token, + // claimed envs with just an API key use api_key path (dropped by gateway, + // but still worth tagging for cohort analysis). + analytics.setAuthMode(isUnclaimedEnvironment(activeEnv) ? 'claim_token' : 'api_key'); return true; } @@ -257,6 +261,7 @@ export async function runWithCore(options: InstallerOptions): Promise { if (creds) { analytics.setAccessToken(creds.accessToken); analytics.setDistinctId(creds.userId); + analytics.setAuthMode('jwt'); } return true; }), diff --git a/src/utils/analytics.spec.ts b/src/utils/analytics.spec.ts index 24bf8632..3fbcc3c2 100644 --- a/src/utils/analytics.spec.ts +++ b/src/utils/analytics.spec.ts @@ -3,6 +3,7 @@ import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest'; // Mock telemetry client const mockSetGatewayUrl = vi.fn(); const mockSetAccessToken = vi.fn(); +const mockSetClaimTokenAuth = vi.fn(); const mockQueueEvent = vi.fn(); const mockFlush = vi.fn().mockResolvedValue(undefined); const mockReplaceLastEventOfType = vi.fn(); @@ -11,6 +12,7 @@ vi.mock('./telemetry-client.js', () => ({ telemetryClient: { setGatewayUrl: mockSetGatewayUrl, setAccessToken: mockSetAccessToken, + setClaimTokenAuth: mockSetClaimTokenAuth, queueEvent: mockQueueEvent, flush: mockFlush, replaceLastEventOfType: (...args: unknown[]) => mockReplaceLastEventOfType(...args), @@ -27,6 +29,12 @@ vi.mock('uuid', () => ({ v4: () => 'test-session-id-123', })); +// Deterministic device ID for assertions +const TEST_DEVICE_ID = '11111111-1111-4111-8111-111111111111'; +vi.mock('../lib/device-id.js', () => ({ + getDeviceId: () => TEST_DEVICE_ID, +})); + // Mock settings for initForNonInstaller const mockGetLlmGatewayUrl = vi.fn(() => 'https://api.workos.com/llm-gateway'); const mockSettingsConfig = { @@ -48,6 +56,13 @@ vi.mock('../lib/credentials.js', () => ({ getCredentials: () => mockGetCredentials(), })); +// Mock config-store so auth.mode derivation can exercise unclaimed-env path +const mockGetActiveEnvironment = vi.fn(); +vi.mock('../lib/config-store.js', () => ({ + getActiveEnvironment: () => mockGetActiveEnvironment(), + isUnclaimedEnvironment: (env: { type: string }) => env?.type === 'unclaimed', +})); + describe('Analytics', () => { // Need to handle WORKOS_TELEMETRY_ENABLED which is evaluated at import time const originalEnv = process.env.WORKOS_TELEMETRY; @@ -77,6 +92,7 @@ describe('Analytics', () => { telemetryClient: { setGatewayUrl: mockSetGatewayUrl, setAccessToken: mockSetAccessToken, + setClaimTokenAuth: mockSetClaimTokenAuth, queueEvent: mockQueueEvent, flush: mockFlush, replaceLastEventOfType: (...args: unknown[]) => mockReplaceLastEventOfType(...args), @@ -90,11 +106,26 @@ describe('Analytics', () => { vi.doMock('../lib/credentials.js', () => ({ getCredentials: () => mockGetCredentials(), })); + vi.doMock('../lib/device-id.js', () => ({ + getDeviceId: () => TEST_DEVICE_ID, + })); + vi.doMock('../lib/config-store.js', () => ({ + getActiveEnvironment: () => mockGetActiveEnvironment(), + isUnclaimedEnvironment: (env: { type: string }) => env?.type === 'unclaimed', + })); + // Default: no credentials, no unclaimed env, no API key + mockGetCredentials.mockReturnValue(null); + mockGetActiveEnvironment.mockReturnValue(null); + delete process.env.WORKOS_API_KEY; const module = await import('./analytics.js'); Analytics = module.Analytics; analytics = new Analytics(); }); + afterEach(() => { + delete process.env.WORKOS_API_KEY; + }); + describe('setDistinctId', () => { it('stores the distinct ID for later use', () => { analytics.setDistinctId('user-123'); @@ -558,6 +589,117 @@ describe('Analytics', () => { }); }); + describe('auth.mode derivation', () => { + const readAuthMode = () => { + analytics.commandExecuted('test', 1, true); + const event = mockQueueEvent.mock.calls.find((c) => c[0].type === 'command')[0]; + return event.attributes['auth.mode']; + }; + + it('derives jwt when stored credentials have an access token', () => { + mockGetCredentials.mockReturnValue({ accessToken: 'jwt-token', userId: 'user-1' }); + analytics.initForNonInstaller(); + + expect(readAuthMode()).toBe('jwt'); + }); + + it('derives claim_token when only an unclaimed environment is active', () => { + mockGetCredentials.mockReturnValue(null); + mockGetActiveEnvironment.mockReturnValue({ + type: 'unclaimed', + name: 'dev', + apiKey: 'sk_test', + clientId: 'client_123', + claimToken: 'claim_tok', + }); + analytics.initForNonInstaller(); + + expect(mockSetClaimTokenAuth).toHaveBeenCalledWith('client_123', 'claim_tok'); + expect(readAuthMode()).toBe('claim_token'); + }); + + it('derives api_key when only WORKOS_API_KEY is set', () => { + mockGetCredentials.mockReturnValue(null); + mockGetActiveEnvironment.mockReturnValue(null); + process.env.WORKOS_API_KEY = 'sk_live_abc'; + analytics.initForNonInstaller(); + + expect(readAuthMode()).toBe('api_key'); + }); + + it('derives none when no credentials are available', () => { + mockGetCredentials.mockReturnValue(null); + mockGetActiveEnvironment.mockReturnValue(null); + analytics.initForNonInstaller(); + + expect(readAuthMode()).toBe('none'); + }); + + it('prefers jwt over claim_token when both are present', () => { + mockGetCredentials.mockReturnValue({ accessToken: 'jwt-token' }); + mockGetActiveEnvironment.mockReturnValue({ + type: 'unclaimed', + name: 'dev', + apiKey: 'sk_test', + clientId: 'client_123', + claimToken: 'claim_tok', + }); + analytics.initForNonInstaller(); + + expect(readAuthMode()).toBe('jwt'); + }); + + it('prefers claim_token over api_key when both are present', () => { + mockGetCredentials.mockReturnValue(null); + mockGetActiveEnvironment.mockReturnValue({ + type: 'unclaimed', + name: 'dev', + apiKey: 'sk_test', + clientId: 'client_123', + claimToken: 'claim_tok', + }); + process.env.WORKOS_API_KEY = 'sk_live_abc'; + analytics.initForNonInstaller(); + + expect(readAuthMode()).toBe('claim_token'); + }); + + it('can be overridden by setAuthMode (installer flow)', () => { + analytics.setAuthMode('api_key'); + expect(readAuthMode()).toBe('api_key'); + }); + }); + + describe('device.id and auth.mode on events', () => { + it('includes device.id on session.start events', () => { + analytics.sessionStart('cli', '1.0.0'); + const event = mockQueueEvent.mock.calls.find((c) => c[0].type === 'session.start')[0]; + expect(event.attributes['device.id']).toBe(TEST_DEVICE_ID); + expect(event.attributes['device.id']).toMatch(/^[0-9a-f-]{36}$/i); + }); + + it('includes device.id and auth.mode on command events', () => { + analytics.commandExecuted('org.list', 100, true); + const event = mockQueueEvent.mock.calls.find((c) => c[0].type === 'command')[0]; + expect(event.attributes['device.id']).toBe(TEST_DEVICE_ID); + expect(event.attributes['auth.mode']).toBe('none'); + }); + + it('includes device.id and auth.mode on crash events', () => { + analytics.captureUnhandledCrash(new Error('boom')); + const event = mockQueueEvent.mock.calls.find((c) => c[0].type === 'crash')[0]; + expect(event.attributes['device.id']).toBe(TEST_DEVICE_ID); + expect(event.attributes['auth.mode']).toBe('none'); + }); + + it('includes device.id and auth.mode on session.end events', async () => { + await analytics.shutdown('success'); + const event = mockQueueEvent.mock.calls.find((c) => c[0].type === 'session.end')[0]; + expect(event.attributes['device.id']).toBe(TEST_DEVICE_ID); + expect(event.attributes['auth.mode']).toBe('none'); + }); + }); + describe('replaceLastCommandEvent', () => { it('removes last command event and queues a new one', () => { analytics.replaceLastCommandEvent('organization.list', 150, true, { flags: ['json'] }); @@ -597,6 +739,7 @@ describe('Analytics', () => { telemetryClient: { setGatewayUrl: mockSetGatewayUrl, setAccessToken: mockSetAccessToken, + setClaimTokenAuth: mockSetClaimTokenAuth, queueEvent: mockQueueEvent, flush: mockFlush, replaceLastEventOfType: (...args: unknown[]) => mockReplaceLastEventOfType(...args), @@ -610,6 +753,13 @@ describe('Analytics', () => { vi.doMock('../lib/credentials.js', () => ({ getCredentials: () => mockGetCredentials(), })); + vi.doMock('../lib/device-id.js', () => ({ + getDeviceId: () => TEST_DEVICE_ID, + })); + vi.doMock('../lib/config-store.js', () => ({ + getActiveEnvironment: () => mockGetActiveEnvironment(), + isUnclaimedEnvironment: (env: { type: string }) => env?.type === 'unclaimed', + })); }); it('capture does nothing', async () => { diff --git a/src/utils/analytics.ts b/src/utils/analytics.ts index a66a61e0..6b389fca 100644 --- a/src/utils/analytics.ts +++ b/src/utils/analytics.ts @@ -3,6 +3,7 @@ import { v4 as uuidv4 } from 'uuid'; import { debug } from './debug.js'; import { telemetryClient } from './telemetry-client.js'; import type { + AuthMode, SessionStartEvent, SessionEndEvent, StepEvent, @@ -15,6 +16,7 @@ import { WORKOS_TELEMETRY_ENABLED } from '../lib/constants.js'; import { getLlmGatewayUrl, getVersion } from '../lib/settings.js'; import { getCredentials } from '../lib/credentials.js'; import { getActiveEnvironment, isUnclaimedEnvironment } from '../lib/config-store.js'; +import { getDeviceId } from '../lib/device-id.js'; import { sanitizeMessage, sanitizeStack } from './crash-reporter.js'; export class Analytics { @@ -23,6 +25,7 @@ export class Analytics { private sessionStartTime: Date; private distinctId?: string; private mode?: 'cli' | 'tui' | 'headless'; + private authMode: AuthMode = 'none'; // Agent metrics tracking private totalInputTokens = 0; @@ -43,6 +46,14 @@ export class Analytics { telemetryClient.setAccessToken(token); } + /** + * Set the auth mode explicitly. Used by installer flows where credential + * resolution happens outside `initForNonInstaller` (see run-with-core.ts). + */ + setAuthMode(mode: AuthMode) { + this.authMode = mode; + } + setGatewayUrl(url: string) { telemetryClient.setGatewayUrl(url); } @@ -69,6 +80,7 @@ export class Analytics { const creds = getCredentials(); if (creds?.accessToken) { telemetryClient.setAccessToken(creds.accessToken); + this.authMode = 'jwt'; } if (creds?.userId) { this.distinctId = creds.userId; @@ -82,10 +94,19 @@ export class Analytics { telemetryClient.setClaimTokenAuth(env.clientId, env.claimToken); // Tag distinctId so unclaimed sessions are identifiable in analytics this.distinctId = this.distinctId ?? `unclaimed:${env.clientId}`; + if (this.authMode === 'none') this.authMode = 'claim_token'; } } catch { // Config-store failure is non-fatal for telemetry } + + // WORKOS_API_KEY covers API-key-only users. Lowest priority — JWT and + // claim-token paths actually deliver telemetry, while api_key is silently + // dropped by the gateway guard; tagging it correctly still matters for + // cohort analysis. + if (this.authMode === 'none' && process.env.WORKOS_API_KEY) { + this.authMode = 'api_key'; + } } setTag(key: string, value: string | boolean | number | null | undefined) { @@ -149,6 +170,8 @@ export class Analytics { const ciProvider = this.detectCiProvider(); return { + 'device.id': getDeviceId(), + 'auth.mode': this.authMode, 'env.os': process.platform, 'env.os_version': osVersion, 'env.node_version': process.version, diff --git a/src/utils/telemetry-types.ts b/src/utils/telemetry-types.ts index 7d84eb99..89fe0cba 100644 --- a/src/utils/telemetry-types.ts +++ b/src/utils/telemetry-types.ts @@ -10,6 +10,8 @@ export interface TelemetryEvent { attributes?: Record; } +export type AuthMode = 'jwt' | 'claim_token' | 'api_key' | 'none'; + export interface SessionStartEvent extends TelemetryEvent { type: 'session.start'; attributes: { @@ -17,6 +19,8 @@ export interface SessionStartEvent extends TelemetryEvent { 'installer.mode': 'cli' | 'tui' | 'headless'; 'workos.user_id'?: string; 'workos.org_id'?: string; + 'device.id': string; + 'auth.mode': AuthMode; 'env.os': string; 'env.os_version': string; 'env.node_version': string; @@ -72,6 +76,8 @@ export interface CommandEvent extends TelemetryEvent { 'command.flags'?: string; 'cli.version': string; 'workos.user_id'?: string; + 'device.id': string; + 'auth.mode': AuthMode; 'env.os': string; 'env.os_version': string; 'env.node_version': string; @@ -90,6 +96,8 @@ export interface CrashEvent extends TelemetryEvent { 'crash.command'?: string; 'cli.version': string; 'workos.user_id'?: string; + 'device.id': string; + 'auth.mode': AuthMode; 'env.os': string; 'env.os_version': string; 'env.node_version': string; From 2c05364aeae97f39b54b5d9816a893d56b1586b0 Mon Sep 17 00:00:00 2001 From: Nick Nisi Date: Wed, 15 Apr 2026 16:37:26 -0500 Subject: [PATCH 12/15] feat(telemetry): structured termination.reason and error.code on command events Replace the boolean `command.success` as the primary outcome dimension with a structured `termination.reason` enum (success | cancelled | auth_required | validation_error | api_error | crash) plus a `error.code` string. - `exitWithError`, `exitWithCode`, `exitWithAuthRequired` now patch the provisional command event via `analytics.recordTermination` before calling `process.exit`. Previously the queued provisional event (success true, duration 0) was persisted as-is by store-forward, producing misleading dashboard data. - `exitWithError` now honors the string error code for exit mapping: `auth_required` -> 4, `cancelled` -> 2, `http_*`/`not_found`/`unknown_error` -> 1 (api_error), everything else -> 1 (validation_error). Previously it always exited 1. - New `TelemetryClient.patchLastEventOfType` helper mutates a queued event in place. Unlike `replaceLastEventOfType`, it preserves the event so multiple callers can update fields incrementally. - `wrapCommandHandler` records `reason: 'success'` on clean completion and `reason: 'crash'` with `error.name` on uncaught throw. - `command.success` remains on CommandEvent for backward-compat. - `api.status`/`api.code`/`api.resource` fields added to CommandEvent and plumbed through `recordTermination` for Phase 3 consumption. Review passed (2 medium + 1 low non-blocking; low fixed in same commit for crash-path test symmetry). --- src/utils/analytics.spec.ts | 110 ++++++++++++++++++++++++++++ src/utils/analytics.ts | 31 ++++++++ src/utils/command-telemetry.spec.ts | 39 ++++++++++ src/utils/command-telemetry.ts | 8 +- src/utils/exit-codes.spec.ts | 95 +++++++++++++++++++++++- src/utils/exit-codes.ts | 29 ++++++++ src/utils/output.spec.ts | 79 +++++++++++++++++++- src/utils/output.ts | 7 +- src/utils/telemetry-client.ts | 19 +++++ src/utils/telemetry-types.ts | 19 +++++ 10 files changed, 431 insertions(+), 5 deletions(-) diff --git a/src/utils/analytics.spec.ts b/src/utils/analytics.spec.ts index 3fbcc3c2..3f429e02 100644 --- a/src/utils/analytics.spec.ts +++ b/src/utils/analytics.spec.ts @@ -7,6 +7,19 @@ const mockSetClaimTokenAuth = vi.fn(); const mockQueueEvent = vi.fn(); const mockFlush = vi.fn().mockResolvedValue(undefined); const mockReplaceLastEventOfType = vi.fn(); +// Holds the queue of events that patchLastEventOfType should operate on. +// Tests populate via mockQueueEvent.mock.calls + manual reset. +const patchedEvents: Array<{ type: string; attributes: Record }> = []; +const mockPatchLastEventOfType = vi.fn( + (type: string, mutator: (event: { type: string; attributes: Record }) => void) => { + for (let i = patchedEvents.length - 1; i >= 0; i--) { + if (patchedEvents[i].type === type) { + mutator(patchedEvents[i]); + return; + } + } + }, +); vi.mock('./telemetry-client.js', () => ({ telemetryClient: { @@ -16,6 +29,10 @@ vi.mock('./telemetry-client.js', () => ({ queueEvent: mockQueueEvent, flush: mockFlush, replaceLastEventOfType: (...args: unknown[]) => mockReplaceLastEventOfType(...args), + patchLastEventOfType: ( + type: string, + mutator: (event: { type: string; attributes: Record }) => void, + ) => mockPatchLastEventOfType(type, mutator), }, })); @@ -96,6 +113,10 @@ describe('Analytics', () => { queueEvent: mockQueueEvent, flush: mockFlush, replaceLastEventOfType: (...args: unknown[]) => mockReplaceLastEventOfType(...args), + patchLastEventOfType: ( + type: string, + mutator: (event: { type: string; attributes: Record }) => void, + ) => mockPatchLastEventOfType(type, mutator), }, })); vi.doMock('../lib/settings.js', () => ({ @@ -729,6 +750,82 @@ describe('Analytics', () => { expect(event.attributes['command.error_message']).toBe('oops'); }); }); + + describe('recordTermination', () => { + beforeEach(() => { + patchedEvents.length = 0; + }); + + it('patches the last command event with termination.reason and syncs command.success', () => { + const commandEvent = { + type: 'command', + attributes: { 'command.success': true } as Record, + }; + patchedEvents.push(commandEvent); + + analytics.recordTermination('auth_required', 'auth_required'); + + expect(mockPatchLastEventOfType).toHaveBeenCalledWith('command', expect.any(Function)); + expect(commandEvent.attributes['termination.reason']).toBe('auth_required'); + expect(commandEvent.attributes['error.code']).toBe('auth_required'); + expect(commandEvent.attributes['command.success']).toBe(false); + }); + + it('sets command.success true only when reason is success', () => { + const commandEvent = { + type: 'command', + attributes: { 'command.success': false } as Record, + }; + patchedEvents.push(commandEvent); + + analytics.recordTermination('success'); + + expect(commandEvent.attributes['termination.reason']).toBe('success'); + expect(commandEvent.attributes['command.success']).toBe(true); + expect(commandEvent.attributes['error.code']).toBeUndefined(); + }); + + it('applies api context when provided', () => { + const commandEvent = { + type: 'command', + attributes: {} as Record, + }; + patchedEvents.push(commandEvent); + + analytics.recordTermination('api_error', 'http_500', { + status: 500, + code: 'internal_server_error', + resource: 'organizations', + }); + + expect(commandEvent.attributes['api.status']).toBe(500); + expect(commandEvent.attributes['api.code']).toBe('internal_server_error'); + expect(commandEvent.attributes['api.resource']).toBe('organizations'); + }); + + it('skips undefined api context fields', () => { + const commandEvent = { + type: 'command', + attributes: {} as Record, + }; + patchedEvents.push(commandEvent); + + analytics.recordTermination('api_error', 'http_404', { resource: 'users' }); + + expect(commandEvent.attributes['api.resource']).toBe('users'); + expect(commandEvent.attributes['api.status']).toBeUndefined(); + expect(commandEvent.attributes['api.code']).toBeUndefined(); + }); + + it('is a no-op when no command event is queued', () => { + // patchedEvents is empty — helper called from non-command context. + expect(() => analytics.recordTermination('cancelled')).not.toThrow(); + // The mock still forwards the call; the real telemetryClient would + // iterate its queue and return silently. Smoke test: mutator never + // fires because there's no matching event. + expect(mockPatchLastEventOfType).toHaveBeenCalledWith('command', expect.any(Function)); + }); + }); }); describe('with telemetry disabled', () => { @@ -743,6 +840,10 @@ describe('Analytics', () => { queueEvent: mockQueueEvent, flush: mockFlush, replaceLastEventOfType: (...args: unknown[]) => mockReplaceLastEventOfType(...args), + patchLastEventOfType: ( + type: string, + mutator: (event: { type: string; attributes: Record }) => void, + ) => mockPatchLastEventOfType(type, mutator), }, })); vi.doMock('../lib/settings.js', () => ({ @@ -862,5 +963,14 @@ describe('Analytics', () => { expect(mockReplaceLastEventOfType).not.toHaveBeenCalled(); expect(mockQueueEvent).not.toHaveBeenCalled(); }); + + it('recordTermination does nothing', async () => { + const { Analytics } = await import('./analytics.js'); + const analytics = new Analytics(); + + analytics.recordTermination('success'); + + expect(mockPatchLastEventOfType).not.toHaveBeenCalled(); + }); }); }); diff --git a/src/utils/analytics.ts b/src/utils/analytics.ts index 6b389fca..bf153d5e 100644 --- a/src/utils/analytics.ts +++ b/src/utils/analytics.ts @@ -11,6 +11,7 @@ import type { AgentLLMEvent, CommandEvent, CrashEvent, + TerminationReason, } from './telemetry-types.js'; import { WORKOS_TELEMETRY_ENABLED } from '../lib/constants.js'; import { getLlmGatewayUrl, getVersion } from '../lib/settings.js'; @@ -310,6 +311,36 @@ export class Analytics { this.commandExecuted(name, durationMs, success, options); } + /** + * Patch the last queued command event with structured termination info. + * Sets `termination.reason` and (optionally) `error.code` / `api.*`. + * Also syncs `command.success` so the legacy boolean stays consistent + * with the new reason enum. + * + * No-op when no command event is queued — covers helpers fired from + * installer context (session events) rather than command context. + * + * `apiContext` is included now so Phase 3 can enrich API-failure events + * without adding another method. + */ + recordTermination( + reason: TerminationReason, + errorCode?: string, + apiContext?: { status?: number; code?: string; resource?: string }, + ): void { + if (!WORKOS_TELEMETRY_ENABLED) return; + + telemetryClient.patchLastEventOfType('command', (event) => { + const attrs = (event as CommandEvent).attributes; + attrs['termination.reason'] = reason; + attrs['command.success'] = reason === 'success'; + if (errorCode) attrs['error.code'] = errorCode; + if (apiContext?.status !== undefined) attrs['api.status'] = apiContext.status; + if (apiContext?.code) attrs['api.code'] = apiContext.code; + if (apiContext?.resource) attrs['api.resource'] = apiContext.resource; + }); + } + captureUnhandledCrash(error: Error, options?: { command?: string; version?: string }) { if (!WORKOS_TELEMETRY_ENABLED) return; diff --git a/src/utils/command-telemetry.spec.ts b/src/utils/command-telemetry.spec.ts index d0095fe0..7dab84f8 100644 --- a/src/utils/command-telemetry.spec.ts +++ b/src/utils/command-telemetry.spec.ts @@ -3,11 +3,13 @@ import { resolveCanonicalName, extractUserFlags, commandTelemetryMiddleware, wra const mockCommandExecuted = vi.fn(); const mockReplaceLastCommandEvent = vi.fn(); +const mockRecordTermination = vi.fn(); vi.mock('./analytics.js', () => ({ analytics: { commandExecuted: (...args: unknown[]) => mockCommandExecuted(...args), replaceLastCommandEvent: (...args: unknown[]) => mockReplaceLastCommandEvent(...args), + recordTermination: (...args: unknown[]) => mockRecordTermination(...args), }, })); @@ -115,6 +117,23 @@ describe('command-telemetry', () => { expect(duration).toBeGreaterThanOrEqual(100); }); + it('records termination reason "success" after replacing the provisional event', async () => { + const handler = vi.fn().mockResolvedValue(undefined); + const wrapped = wrapCommandHandler(handler); + + await wrapped({ + __telemetryCommandName: 'organization.list', + __telemetryStartTime: Date.now(), + __telemetryFlags: [], + }); + + expect(mockRecordTermination).toHaveBeenCalledWith('success'); + // Order matters: replace re-queues the event, then recordTermination patches it. + const replaceCallOrder = mockReplaceLastCommandEvent.mock.invocationCallOrder[0]; + const terminationCallOrder = mockRecordTermination.mock.invocationCallOrder[0]; + expect(terminationCallOrder).toBeGreaterThan(replaceCallOrder); + }); + it('replaces provisional event on failure with error', async () => { const error = new Error('command failed'); const handler = vi.fn().mockRejectedValue(error); @@ -135,6 +154,26 @@ describe('command-telemetry', () => { ); }); + it('records termination reason "crash" with error name on uncaught throw', async () => { + const error = new TypeError('boom'); + const handler = vi.fn().mockRejectedValue(error); + const wrapped = wrapCommandHandler(handler); + + await expect( + wrapped({ + __telemetryCommandName: 'organization.list', + __telemetryStartTime: Date.now(), + __telemetryFlags: [], + }), + ).rejects.toBe(error); + + expect(mockRecordTermination).toHaveBeenCalledWith('crash', 'TypeError'); + // Same ordering contract as success path: replace then patch. + const replaceCallOrder = mockReplaceLastCommandEvent.mock.invocationCallOrder[0]; + const terminationCallOrder = mockRecordTermination.mock.invocationCallOrder[0]; + expect(terminationCallOrder).toBeGreaterThan(replaceCallOrder); + }); + it('re-throws the original error', async () => { const error = new Error('original'); const handler = vi.fn().mockRejectedValue(error); diff --git a/src/utils/command-telemetry.ts b/src/utils/command-telemetry.ts index facd812c..14fc4fa9 100644 --- a/src/utils/command-telemetry.ts +++ b/src/utils/command-telemetry.ts @@ -81,14 +81,20 @@ export function wrapCommandHandler( try { await handler(argv); - // Replace the provisional event with the real one + // Replace the provisional event with the real one, then patch in + // the structured termination reason. Order matters: replace re-queues, + // then recordTermination mutates the re-queued event in place. analytics.replaceLastCommandEvent(commandName, Date.now() - startTime, true, { flags }); + analytics.recordTermination('success'); } catch (error) { const err = error instanceof Error ? error : new Error(String(error)); analytics.replaceLastCommandEvent(commandName, Date.now() - startTime, false, { error: err, flags, }); + // Uncaught throw = crash. Clean exits (exitWithError/exitWithCode) + // already recorded their own termination reason before exiting. + analytics.recordTermination('crash', err.name); throw error; } finally { // Flush in-process so events are sent immediately, not deferred to next invocation. diff --git a/src/utils/exit-codes.spec.ts b/src/utils/exit-codes.spec.ts index efd7fd45..ab4c69ed 100644 --- a/src/utils/exit-codes.spec.ts +++ b/src/utils/exit-codes.spec.ts @@ -4,8 +4,15 @@ vi.mock('./output.js', () => ({ outputError: vi.fn(), })); +const mockRecordTermination = vi.fn(); +vi.mock('./analytics.js', () => ({ + analytics: { + recordTermination: (...args: unknown[]) => mockRecordTermination(...args), + }, +})); + const { outputError } = await import('./output.js'); -const { ExitCode, exitWithCode, exitWithAuthRequired } = await import('./exit-codes.js'); +const { ExitCode, exitWithCode, exitWithAuthRequired, resolveErrorCode } = await import('./exit-codes.js'); const { setInteractionMode, resetInteractionModeForTests } = await import('./interaction-mode.js'); describe('exit-codes', () => { @@ -22,6 +29,55 @@ describe('exit-codes', () => { }); }); + describe('resolveErrorCode', () => { + it('maps auth_required to exit 4', () => { + expect(resolveErrorCode('auth_required')).toEqual({ + reason: 'auth_required', + exit: ExitCode.AUTH_REQUIRED, + }); + }); + + it('maps cancelled to exit 2', () => { + expect(resolveErrorCode('cancelled')).toEqual({ + reason: 'cancelled', + exit: ExitCode.CANCELLED, + }); + }); + + it('maps not_found and unknown_error to api_error + exit 1', () => { + expect(resolveErrorCode('not_found')).toEqual({ + reason: 'api_error', + exit: ExitCode.GENERAL_ERROR, + }); + expect(resolveErrorCode('unknown_error')).toEqual({ + reason: 'api_error', + exit: ExitCode.GENERAL_ERROR, + }); + }); + + it('maps http_* prefixed codes to api_error + exit 1', () => { + expect(resolveErrorCode('http_401')).toEqual({ + reason: 'api_error', + exit: ExitCode.GENERAL_ERROR, + }); + expect(resolveErrorCode('http_500')).toEqual({ + reason: 'api_error', + exit: ExitCode.GENERAL_ERROR, + }); + }); + + it('falls back to validation_error + exit 1 for unknown codes', () => { + expect(resolveErrorCode('bad_email')).toEqual({ + reason: 'validation_error', + exit: ExitCode.GENERAL_ERROR, + }); + expect(resolveErrorCode('')).toEqual({ + reason: 'validation_error', + exit: ExitCode.GENERAL_ERROR, + }); + }); + }); + describe('exitWithCode', () => { it('exits with the given code', () => { const exitSpy = vi.spyOn(process, 'exit').mockImplementation(() => undefined as never); @@ -45,6 +101,36 @@ describe('exit-codes', () => { expect(exitSpy).toHaveBeenCalledWith(0); exitSpy.mockRestore(); }); + + it('records termination reason derived from exit code before exiting', () => { + const exitSpy = vi.spyOn(process, 'exit').mockImplementation(() => undefined as never); + + exitWithCode(ExitCode.CANCELLED); + expect(mockRecordTermination).toHaveBeenCalledWith('cancelled', undefined); + + mockRecordTermination.mockClear(); + exitWithCode(ExitCode.AUTH_REQUIRED); + expect(mockRecordTermination).toHaveBeenCalledWith('auth_required', undefined); + + mockRecordTermination.mockClear(); + exitWithCode(ExitCode.GENERAL_ERROR); + expect(mockRecordTermination).toHaveBeenCalledWith('validation_error', undefined); + + mockRecordTermination.mockClear(); + exitWithCode(ExitCode.SUCCESS); + expect(mockRecordTermination).toHaveBeenCalledWith('success', undefined); + + exitSpy.mockRestore(); + }); + + it('forwards error.code to recordTermination when error provided', () => { + const exitSpy = vi.spyOn(process, 'exit').mockImplementation(() => undefined as never); + + exitWithCode(ExitCode.GENERAL_ERROR, { code: 'bad_email', message: 'bad' }); + expect(mockRecordTermination).toHaveBeenCalledWith('validation_error', 'bad_email'); + + exitSpy.mockRestore(); + }); }); describe('exitWithAuthRequired', () => { @@ -88,5 +174,12 @@ describe('exit-codes', () => { expect(call.recovery?.hints[0].command).toBeUndefined(); exitSpy.mockRestore(); }); + + it('records termination reason auth_required with error.code before exit', () => { + const exitSpy = vi.spyOn(process, 'exit').mockImplementation(() => undefined as never); + exitWithAuthRequired(); + expect(mockRecordTermination).toHaveBeenCalledWith('auth_required', 'auth_required'); + exitSpy.mockRestore(); + }); }); }); diff --git a/src/utils/exit-codes.ts b/src/utils/exit-codes.ts index b2283b38..f0a23f34 100644 --- a/src/utils/exit-codes.ts +++ b/src/utils/exit-codes.ts @@ -7,10 +7,12 @@ * 4 = Authentication required */ +import { analytics } from './analytics.js'; import { outputError, type StructuredError } from './output.js'; import { formatWorkOSCommand } from './command-invocation.js'; import { authLoginRecovery } from './recovery-hints.js'; import { getInteractionMode } from './interaction-mode.js'; +import type { TerminationReason } from './telemetry-types.js'; export const ExitCode = { SUCCESS: 0, @@ -21,11 +23,38 @@ export const ExitCode = { export type ExitCodeValue = (typeof ExitCode)[keyof typeof ExitCode]; +const ERROR_CODE_MAP: Record = { + auth_required: { reason: 'auth_required', exit: ExitCode.AUTH_REQUIRED }, + cancelled: { reason: 'cancelled', exit: ExitCode.CANCELLED }, + not_found: { reason: 'api_error', exit: ExitCode.GENERAL_ERROR }, + unknown_error: { reason: 'api_error', exit: ExitCode.GENERAL_ERROR }, +}; + +export function resolveErrorCode(code: string): { + reason: TerminationReason; + exit: ExitCodeValue; +} { + const mapped = ERROR_CODE_MAP[code]; + if (mapped) return mapped; + if (code.startsWith('http_')) { + return { reason: 'api_error', exit: ExitCode.GENERAL_ERROR }; + } + return { reason: 'validation_error', exit: ExitCode.GENERAL_ERROR }; +} + +function reasonForExitCode(code: ExitCodeValue): TerminationReason { + if (code === ExitCode.AUTH_REQUIRED) return 'auth_required'; + if (code === ExitCode.CANCELLED) return 'cancelled'; + if (code === ExitCode.SUCCESS) return 'success'; + return 'validation_error'; +} + /** Exit with a specific code, optionally writing a structured error first. */ export function exitWithCode(code: ExitCodeValue, error?: StructuredError): never { if (error) { outputError(error); } + analytics.recordTermination(reasonForExitCode(code), error?.code); process.exit(code); } diff --git a/src/utils/output.spec.ts b/src/utils/output.spec.ts index 125e7eec..6d4c5922 100644 --- a/src/utils/output.spec.ts +++ b/src/utils/output.spec.ts @@ -1,5 +1,12 @@ import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest'; +const mockRecordTermination = vi.fn(); +vi.mock('./analytics.js', () => ({ + analytics: { + recordTermination: (...args: unknown[]) => mockRecordTermination(...args), + }, +})); + const { resolveOutputMode, resolveEffectiveOutputMode, @@ -184,7 +191,11 @@ describe('output', () => { }); describe('exitWithError', () => { - it('writes error and exits with code 1', () => { + beforeEach(() => { + mockRecordTermination.mockClear(); + }); + + it('writes error and exits with code 1 for unknown codes', () => { setOutputMode('json'); const errorSpy = vi.spyOn(console, 'error').mockImplementation(() => {}); const exitSpy = vi.spyOn(process, 'exit').mockImplementation(() => undefined as never); @@ -198,5 +209,71 @@ describe('output', () => { errorSpy.mockRestore(); exitSpy.mockRestore(); }); + + it('exits with code 4 for auth_required', () => { + const errorSpy = vi.spyOn(console, 'error').mockImplementation(() => {}); + const exitSpy = vi.spyOn(process, 'exit').mockImplementation(() => undefined as never); + + exitWithError({ code: 'auth_required', message: 'Not logged in' }); + + expect(exitSpy).toHaveBeenCalledWith(4); + expect(mockRecordTermination).toHaveBeenCalledWith('auth_required', 'auth_required'); + + errorSpy.mockRestore(); + exitSpy.mockRestore(); + }); + + it('exits with code 2 for cancelled', () => { + const errorSpy = vi.spyOn(console, 'error').mockImplementation(() => {}); + const exitSpy = vi.spyOn(process, 'exit').mockImplementation(() => undefined as never); + + exitWithError({ code: 'cancelled', message: 'User cancelled' }); + + expect(exitSpy).toHaveBeenCalledWith(2); + expect(mockRecordTermination).toHaveBeenCalledWith('cancelled', 'cancelled'); + + errorSpy.mockRestore(); + exitSpy.mockRestore(); + }); + + it('records validation_error reason for unknown codes', () => { + const errorSpy = vi.spyOn(console, 'error').mockImplementation(() => {}); + const exitSpy = vi.spyOn(process, 'exit').mockImplementation(() => undefined as never); + + exitWithError({ code: 'bad_email', message: 'bad input' }); + + expect(mockRecordTermination).toHaveBeenCalledWith('validation_error', 'bad_email'); + expect(exitSpy).toHaveBeenCalledWith(1); + + errorSpy.mockRestore(); + exitSpy.mockRestore(); + }); + + it('records api_error reason for http_* codes', () => { + const errorSpy = vi.spyOn(console, 'error').mockImplementation(() => {}); + const exitSpy = vi.spyOn(process, 'exit').mockImplementation(() => undefined as never); + + exitWithError({ code: 'http_429', message: 'rate limited' }); + + expect(mockRecordTermination).toHaveBeenCalledWith('api_error', 'http_429'); + expect(exitSpy).toHaveBeenCalledWith(1); + + errorSpy.mockRestore(); + exitSpy.mockRestore(); + }); + + it('writes stderr before recording termination (flush order)', () => { + const errorSpy = vi.spyOn(console, 'error').mockImplementation(() => {}); + const exitSpy = vi.spyOn(process, 'exit').mockImplementation(() => undefined as never); + + exitWithError({ code: 'auth_required', message: 'bye' }); + + const stderrOrder = errorSpy.mock.invocationCallOrder[0]; + const terminationOrder = mockRecordTermination.mock.invocationCallOrder[0]; + expect(stderrOrder).toBeLessThan(terminationOrder); + + errorSpy.mockRestore(); + exitSpy.mockRestore(); + }); }); }); diff --git a/src/utils/output.ts b/src/utils/output.ts index 6b6aba26..a7c46b91 100644 --- a/src/utils/output.ts +++ b/src/utils/output.ts @@ -7,6 +7,8 @@ */ import chalk from 'chalk'; +import { analytics } from './analytics.js'; +import { resolveErrorCode } from './exit-codes.js'; import { formatTable, type TableColumn } from './table.js'; import type { RecoveryHints } from './recovery-hints.js'; import type { InteractionModeInfo } from './interaction-mode.js'; @@ -132,8 +134,9 @@ export function outputTable(columns: TableColumn[], rows: string[][], rawData?: } } -/** Exit with a structured error. Writes error then exits with code 1. */ export function exitWithError(error: StructuredError): never { outputError(error); - process.exit(1); + const { reason, exit } = resolveErrorCode(error.code); + analytics.recordTermination(reason, error.code); + process.exit(exit); } diff --git a/src/utils/telemetry-client.ts b/src/utils/telemetry-client.ts index 3df750c3..8b2471c7 100644 --- a/src/utils/telemetry-client.ts +++ b/src/utils/telemetry-client.ts @@ -50,6 +50,25 @@ export class TelemetryClient { } } + /** + * Apply a mutator to the last queued event of a given type, in place. + * Unlike `replaceLastEventOfType`, this preserves the event so multiple + * callers can update fields incrementally (e.g., termination reason + + * api context). No-op when no matching event is queued — which covers + * helpers called outside command context (installer session events). + */ + patchLastEventOfType( + type: TelemetryEvent['type'], + mutator: (event: TelemetryEvent) => void, + ): void { + for (let i = this.events.length - 1; i >= 0; i--) { + if (this.events[i].type === type) { + mutator(this.events[i]); + return; + } + } + } + /** * Queue multiple pre-formed events (used by store-forward recovery). */ diff --git a/src/utils/telemetry-types.ts b/src/utils/telemetry-types.ts index 89fe0cba..5a76115a 100644 --- a/src/utils/telemetry-types.ts +++ b/src/utils/telemetry-types.ts @@ -12,6 +12,20 @@ export interface TelemetryEvent { export type AuthMode = 'jwt' | 'claim_token' | 'api_key' | 'none'; +/** + * Structured outcome dimension for command events. Supersedes the boolean + * `command.success` as the primary categorization (`command.success` remains + * for backward-compat). Populated by `analytics.recordTermination()` just + * before `process.exit`. + */ +export type TerminationReason = + | 'success' + | 'cancelled' + | 'auth_required' + | 'validation_error' + | 'api_error' + | 'crash'; + export interface SessionStartEvent extends TelemetryEvent { type: 'session.start'; attributes: { @@ -74,6 +88,11 @@ export interface CommandEvent extends TelemetryEvent { 'command.error_type'?: string; 'command.error_message'?: string; 'command.flags'?: string; + 'termination.reason'?: TerminationReason; + 'error.code'?: string; + 'api.status'?: number; + 'api.code'?: string; + 'api.resource'?: string; 'cli.version': string; 'workos.user_id'?: string; 'device.id': string; From 086a53b5000ea6f402d4da974b180a0cee6ab65b Mon Sep 17 00:00:00 2001 From: Nick Nisi Date: Wed, 15 Apr 2026 17:12:42 -0500 Subject: [PATCH 13/15] feat(telemetry): api.status, api.code, api.resource on API-failure command events MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Wires `createApiErrorHandler` through to the provisional command event via an optional `apiContext` param on `exitWithError`, populating `api.status`, `api.code`, and `api.resource` attributes on API-failure command events. Also treats `apiContext` presence as ground truth for `api_error` termination reason — WorkOS error codes like `rate_limited` or `validation_error` would otherwise fall through `resolveErrorCode` to `validation_error`, hiding legitimate API failures from api_error dashboards. The `CommandEvent` type already had the `api.*` attribute slots (added as forward-compat in Phase 2); only the wiring and tests are new. --- src/lib/api-error-handler.spec.ts | 114 ++++++++++++++++++++++++------ src/lib/api-error-handler.ts | 17 ++++- src/utils/output.spec.ts | 28 ++++++-- src/utils/output.ts | 11 ++- 4 files changed, 140 insertions(+), 30 deletions(-) diff --git a/src/lib/api-error-handler.spec.ts b/src/lib/api-error-handler.spec.ts index 933f4c40..b1025d87 100644 --- a/src/lib/api-error-handler.spec.ts +++ b/src/lib/api-error-handler.spec.ts @@ -1,7 +1,15 @@ import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest'; -import { WorkOSApiError } from './workos-api.js'; -import { createApiErrorHandler } from './api-error-handler.js'; -import { setOutputMode } from '../utils/output.js'; + +const mockRecordTermination = vi.fn(); +vi.mock('../utils/analytics.js', () => ({ + analytics: { + recordTermination: (...args: unknown[]) => mockRecordTermination(...args), + }, +})); + +const { WorkOSApiError } = await import('./workos-api.js'); +const { createApiErrorHandler } = await import('./api-error-handler.js'); +const { setOutputMode } = await import('../utils/output.js'); describe('createApiErrorHandler', () => { let stderrOutput: string[]; @@ -11,6 +19,7 @@ describe('createApiErrorHandler', () => { setOutputMode('json'); stderrOutput = []; exitCode = undefined; + mockRecordTermination.mockClear(); vi.spyOn(console, 'error').mockImplementation((...args: unknown[]) => { stderrOutput.push(args.map(String).join(' ')); }); @@ -29,6 +38,24 @@ describe('createApiErrorHandler', () => { return JSON.parse(stderrOutput[0]); } + function makeSdkError( + status: number, + message: string, + extras?: { code?: string; requestID?: string; errors?: Array<{ message: string }> }, + ) { + const err = new Error(message) as Error & { + status: number; + requestID: string; + code?: string; + errors?: Array<{ message: string }>; + }; + err.status = status; + err.requestID = extras?.requestID ?? 'req_test'; + if (extras?.code) err.code = extras.code; + if (extras?.errors) err.errors = extras.errors; + return err; + } + describe('WorkOSApiError (raw fetch)', () => { it('handles 401 with friendly message', () => { const handler = createApiErrorHandler('Organization'); @@ -69,24 +96,6 @@ describe('createApiErrorHandler', () => { }); describe('SDK exceptions (@workos-inc/node)', () => { - function makeSdkError( - status: number, - message: string, - extras?: { code?: string; requestID?: string; errors?: Array<{ message: string }> }, - ) { - const err = new Error(message) as Error & { - status: number; - requestID: string; - code?: string; - errors?: Array<{ message: string }>; - }; - err.status = status; - err.requestID = extras?.requestID ?? 'req_test'; - if (extras?.code) err.code = extras.code; - if (extras?.errors) err.errors = extras.errors; - return err; - } - it('handles 401 (UnauthorizedException)', () => { const handler = createApiErrorHandler('Organization'); handler(makeSdkError(401, 'Could not authorize the request')); @@ -153,4 +162,67 @@ describe('createApiErrorHandler', () => { expect(parseError().error.code).toBe('unknown_error'); }); }); + + describe('telemetry apiContext', () => { + // Note: process.exit is mocked (doesn't actually exit), so after the + // matched branch's exitWithError returns, the handler continues to the + // fallback branch and emits a second recordTermination call. We assert + // on `mock.calls[0]` (the branch-of-interest call) to keep branch + // assertions isolated from the trailing fallback. + + it('WorkOSApiError path populates apiContext with status/code/resource', () => { + const handler = createApiErrorHandler('Organization'); + handler(new WorkOSApiError('Unauthorized', 401)); + + expect(mockRecordTermination.mock.calls[0]).toEqual([ + 'api_error', + 'http_401', + { status: 401, code: 'http_401', resource: 'Organization' }, + ]); + }); + + it('WorkOSApiError uses error.code in apiContext when present', () => { + const handler = createApiErrorHandler('User'); + handler(new WorkOSApiError('Validation failed', 422, 'validation_error')); + + expect(mockRecordTermination.mock.calls[0]).toEqual([ + 'api_error', + 'validation_error', + { status: 422, code: 'validation_error', resource: 'User' }, + ]); + }); + + it('SDK exception path populates apiContext with status/code/resource', () => { + const handler = createApiErrorHandler('Organization'); + handler(makeSdkError(429, 'Rate limit exceeded', { code: 'rate_limited' })); + + expect(mockRecordTermination.mock.calls[0]).toEqual([ + 'api_error', + 'rate_limited', + { status: 429, code: 'rate_limited', resource: 'Organization' }, + ]); + }); + + it('SDK exception falls back to http_{status} when code absent', () => { + const handler = createApiErrorHandler('Role'); + handler(makeSdkError(404, 'Not found')); + + expect(mockRecordTermination.mock.calls[0]).toEqual([ + 'api_error', + 'http_404', + { status: 404, code: 'http_404', resource: 'Role' }, + ]); + }); + + it('fallback (generic Error) populates resource only — no status/code', () => { + const handler = createApiErrorHandler('Thing'); + handler(new Error('Network timeout')); + + expect(mockRecordTermination.mock.calls[0]).toEqual([ + 'api_error', + 'unknown_error', + { resource: 'Thing' }, + ]); + }); + }); }); diff --git a/src/lib/api-error-handler.ts b/src/lib/api-error-handler.ts index 4940e72a..2ba78f26 100644 --- a/src/lib/api-error-handler.ts +++ b/src/lib/api-error-handler.ts @@ -25,8 +25,9 @@ export function createApiErrorHandler(resourceName: string) { return (error: unknown): never => { // 1. Raw fetch errors (workos-api.ts) if (error instanceof WorkOSApiError) { + const code = error.code ?? `http_${error.statusCode}`; exitWithError({ - code: error.code ?? `http_${error.statusCode}`, + code, message: error.statusCode === 401 ? 'Invalid API key. Check your environment configuration.' @@ -36,13 +37,19 @@ export function createApiErrorHandler(resourceName: string) { ? error.errors.map((e) => e.message).join(', ') : error.message, details: error.errors, + apiContext: { + status: error.statusCode, + code, + resource: resourceName, + }, }); } // 2. SDK exceptions (@workos-inc/node) if (isSdkException(error)) { + const code = error.code ?? `http_${error.status}`; exitWithError({ - code: error.code ?? `http_${error.status}`, + code, message: error.status === 401 ? 'Invalid API key. Check your environment configuration.' @@ -52,6 +59,11 @@ export function createApiErrorHandler(resourceName: string) { ? error.errors.map((e) => e.message).join(', ') : error.message, details: error.errors, + apiContext: { + status: error.status, + code, + resource: resourceName, + }, }); } @@ -59,6 +71,7 @@ export function createApiErrorHandler(resourceName: string) { exitWithError({ code: 'unknown_error', message: error instanceof Error ? error.message : 'Unknown error', + apiContext: { resource: resourceName }, }); }; } diff --git a/src/utils/output.spec.ts b/src/utils/output.spec.ts index 6d4c5922..54f9a925 100644 --- a/src/utils/output.spec.ts +++ b/src/utils/output.spec.ts @@ -217,7 +217,7 @@ describe('output', () => { exitWithError({ code: 'auth_required', message: 'Not logged in' }); expect(exitSpy).toHaveBeenCalledWith(4); - expect(mockRecordTermination).toHaveBeenCalledWith('auth_required', 'auth_required'); + expect(mockRecordTermination).toHaveBeenCalledWith('auth_required', 'auth_required', undefined); errorSpy.mockRestore(); exitSpy.mockRestore(); @@ -230,7 +230,7 @@ describe('output', () => { exitWithError({ code: 'cancelled', message: 'User cancelled' }); expect(exitSpy).toHaveBeenCalledWith(2); - expect(mockRecordTermination).toHaveBeenCalledWith('cancelled', 'cancelled'); + expect(mockRecordTermination).toHaveBeenCalledWith('cancelled', 'cancelled', undefined); errorSpy.mockRestore(); exitSpy.mockRestore(); @@ -242,7 +242,7 @@ describe('output', () => { exitWithError({ code: 'bad_email', message: 'bad input' }); - expect(mockRecordTermination).toHaveBeenCalledWith('validation_error', 'bad_email'); + expect(mockRecordTermination).toHaveBeenCalledWith('validation_error', 'bad_email', undefined); expect(exitSpy).toHaveBeenCalledWith(1); errorSpy.mockRestore(); @@ -255,7 +255,7 @@ describe('output', () => { exitWithError({ code: 'http_429', message: 'rate limited' }); - expect(mockRecordTermination).toHaveBeenCalledWith('api_error', 'http_429'); + expect(mockRecordTermination).toHaveBeenCalledWith('api_error', 'http_429', undefined); expect(exitSpy).toHaveBeenCalledWith(1); errorSpy.mockRestore(); @@ -275,5 +275,25 @@ describe('output', () => { errorSpy.mockRestore(); exitSpy.mockRestore(); }); + + it('forwards apiContext to recordTermination', () => { + const errorSpy = vi.spyOn(console, 'error').mockImplementation(() => {}); + const exitSpy = vi.spyOn(process, 'exit').mockImplementation(() => undefined as never); + + exitWithError({ + code: 'http_500', + message: 'server exploded', + apiContext: { status: 500, code: 'http_500', resource: 'Organization' }, + }); + + expect(mockRecordTermination).toHaveBeenCalledWith('api_error', 'http_500', { + status: 500, + code: 'http_500', + resource: 'Organization', + }); + + errorSpy.mockRestore(); + exitSpy.mockRestore(); + }); }); }); diff --git a/src/utils/output.ts b/src/utils/output.ts index a7c46b91..8096bc48 100644 --- a/src/utils/output.ts +++ b/src/utils/output.ts @@ -134,9 +134,14 @@ export function outputTable(columns: TableColumn[], rows: string[][], rawData?: } } -export function exitWithError(error: StructuredError): never { +export function exitWithError( + error: StructuredError & { + apiContext?: { status?: number; code?: string; resource?: string }; + }, +): never { outputError(error); - const { reason, exit } = resolveErrorCode(error.code); - analytics.recordTermination(reason, error.code); + const { reason: codeReason, exit } = resolveErrorCode(error.code); + const reason = error.apiContext ? 'api_error' : codeReason; + analytics.recordTermination(reason, error.code, error.apiContext); process.exit(exit); } From e23315436c74d68ece37797f03a8dda5e20be82d Mon Sep 17 00:00:00 2001 From: Nick Nisi Date: Tue, 12 May 2026 15:38:16 -0500 Subject: [PATCH 14/15] fix(telemetry): address review findings across telemetry signal commits - Fix duration=0 on exit-path events: middleware now sets analytics.commandStartTime so recordTermination can compute real duration when exitWithError/exitWithCode bypass the wrapper - Remove not_found/unknown_error from ERROR_CODE_MAP to prevent local config misses (env.ts) from being misclassified as API errors - Resolve auth.mode before sessionStart in installer flow so the session.start event carries the correct credential source - Make recordTermination fully idempotent: clears stale api.* and error.code fields when called without them - Tighten device-id UUID validation to proper RFC 4122 v4 regex - Only override termination reason to api_error when the code-based classification falls to the generic validation_error fallback, preserving more specific reasons like auth_required --- src/lib/device-id.spec.ts | 21 ++++++++-- src/lib/device-id.ts | 6 ++- src/lib/run-with-core.ts | 11 ++++- src/utils/analytics.spec.ts | 62 ++++++++++++++++++++++++++++ src/utils/analytics.ts | 63 +++++++++++++++++++++++++++-- src/utils/command-telemetry.spec.ts | 2 + src/utils/command-telemetry.ts | 6 +++ src/utils/exit-codes.spec.ts | 9 +++-- src/utils/exit-codes.ts | 3 +- src/utils/output.spec.ts | 46 +++++++++++++++++++++ src/utils/output.ts | 4 +- 11 files changed, 215 insertions(+), 18 deletions(-) diff --git a/src/lib/device-id.spec.ts b/src/lib/device-id.spec.ts index 6c65cfec..8e41824e 100644 --- a/src/lib/device-id.spec.ts +++ b/src/lib/device-id.spec.ts @@ -28,7 +28,7 @@ vi.mock('node:os', async (importOriginal) => { const { getDeviceId, __resetDeviceIdCache } = await import('./device-id.js'); -const UUID_REGEX = /^[0-9a-f-]{36}$/i; +const UUID_V4_REGEX = /^[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$/i; describe('device-id', () => { beforeEach(() => { @@ -48,7 +48,7 @@ describe('device-id', () => { it('creates the file on first call and returns a UUID', () => { const id = getDeviceId(); - expect(id).toMatch(UUID_REGEX); + expect(id).toMatch(UUID_V4_REGEX); const filePath = join(testDir, '.workos', 'device-id'); expect(existsSync(filePath)).toBe(true); expect(readFileSync(filePath, 'utf8')).toBe(id); @@ -77,18 +77,31 @@ describe('device-id', () => { writeFileSync(join(workosDir, 'device-id'), 'not-a-uuid', 'utf8'); const id = getDeviceId(); - expect(id).toMatch(UUID_REGEX); + expect(id).toMatch(UUID_V4_REGEX); expect(id).not.toBe('not-a-uuid'); // File should be rewritten with the new UUID. expect(readFileSync(join(workosDir, 'device-id'), 'utf8')).toBe(id); }); + it('regenerates when the file contains a 36-char non-v4 string', () => { + // Guard against overly permissive regex validation — 36 hyphens passes + // a naive `[0-9a-f-]{36}` check but is not a UUIDv4. + const workosDir = join(testDir, '.workos'); + mkdirSync(workosDir, { recursive: true }); + const bogus = '------------------------------------'; + writeFileSync(join(workosDir, 'device-id'), bogus, 'utf8'); + + const id = getDeviceId(); + expect(id).toMatch(UUID_V4_REGEX); + expect(id).not.toBe(bogus); + }); + it('falls back to a one-shot UUID when the filesystem is readonly', () => { // Make the home directory non-writable so mkdirSync throws. chmodSync(testDir, 0o500); const id = getDeviceId(); - expect(id).toMatch(UUID_REGEX); + expect(id).toMatch(UUID_V4_REGEX); // File was never created. expect(existsSync(join(testDir, '.workos', 'device-id'))).toBe(false); }); diff --git a/src/lib/device-id.ts b/src/lib/device-id.ts index 61ef74aa..21a52e21 100644 --- a/src/lib/device-id.ts +++ b/src/lib/device-id.ts @@ -11,7 +11,9 @@ import path from 'node:path'; import os from 'node:os'; import crypto from 'node:crypto'; -const UUID_REGEX = /^[0-9a-f-]{36}$/i; +// RFC 4122 v4 format — matches what `crypto.randomUUID()` produces. +// Rejects non-UUID strings like "------------------------------------". +const UUID_V4_REGEX = /^[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$/i; let cached: string | undefined; @@ -31,7 +33,7 @@ export function getDeviceId(): string { try { if (fs.existsSync(filePath)) { const raw = fs.readFileSync(filePath, 'utf8').trim(); - if (UUID_REGEX.test(raw)) { + if (UUID_V4_REGEX.test(raw)) { cached = raw; return raw; } diff --git a/src/lib/run-with-core.ts b/src/lib/run-with-core.ts index 8be93c98..74f41edc 100644 --- a/src/lib/run-with-core.ts +++ b/src/lib/run-with-core.ts @@ -538,8 +538,15 @@ export async function runWithCore(options: InstallerOptions): Promise { await adapter.start(); - // Start telemetry session. Analytics currently accepts cli/tui/headless only, - // so agent and CI mode both report through the existing headless bucket. + const preSessionEnv = getActiveEnvironment(); + if (preSessionEnv?.clientId && preSessionEnv?.apiKey) { + analytics.setAuthMode(isUnclaimedEnvironment(preSessionEnv) ? 'claim_token' : 'api_key'); + } else if (getAccessToken()) { + analytics.setAuthMode('jwt'); + } else if (process.env.WORKOS_API_KEY) { + analytics.setAuthMode('api_key'); + } + const mode = headlessMode ? 'headless' : augmentedOptions.dashboard ? 'tui' : 'cli'; analytics.sessionStart(mode, getVersion()); diff --git a/src/utils/analytics.spec.ts b/src/utils/analytics.spec.ts index 3f429e02..c6723128 100644 --- a/src/utils/analytics.spec.ts +++ b/src/utils/analytics.spec.ts @@ -825,6 +825,68 @@ describe('Analytics', () => { // fires because there's no matching event. expect(mockPatchLastEventOfType).toHaveBeenCalledWith('command', expect.any(Function)); }); + + it('patches command.duration_ms when the provisional value is 0 and a start time is set', () => { + const commandEvent = { + type: 'command', + attributes: { 'command.duration_ms': 0 } as Record, + }; + patchedEvents.push(commandEvent); + + const start = Date.now() - 250; + analytics.setCommandStart(start); + analytics.recordTermination('auth_required', 'auth_required'); + + expect(commandEvent.attributes['command.duration_ms']).toBeGreaterThanOrEqual(250); + }); + + it('does not overwrite command.duration_ms when it is already non-zero', () => { + // Wrapper-path callers run replaceLastCommandEvent first (sets real + // duration), THEN recordTermination. Duration must not regress. + const commandEvent = { + type: 'command', + attributes: { 'command.duration_ms': 1234 } as Record, + }; + patchedEvents.push(commandEvent); + + analytics.setCommandStart(Date.now() - 9999); + analytics.recordTermination('success'); + + expect(commandEvent.attributes['command.duration_ms']).toBe(1234); + }); + + it('clears stale api.* fields when called without apiContext', () => { + // Idempotency: if a prior call set api.* and a later call omits them, + // the later reason must not carry stale fields. + const commandEvent = { + type: 'command', + attributes: { + 'api.status': 401, + 'api.code': 'unauthorized', + 'api.resource': 'Organization', + } as Record, + }; + patchedEvents.push(commandEvent); + + analytics.recordTermination('validation_error', 'bad_email'); + + expect(commandEvent.attributes['api.status']).toBeUndefined(); + expect(commandEvent.attributes['api.code']).toBeUndefined(); + expect(commandEvent.attributes['api.resource']).toBeUndefined(); + expect(commandEvent.attributes['error.code']).toBe('bad_email'); + }); + + it('clears stale error.code when called without errorCode', () => { + const commandEvent = { + type: 'command', + attributes: { 'error.code': 'old_code' } as Record, + }; + patchedEvents.push(commandEvent); + + analytics.recordTermination('success'); + + expect(commandEvent.attributes['error.code']).toBeUndefined(); + }); }); }); diff --git a/src/utils/analytics.ts b/src/utils/analytics.ts index bf153d5e..b722ba6b 100644 --- a/src/utils/analytics.ts +++ b/src/utils/analytics.ts @@ -27,6 +27,9 @@ export class Analytics { private distinctId?: string; private mode?: 'cli' | 'tui' | 'headless'; private authMode: AuthMode = 'none'; + // Captured by the yargs middleware so exit-path helpers (exitWithError, + // exitWithCode) can compute real duration on the patched event. + private commandStartTime?: number; // Agent metrics tracking private totalInputTokens = 0; @@ -55,6 +58,16 @@ export class Analytics { this.authMode = mode; } + /** + * Record the command start time so `recordTermination` can compute real + * duration on exit-path code (exitWithError / exitWithCode). The wrapper's + * success/catch paths use argv-scoped start time directly; this is the + * fallback for `process.exit` callers that bypass the wrapper. + */ + setCommandStart(time: number) { + this.commandStartTime = time; + } + setGatewayUrl(url: string) { telemetryClient.setGatewayUrl(url); } @@ -330,14 +343,56 @@ export class Analytics { ): void { if (!WORKOS_TELEMETRY_ENABLED) return; + // Compute real duration for exit-path callers. The middleware captures + // commandStartTime; wrapper callers still pass their own duration via + // replaceLastCommandEvent, which runs before recordTermination and sets + // the authoritative value — we only patch duration when the current + // event still has the provisional 0. + const durationMs = this.commandStartTime !== undefined + ? Date.now() - this.commandStartTime + : undefined; + telemetryClient.patchLastEventOfType('command', (event) => { const attrs = (event as CommandEvent).attributes; attrs['termination.reason'] = reason; attrs['command.success'] = reason === 'success'; - if (errorCode) attrs['error.code'] = errorCode; - if (apiContext?.status !== undefined) attrs['api.status'] = apiContext.status; - if (apiContext?.code) attrs['api.code'] = apiContext.code; - if (apiContext?.resource) attrs['api.resource'] = apiContext.resource; + + // Duration: only overwrite if the event still carries the provisional 0 + // (wrapper paths set real duration via replaceLastCommandEvent first). + if (durationMs !== undefined && !attrs['command.duration_ms']) { + attrs['command.duration_ms'] = durationMs; + } + + if (errorCode) { + attrs['error.code'] = errorCode; + } else { + delete attrs['error.code']; + } + + // api.* fields are last-writer-wins as a group: if apiContext is + // provided, set every subfield (or clear it); if not, clear all. + // Prevents stale api.* from a prior call leaking onto a later one. + if (apiContext) { + if (apiContext.status !== undefined) { + attrs['api.status'] = apiContext.status; + } else { + delete attrs['api.status']; + } + if (apiContext.code) { + attrs['api.code'] = apiContext.code; + } else { + delete attrs['api.code']; + } + if (apiContext.resource) { + attrs['api.resource'] = apiContext.resource; + } else { + delete attrs['api.resource']; + } + } else { + delete attrs['api.status']; + delete attrs['api.code']; + delete attrs['api.resource']; + } }); } diff --git a/src/utils/command-telemetry.spec.ts b/src/utils/command-telemetry.spec.ts index 7dab84f8..e46638f7 100644 --- a/src/utils/command-telemetry.spec.ts +++ b/src/utils/command-telemetry.spec.ts @@ -4,12 +4,14 @@ import { resolveCanonicalName, extractUserFlags, commandTelemetryMiddleware, wra const mockCommandExecuted = vi.fn(); const mockReplaceLastCommandEvent = vi.fn(); const mockRecordTermination = vi.fn(); +const mockSetCommandStart = vi.fn(); vi.mock('./analytics.js', () => ({ analytics: { commandExecuted: (...args: unknown[]) => mockCommandExecuted(...args), replaceLastCommandEvent: (...args: unknown[]) => mockReplaceLastCommandEvent(...args), recordTermination: (...args: unknown[]) => mockRecordTermination(...args), + setCommandStart: (...args: unknown[]) => mockSetCommandStart(...args), }, })); diff --git a/src/utils/command-telemetry.ts b/src/utils/command-telemetry.ts index 14fc4fa9..c26a356a 100644 --- a/src/utils/command-telemetry.ts +++ b/src/utils/command-telemetry.ts @@ -56,6 +56,12 @@ export function commandTelemetryMiddleware(rawArgs: string[]) { argv.__telemetryStartTime = startTime; argv.__telemetryFlags = flags; + // Also stash on the analytics instance so exit-path helpers + // (exitWithError / exitWithCode) can compute real duration when they + // patch the provisional event — those paths never reach the wrapper's + // replaceLastCommandEvent call. + analytics.setCommandStart(startTime); + // Skip provisional event for commands with their own telemetry (e.g., install) const topLevelCommand = commandParts[0] ?? ''; if (SKIP_TELEMETRY_COMMANDS.has(topLevelCommand)) return; diff --git a/src/utils/exit-codes.spec.ts b/src/utils/exit-codes.spec.ts index ab4c69ed..3bf3271d 100644 --- a/src/utils/exit-codes.spec.ts +++ b/src/utils/exit-codes.spec.ts @@ -44,13 +44,16 @@ describe('exit-codes', () => { }); }); - it('maps not_found and unknown_error to api_error + exit 1', () => { + it('does not hard-classify not_found / unknown_error as api_error', () => { + // These codes are reused for non-API local errors (e.g. env.ts missing + // config). API failures signal via `apiContext` on `exitWithError` so + // `resolveErrorCode` falls back to `validation_error` here. expect(resolveErrorCode('not_found')).toEqual({ - reason: 'api_error', + reason: 'validation_error', exit: ExitCode.GENERAL_ERROR, }); expect(resolveErrorCode('unknown_error')).toEqual({ - reason: 'api_error', + reason: 'validation_error', exit: ExitCode.GENERAL_ERROR, }); }); diff --git a/src/utils/exit-codes.ts b/src/utils/exit-codes.ts index f0a23f34..b1767f6c 100644 --- a/src/utils/exit-codes.ts +++ b/src/utils/exit-codes.ts @@ -26,10 +26,9 @@ export type ExitCodeValue = (typeof ExitCode)[keyof typeof ExitCode]; const ERROR_CODE_MAP: Record = { auth_required: { reason: 'auth_required', exit: ExitCode.AUTH_REQUIRED }, cancelled: { reason: 'cancelled', exit: ExitCode.CANCELLED }, - not_found: { reason: 'api_error', exit: ExitCode.GENERAL_ERROR }, - unknown_error: { reason: 'api_error', exit: ExitCode.GENERAL_ERROR }, }; + export function resolveErrorCode(code: string): { reason: TerminationReason; exit: ExitCodeValue; diff --git a/src/utils/output.spec.ts b/src/utils/output.spec.ts index 54f9a925..34303dbc 100644 --- a/src/utils/output.spec.ts +++ b/src/utils/output.spec.ts @@ -295,5 +295,51 @@ describe('output', () => { errorSpy.mockRestore(); exitSpy.mockRestore(); }); + + it('preserves auth_required reason even when apiContext is present', () => { + // A 401 with apiContext must still classify as auth_required, not + // api_error — the more specific reason wins over the override. + const errorSpy = vi.spyOn(console, 'error').mockImplementation(() => {}); + const exitSpy = vi.spyOn(process, 'exit').mockImplementation(() => undefined as never); + + exitWithError({ + code: 'auth_required', + message: 'not authenticated', + apiContext: { status: 401, code: 'unauthorized', resource: 'Organization' }, + }); + + expect(mockRecordTermination).toHaveBeenCalledWith( + 'auth_required', + 'auth_required', + { status: 401, code: 'unauthorized', resource: 'Organization' }, + ); + expect(exitSpy).toHaveBeenCalledWith(4); + + errorSpy.mockRestore(); + exitSpy.mockRestore(); + }); + + it('overrides validation_error fallback to api_error when apiContext is present', () => { + // WorkOS error codes like `rate_limited` that are not in ERROR_CODE_MAP + // fall through to validation_error. With apiContext, they should be + // reclassified as api_error so API-failure dashboards see them. + const errorSpy = vi.spyOn(console, 'error').mockImplementation(() => {}); + const exitSpy = vi.spyOn(process, 'exit').mockImplementation(() => undefined as never); + + exitWithError({ + code: 'rate_limited', + message: 'slow down', + apiContext: { status: 429, code: 'rate_limited', resource: 'Organization' }, + }); + + expect(mockRecordTermination).toHaveBeenCalledWith( + 'api_error', + 'rate_limited', + expect.objectContaining({ status: 429 }), + ); + + errorSpy.mockRestore(); + exitSpy.mockRestore(); + }); }); }); diff --git a/src/utils/output.ts b/src/utils/output.ts index 8096bc48..eb324ba4 100644 --- a/src/utils/output.ts +++ b/src/utils/output.ts @@ -141,7 +141,9 @@ export function exitWithError( ): never { outputError(error); const { reason: codeReason, exit } = resolveErrorCode(error.code); - const reason = error.apiContext ? 'api_error' : codeReason; + const reason = error.apiContext && codeReason === 'validation_error' + ? 'api_error' + : codeReason; analytics.recordTermination(reason, error.code, error.apiContext); process.exit(exit); } From 0475ea806bd11bf553d496e15278c8192109d403 Mon Sep 17 00:00:00 2001 From: Nick Nisi Date: Tue, 12 May 2026 17:13:33 -0500 Subject: [PATCH 15/15] fix(telemetry): close coverage gaps and add guardrail test Wrap the `api` command handler with `wrapCommandHandler()` so it reports real duration and success/failure instead of the provisional defaults (duration=0, success=true). Replace raw `process.exit()` calls with `exitWithCode`/`exitWithError` across 10 command files so `recordTermination` fires before exit. This ensures `termination.reason` and `error.code` are populated on every command event. Long-running server handlers (dev/emulate SIGINT) are left as-is. Fix help-json.ts alias drift by importing from the shared `COMMAND_ALIASES` map instead of maintaining a private copy. Add a coverage test that scans bin.ts for inline handlers missing `wrapCommandHandler`, catching the class of regression that let the `api` command slip through. --- src/bin.ts | 4 +- src/commands/api/index.spec.ts | 2 +- src/commands/api/index.ts | 9 +++- src/commands/api/interactive.spec.ts | 12 ++--- src/commands/api/interactive.ts | 12 +++-- src/commands/dev.ts | 9 ++-- src/commands/doctor.ts | 7 +-- src/commands/emulate.ts | 4 +- src/commands/env.ts | 9 ++-- src/commands/install-skill.ts | 7 +-- src/commands/login.ts | 6 +-- src/commands/uninstall-skill.ts | 5 +- src/utils/command-telemetry.spec.ts | 71 ++++++++++++++++++++++++++++ src/utils/help-json.ts | 6 +-- 14 files changed, 125 insertions(+), 38 deletions(-) diff --git a/src/bin.ts b/src/bin.ts index 1e8b6217..4ba4f52c 100644 --- a/src/bin.ts +++ b/src/bin.ts @@ -533,7 +533,7 @@ yargs(rawArgs) .example('workos api /user_management/users', 'GET /user_management/users') .example('workos api /organizations -d \'{"name":"Acme"}\'', 'POST with a JSON body') .example('workos api /organizations/org_123 -X DELETE', 'DELETE an organization'), - async (argv) => { + wrapCommandHandler(async (argv) => { await applyInsecureStorage(argv.insecureStorage as boolean | undefined); const endpoint = argv.endpoint as string | undefined; const filter = argv.filter as string | undefined; @@ -559,7 +559,7 @@ yargs(rawArgs) dryRun: argv.dryRun, yes: argv.yes, }); - }, + }), ) .command(['organization', 'org'], 'Manage WorkOS organizations (create, update, get, list, delete)', (yargs) => { yargs.options({ diff --git a/src/commands/api/index.spec.ts b/src/commands/api/index.spec.ts index 0840de35..1330cf40 100644 --- a/src/commands/api/index.spec.ts +++ b/src/commands/api/index.spec.ts @@ -317,7 +317,7 @@ describe('runApiRequest', () => { it('aborts when the user declines the confirmation prompt', async () => { mockConfirm.mockResolvedValueOnce(false); - await expect(runApiRequest('/organizations', { method: 'POST', data: '{}' })).rejects.toThrow(/__exit__:0/); + await expect(runApiRequest('/organizations', { method: 'POST', data: '{}' })).rejects.toThrow(/__exit__:2/); expect(mockApiRequest).not.toHaveBeenCalled(); }); diff --git a/src/commands/api/index.ts b/src/commands/api/index.ts index 0f60b932..8799cddf 100644 --- a/src/commands/api/index.ts +++ b/src/commands/api/index.ts @@ -4,6 +4,7 @@ import { loadCatalog, endpointsByTag } from './catalog.js'; import { apiRequest } from './request.js'; import { resolveApiBaseUrl } from '../../lib/api-key.js'; import { exitWithError, isJsonMode, outputJson } from '../../utils/output.js'; +import { ExitCode, exitWithCode } from '../../utils/exit-codes.js'; import { isCiMode, isPromptAllowed } from '../../utils/interaction-mode.js'; import { confirmationRecovery } from '../../utils/recovery-hints.js'; import { formatWorkOSCommandArgs } from '../../utils/command-invocation.js'; @@ -145,7 +146,7 @@ export async function runApiRequest(endpoint: string, options: ApiCommandOptions if (hasBody) prettyPrint(body); const ok = await clack.confirm({ message: 'Proceed?' }); if (!ok || clack.isCancel(ok)) { - process.exit(0); + exitWithCode(ExitCode.CANCELLED); } } @@ -160,7 +161,11 @@ export async function runApiRequest(endpoint: string, options: ApiCommandOptions printResponse(response, { includeStatus: options.include }); if (response.status >= 400) { - process.exit(1); + exitWithError({ + code: `http_${response.status}`, + message: `API request failed with status ${response.status}`, + apiContext: { status: response.status }, + }); } } diff --git a/src/commands/api/interactive.spec.ts b/src/commands/api/interactive.spec.ts index 9469bc94..ddc33b59 100644 --- a/src/commands/api/interactive.spec.ts +++ b/src/commands/api/interactive.spec.ts @@ -232,20 +232,20 @@ describe('apiInteractive', () => { expect(mockApiRequest).toHaveBeenCalledWith(expect.objectContaining({ body: undefined })); }); - it('exits with code 0 when the user cancels at the category prompt', async () => { + it('exits with code 2 when the user cancels at the category prompt', async () => { mockSelect.mockResolvedValueOnce(cancelSymbol); - await expect(apiInteractive()).rejects.toThrow(/__exit__:0/); - expect(exitSpy).toHaveBeenCalledWith(0); + await expect(apiInteractive()).rejects.toThrow(/__exit__:2/); + expect(exitSpy).toHaveBeenCalledWith(2); expect(mockApiRequest).not.toHaveBeenCalled(); }); - it('exits with code 0 when the user declines the final confirmation', async () => { + it('exits with code 2 when the user declines the final confirmation', async () => { mockSelect.mockResolvedValueOnce('Users').mockResolvedValueOnce(mockCatalog.endpoints[0]); mockConfirm.mockResolvedValueOnce(false); - await expect(apiInteractive()).rejects.toThrow(/__exit__:0/); - expect(exitSpy).toHaveBeenCalledWith(0); + await expect(apiInteractive()).rejects.toThrow(/__exit__:2/); + expect(exitSpy).toHaveBeenCalledWith(2); expect(mockApiRequest).not.toHaveBeenCalled(); }); diff --git a/src/commands/api/interactive.ts b/src/commands/api/interactive.ts index 8e7a12f8..4fd18f1c 100644 --- a/src/commands/api/interactive.ts +++ b/src/commands/api/interactive.ts @@ -3,9 +3,11 @@ import { loadCatalog, endpointsByTag, type EndpointInfo } from './catalog.js'; import { apiRequest } from './request.js'; import { colorMethod, printResponse } from './format.js'; import { resolveApiKey, resolveApiBaseUrl } from '../../lib/api-key.js'; +import { ExitCode, exitWithCode } from '../../utils/exit-codes.js'; +import { exitWithError } from '../../utils/output.js'; function assertNotCancelled(value: T | symbol): T { - if (clack.isCancel(value)) process.exit(0); + if (clack.isCancel(value)) exitWithCode(ExitCode.CANCELLED); return value as T; } @@ -136,7 +138,7 @@ export async function apiInteractive(options?: { apiKey?: string }): Promise= 400) { - process.exit(1); + exitWithError({ + code: `http_${response.status}`, + message: `API request failed with status ${response.status}`, + apiContext: { status: response.status }, + }); } } diff --git a/src/commands/dev.ts b/src/commands/dev.ts index cfc8c6ab..5766e4b4 100644 --- a/src/commands/dev.ts +++ b/src/commands/dev.ts @@ -5,6 +5,8 @@ import { readFileSync, existsSync } from 'node:fs'; import { resolve } from 'node:path'; import { parse as parseYaml } from 'yaml'; import chalk from 'chalk'; +import { ExitCode, exitWithCode } from '../utils/exit-codes.js'; +import { exitWithError } from '../utils/output.js'; export interface DevArgs { port: number; @@ -15,8 +17,7 @@ export interface DevArgs { function loadSeedFile(filePath: string): EmulatorSeedConfig { const resolved = resolve(filePath); if (!existsSync(resolved)) { - console.error(`Seed file not found: ${resolved}`); - process.exit(1); + exitWithError({ code: 'seed_not_found', message: `Seed file not found: ${resolved}` }); } const content = readFileSync(resolved, 'utf-8'); @@ -127,7 +128,7 @@ export async function runDev(argv: DevArgs): Promise { console.error(chalk.red(`Failed to start: ${devCmd.command} ${devCmd.args.join(' ')}`)); console.error(chalk.dim('Try specifying the command explicitly: workos dev -- ')); await emulator.close(); - process.exit(1); + exitWithCode(ExitCode.GENERAL_ERROR); } child.on('error', async (err) => { @@ -139,7 +140,7 @@ export async function runDev(argv: DevArgs): Promise { console.error(chalk.dim(err.message)); } await emulator.close(); - process.exit(1); + exitWithCode(ExitCode.GENERAL_ERROR); }); // 5. Signal handling — forward to child, then close emulator diff --git a/src/commands/doctor.ts b/src/commands/doctor.ts index 419deee8..32e83a13 100644 --- a/src/commands/doctor.ts +++ b/src/commands/doctor.ts @@ -1,6 +1,7 @@ import type { ArgumentsCamelCase } from 'yargs'; import { runDoctor, outputReport } from '../doctor/index.js'; import clack from '../utils/clack.js'; +import { ExitCode, exitWithCode } from '../utils/exit-codes.js'; interface DoctorArgs { verbose?: boolean; @@ -29,15 +30,15 @@ export async function handleDoctor(argv: ArgumentsCamelCase): Promis // Exit with error code if critical issues found if (report.summary.errors > 0) { - process.exit(1); + exitWithCode(ExitCode.GENERAL_ERROR); } - process.exit(0); + exitWithCode(ExitCode.SUCCESS); } catch (error) { if (!options.json) { clack.log.error(`Doctor failed: ${error instanceof Error ? error.message : 'Unknown error'}`); } else { console.error(JSON.stringify({ error: error instanceof Error ? error.message : 'Unknown error' })); } - process.exit(1); + exitWithCode(ExitCode.GENERAL_ERROR); } } diff --git a/src/commands/emulate.ts b/src/commands/emulate.ts index d257e808..56316e21 100644 --- a/src/commands/emulate.ts +++ b/src/commands/emulate.ts @@ -3,6 +3,7 @@ import { readFileSync, existsSync } from 'node:fs'; import { resolve } from 'node:path'; import { parse as parseYaml } from 'yaml'; import chalk from 'chalk'; +import { exitWithError } from '../utils/output.js'; export interface EmulateArgs { port: number; @@ -13,8 +14,7 @@ export interface EmulateArgs { function loadSeedFile(filePath: string): EmulatorSeedConfig { const resolved = resolve(filePath); if (!existsSync(resolved)) { - console.error(`Seed file not found: ${resolved}`); - process.exit(1); + exitWithError({ code: 'seed_not_found', message: `Seed file not found: ${resolved}` }); } const content = readFileSync(resolved, 'utf-8'); diff --git a/src/commands/env.ts b/src/commands/env.ts index cff97105..9613b5a4 100644 --- a/src/commands/env.ts +++ b/src/commands/env.ts @@ -6,6 +6,7 @@ import { outputSuccess, outputJson, exitWithError, isJsonMode } from '../utils/o import { isAgentMode, isCiMode, isPromptAllowed } from '../utils/interaction-mode.js'; import { missingArgsRecovery } from '../utils/recovery-hints.js'; import { formatWorkOSCommand } from '../utils/command-invocation.js'; +import { ExitCode, exitWithCode } from '../utils/exit-codes.js'; const ENV_NAME_REGEX = /^[a-z0-9\-_]+$/; @@ -51,7 +52,7 @@ export async function runEnvAdd(options: { message: 'Enter a name for the environment (e.g., production, sandbox, local)', validate: (value) => validateEnvName(value), }); - if (clack.isCancel(nameResult)) process.exit(0); + if (clack.isCancel(nameResult)) exitWithCode(ExitCode.CANCELLED); name = nameResult; const typeResult = await clack.select({ @@ -61,7 +62,7 @@ export async function runEnvAdd(options: { { value: 'sandbox', label: 'Sandbox' }, ], }); - if (clack.isCancel(typeResult)) process.exit(0); + if (clack.isCancel(typeResult)) exitWithCode(ExitCode.CANCELLED); const apiKeyResult = await clack.password({ message: 'Enter the API key for this environment', @@ -70,7 +71,7 @@ export async function runEnvAdd(options: { return undefined; }, }); - if (clack.isCancel(apiKeyResult)) process.exit(0); + if (clack.isCancel(apiKeyResult)) exitWithCode(ExitCode.CANCELLED); apiKey = apiKeyResult; const config = getOrCreateConfig(); @@ -174,7 +175,7 @@ export async function runEnvSwitch(name?: string): Promise { message: 'Select an environment', options, }); - if (clack.isCancel(selected)) process.exit(0); + if (clack.isCancel(selected)) exitWithCode(ExitCode.CANCELLED); name = selected as string; } diff --git a/src/commands/install-skill.ts b/src/commands/install-skill.ts index 6cecc35b..cde0edca 100644 --- a/src/commands/install-skill.ts +++ b/src/commands/install-skill.ts @@ -4,6 +4,7 @@ import { existsSync } from 'fs'; import { mkdir, mkdtemp, cp, rename, rm, readdir, readFile, stat, access, writeFile } from 'fs/promises'; import chalk from 'chalk'; import { getSkillsDir as getSkillsPackageDir } from '@workos/skills'; +import { ExitCode, exitWithCode } from '../utils/exit-codes.js'; export const SKILL_VERSION_MARKER_FILENAME = '.workos-skill-version'; @@ -201,7 +202,7 @@ export async function runInstallSkill(options: InstallSkillOptions): Promise { } catch (error) { const msg = error instanceof Error ? error.message : String(error); clack.log.error(`Failed to start authentication: ${msg}`); - process.exit(1); + exitWithCode(ExitCode.GENERAL_ERROR); } clack.log.info(`\nOpen this URL in your browser:\n`); @@ -193,6 +193,6 @@ export async function runLogin(): Promise { const msg = error instanceof Error ? error.message : String(error); clack.log.error(`Authentication error: ${msg}`); } - process.exit(1); + exitWithCode(ExitCode.GENERAL_ERROR); } } diff --git a/src/commands/uninstall-skill.ts b/src/commands/uninstall-skill.ts index 64e4f1f9..ac9fbd46 100644 --- a/src/commands/uninstall-skill.ts +++ b/src/commands/uninstall-skill.ts @@ -6,6 +6,7 @@ import chalk from 'chalk'; import { logError, logInfo, logWarn } from '../utils/debug.js'; import { exitWithError, isJsonMode, outputJson } from '../utils/output.js'; import { createAgents, detectAgents, discoverSkills, getSkillsDir, type AgentConfig } from './install-skill.js'; +import { ExitCode, exitWithCode } from '../utils/exit-codes.js'; export interface UninstallSkillOptions { skill?: string[]; @@ -121,7 +122,7 @@ export async function runUninstallSkill(options: UninstallSkillOptions): Promise if (isJsonMode()) { outputJson({ removed, skipped, failed }); if (failed.length > 0) { - process.exit(1); + exitWithCode(ExitCode.GENERAL_ERROR); } return; } @@ -144,7 +145,7 @@ export async function runUninstallSkill(options: UninstallSkillOptions): Promise for (const r of failed) { console.log(` ${r.skill} ← ${r.agent}: ${chalk.dim(r.error)}`); } - process.exit(1); + exitWithCode(ExitCode.GENERAL_ERROR); } console.log(chalk.green('\nDone!')); diff --git a/src/utils/command-telemetry.spec.ts b/src/utils/command-telemetry.spec.ts index e46638f7..982df53a 100644 --- a/src/utils/command-telemetry.spec.ts +++ b/src/utils/command-telemetry.spec.ts @@ -1,3 +1,5 @@ +import { readFileSync } from 'fs'; +import { resolve } from 'path'; import { describe, it, expect, beforeEach, vi } from 'vitest'; import { resolveCanonicalName, extractUserFlags, commandTelemetryMiddleware, wrapCommandHandler } from './command-telemetry.js'; @@ -197,4 +199,73 @@ describe('command-telemetry', () => { expect(errorArg.message).toBe('string error'); }); }); + + describe('telemetry coverage', () => { + it('all top-level .command() inline handlers in bin.ts use wrapCommandHandler or are in the skip list', () => { + const binSource = readFileSync(resolve(__dirname, '../bin.ts'), 'utf-8'); + + // Commands that intentionally skip wrapCommandHandler because they have their own + // session-based telemetry. Keep in sync with SKIP_TELEMETRY_COMMANDS in command-telemetry.ts + // (root maps to the $0 default handler). + const INTENTIONAL_SKIPS = new Set(['install', 'dashboard', '$0']); + + const lines = binSource.split('\n'); + const unwrapped: string[] = []; + + for (let i = 0; i < lines.length; i++) { + const line = lines[i]; + // Match the start of an inline yargs handler: `async (argv) =>` or `async () =>` + if (!/async\s*\((?:argv)?\)\s*=>/.test(line)) continue; + // Already wrapped on this line + if (line.includes('wrapCommandHandler(')) continue; + // Skip yargs middleware (e.g. `.middleware(async (argv) => ...)`) + if (/\.middleware\(/.test(line)) continue; + + // Walk backwards. The nearest preceding scope-opening line is either: + // - `registerSubcommand(...)` — handler is auto-wrapped, skip + // - `.command(` — this is the owning top-level command. Capture its name + // argument (which may be on the same line or on the next non-empty line). + // Whichever comes FIRST (closest to this handler line) wins. + let commandName: string | null = null; + let isSubcommandHandler = false; + for (let j = i - 1; j >= Math.max(0, i - 100); j--) { + const prev = lines[j]; + if (/registerSubcommand\(/.test(prev)) { + isSubcommandHandler = true; + break; + } + if (/\.command\(/.test(prev)) { + // Match name on same line, or look at the next non-empty line if .command( ends the line. + const inline = prev.match(/\.command\(\s*(?:'([^']+)'|"([^"]+)"|\[\s*'([^']+)'|\[\s*"([^"]+)")/); + if (inline) { + commandName = inline[1] ?? inline[2] ?? inline[3] ?? inline[4] ?? null; + } else { + // .command( with no name on this line — name is on the following line(s). + for (let k = j + 1; k < i; k++) { + const nameLine = lines[k].trim(); + if (!nameLine) continue; + const m = nameLine.match(/^(?:'([^']+)'|"([^"]+)"|\[\s*'([^']+)'|\[\s*"([^"]+)")/); + if (m) { + commandName = m[1] ?? m[2] ?? m[3] ?? m[4] ?? null; + } + break; + } + } + break; + } + } + + if (isSubcommandHandler) continue; + if (!commandName) continue; + + // Extract the leading token (e.g. "setup-org " -> "setup-org") for the skip check. + const firstToken = commandName.split(/\s+/)[0]; + if (INTENTIONAL_SKIPS.has(firstToken)) continue; + + unwrapped.push(`"${commandName}" (bin.ts:${i + 1})`); + } + + expect(unwrapped).toEqual([]); + }); + }); }); diff --git a/src/utils/help-json.ts b/src/utils/help-json.ts index ce66f5f2..377fb584 100644 --- a/src/utils/help-json.ts +++ b/src/utils/help-json.ts @@ -7,6 +7,7 @@ */ import { getVersion } from '../lib/settings.js'; +import { COMMAND_ALIASES } from '../lib/command-aliases.js'; export interface OptionSchema { name: string; @@ -1327,10 +1328,9 @@ const globalOptions: OptionSchema[] = [ // Public API // --------------------------------------------------------------------------- -const commandAliases: Record = { org: 'organization' }; const helpJsonCommandNames = new Set([ ...commands.map((command) => command.name.split(' ')[0]), - ...Object.keys(commandAliases), + ...Object.keys(COMMAND_ALIASES), ]); /** @@ -1350,7 +1350,7 @@ export function extractHelpJsonCommand(argv: string[]): string | undefined { continue; } if (!arg.startsWith('-') && helpJsonCommandNames.has(arg)) { - return commandAliases[arg] ?? arg; + return COMMAND_ALIASES[arg] ?? arg; } } return undefined;