Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion backend/src/routes/health.ts
Original file line number Diff line number Diff line change
Expand Up @@ -79,18 +79,22 @@ export function createHealthRoutes(db: Database, openCodeSupervisor?: OpenCodeSu
: null
const opencodeHealthy = lifecycle?.healthy ?? await opencodeServerManager.checkHealth()
const startupError = lifecycle?.lastError ?? opencodeServerManager.getLastStartupError()
const ready = lifecycle ? lifecycle.ready : opencodeHealthy

const status = lifecycle?.state === 'recovering'
? 'degraded'
: startupError && !opencodeHealthy
? 'unhealthy'
: opencodeHealthy && !ready
? 'degraded'
: (dbCheck && opencodeHealthy ? 'healthy' : 'degraded')

const response: Record<string, unknown> = {
status,
timestamp: new Date().toISOString(),
database: dbCheck ? 'connected' : 'disconnected',
opencode: opencodeHealthy ? 'healthy' : 'unhealthy',
opencode: !opencodeHealthy ? 'unhealthy' : (ready ? 'healthy' : 'busy'),
opencodeReady: ready,
opencodePort: opencodeServerManager.getPort(),
opencodeVersion: opencodeServerManager.getVersion(),
opencodeMinVersion: opencodeServerManager.getMinVersion(),
Expand Down
57 changes: 55 additions & 2 deletions backend/src/services/opencode-single-server.ts
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,10 @@ import { writeFileContent } from './file-operations'

const MIN_OPENCODE_VERSION = '1.0.137'
const MAX_STDERR_SIZE = 10240
const HEALTH_CHECK_TIMEOUT_MS = 3000
const DEPRECATED_PLUGIN_PACKAGES = ['opencode-openai-codex-auth', 'opencode-copilot-auth']

/**
 * Details of a server process exit, as delivered to listeners registered
 * for unexpected exits.
 */
export interface ServerExitInfo {
  /** Exit code from the child process `exit` event, or `null` when none was reported. */
  code: number | null
  /** Signal from the child process `exit` event, or `null` when none was reported. */
  signal: NodeJS.Signals | null
}

type StartupValidationIssue = {
path: string
message: string
Expand Down Expand Up @@ -96,6 +97,7 @@ const getOpenCodeServerPort = () => ENV.OPENCODE.PORT
const getOpenCodeServerHost = () => ENV.OPENCODE.HOST
const getOpenCodeServerPublicUrl = () => ENV.OPENCODE.PUBLIC_URL
const getOpenCodeServerUsername = () => ENV.OPENCODE.SERVER_USERNAME
const getHealthProbeTimeoutMs = () => ENV.OPENCODE.HEALTH_PROBE_TIMEOUT_MS ?? 10000

class OpenCodeServerManager {
private static instance: OpenCodeServerManager
Expand All @@ -107,6 +109,8 @@ class OpenCodeServerManager {
private lastStartupError: string | null = null
private opInProgress: boolean = false
private openCodeClient: OpenCodeClient | null = null
private expectedExit = false
private exitListeners = new Set<(info: ServerExitInfo) => void>()

private constructor() {}

Expand Down Expand Up @@ -173,6 +177,22 @@ class OpenCodeServerManager {
return this.opInProgress
}

/**
 * Reports whether the tracked server PID still refers to an existing process.
 *
 * Sends signal `0` to the PID, which checks for existence without delivering
 * a signal. An `EPERM` failure is counted as alive; any other failure, or
 * having no tracked PID, is counted as not alive.
 *
 * @returns `true` when the probe succeeds or fails with `EPERM`, otherwise `false`.
 */
isProcessAlive(): boolean {
  const pid = this.serverPid
  if (pid === null) return false

  try {
    process.kill(pid, 0)
  } catch (error) {
    // Only errors carrying a `code` of 'EPERM' mean "exists but not signalable".
    if (error && typeof error === 'object' && 'code' in error) {
      return (error as { code: string }).code === 'EPERM'
    }
    return false
  }

  return true
}

/**
 * Registers a listener in the set of exit listeners.
 *
 * @param listener Callback receiving the exit code and signal.
 * @returns A function that removes this listener again.
 */
onUnexpectedExit(listener: (info: ServerExitInfo) => void): () => void {
  const unsubscribe = (): void => {
    this.exitListeners.delete(listener)
  }

  this.exitListeners.add(listener)
  return unsubscribe
}

async start(retryAfterPluginInstall = true, allowNested = false): Promise<void> {
const acquired = this.acquireOp()
if (!acquired && !allowNested) {
Expand Down Expand Up @@ -257,6 +277,23 @@ class OpenCodeServerManager {
return
}
} else {
const isAlive = existingProcesses.some(proc => {
try {
process.kill(proc.pid, 0)
return true
} catch (error) {
const code = error && typeof error === 'object' && 'code' in error ? (error as { code: string }).code : ''
return code === 'EPERM'
}
})
if (isAlive) {
logger.warn('OpenCode server is alive but not answering readiness probe (busy); keeping existing process')
this.isHealthy = true
if (existingProcesses[0]) {
this.serverPid = existingProcesses[0].pid
}
return
}
logger.warn('Killing unhealthy OpenCode server')
for (const proc of existingProcesses) {
try {
Expand Down Expand Up @@ -373,7 +410,10 @@ class OpenCodeServerManager {
})
}

const child = this.serverProcess
this.serverProcess.on('exit', (code, signal) => {
if (this.serverProcess !== child) return
const wasExpected = this.expectedExit
if (code !== null && code !== 0) {
const fallback = `Server exited with code ${code}${stderrOutput ? `: ${stderrOutput.slice(-500)}` : ''}`
this.lastStartupError = formatStartupError(stderrOutput, fallback)
Expand All @@ -382,15 +422,27 @@ class OpenCodeServerManager {
this.lastStartupError = `Server terminated by signal ${signal}`
logger.error('OpenCode server process terminated:', this.lastStartupError)
}
this.isHealthy = false
this.serverPid = null
if (!wasExpected) {
this.exitListeners.forEach((listener) => { try { listener({ code, signal }) } catch { /* ignore */ } })
}
})

this.serverPid = this.serverProcess.pid ?? null
this.expectedExit = false

logger.info(`OpenCode server started with PID ${this.serverPid}`)

const healthTimeoutMs = configuredPluginCount > 0 ? 120000 : 30000
const healthy = await this.waitForHealth(healthTimeoutMs)
if (!healthy) {
if (this.isProcessAlive()) {
logger.warn(`OpenCode server process is alive but not answering readiness probe after ${Math.round(healthTimeoutMs / 1000)}s; treating as busy`)
this.isHealthy = true
return
}

const fallback = `Server failed to become healthy after ${Math.round(healthTimeoutMs / 1000)}s${stderrOutput ? `. Last error: ${stderrOutput.slice(-500)}` : ''}`
this.lastStartupError = formatStartupError(stderrOutput, fallback)
if (configuredPluginCount > 0 && retryAfterPluginInstall) {
Expand Down Expand Up @@ -429,6 +481,7 @@ class OpenCodeServerManager {
if (!this.serverPid) return

logger.info('Stopping OpenCode server')
this.expectedExit = true
try {
process.kill(this.serverPid, 'SIGTERM')
} catch (error) {
Expand Down Expand Up @@ -705,7 +758,7 @@ class OpenCodeServerManager {
const response = await this.openCodeClient.forward({
method: 'GET',
path: '/doc',
signal: AbortSignal.timeout(HEALTH_CHECK_TIMEOUT_MS),
signal: AbortSignal.timeout(getHealthProbeTimeoutMs()),
})
return response.ok
} catch {
Expand Down
59 changes: 54 additions & 5 deletions backend/src/services/opencode-supervisor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,12 @@ export type OpenCodeOperationReason =
| 'settings_restart'
| 'settings_reload'
| 'manual'
| 'crash'

export interface OpenCodeLifecycleStatus {
state: OpenCodeLifecycleState
healthy: boolean
ready: boolean
port: number
version: string | null
minVersion: string
Expand Down Expand Up @@ -66,6 +68,8 @@ export class OpenCodeSupervisor {
private attemptedRecoveryActions: OpenCodeRecoveryAction[] = []
private consecutiveFailures = 0
private operationInProgress = false
private ready = false
private exitListenerRegistered = false
private updatedAt = new Date().toISOString()

constructor(
Expand All @@ -88,17 +92,27 @@ export class OpenCodeSupervisor {
}

async start(): Promise<OpenCodeLifecycleStatus> {
if (!this.exitListenerRegistered) {
this.exitListenerRegistered = true
this.openCodeServerManager.onUnexpectedExit(() => { void this.handleUnexpectedExit() })
}

await this.runLifecycleOperation(async () => {
this.setState('starting')

try {
await this.openCodeServerManager.start()
const healthy = await this.openCodeServerManager.checkHealth()
if (healthy) {
const ready = await this.openCodeServerManager.checkHealth()
if (ready) {
this.markHealthy()
return this.getStatus()
}

if (this.openCodeServerManager.isProcessAlive()) {
this.markBusy()
return this.getStatus()
}

this.recordFailure('OpenCode server failed to become healthy during startup')
} catch (error) {
this.recordFailure(error)
Expand Down Expand Up @@ -161,6 +175,7 @@ export class OpenCodeSupervisor {

await this.runLifecycleOperation(async () => {
this.setState('stopping')
this.ready = false
await this.openCodeServerManager.stop()
this.setState('stopped')
return this.getStatus()
Expand All @@ -177,6 +192,7 @@ export class OpenCodeSupervisor {
return {
state: this.state,
healthy: this.state === 'healthy',
ready: this.ready,
port: this.openCodeServerManager.getPort(),
version: this.openCodeServerManager.getVersion(),
minVersion: this.openCodeServerManager.getMinVersion(),
Expand Down Expand Up @@ -206,15 +222,22 @@ export class OpenCodeSupervisor {
}

private async refreshHealthOrRecover(reason: OpenCodeOperationReason, respectThreshold = false): Promise<OpenCodeLifecycleStatus> {
const healthy = await this.openCodeServerManager.checkHealth()
if (healthy) {
const ready = await this.openCodeServerManager.checkHealth()
if (ready) {
this.markHealthy()
return this.getStatus()
}

if (this.openCodeServerManager.isProcessAlive()) {
logger.warn('OpenCode is alive but not answering readiness probe (busy); skipping restart')
this.markBusy()
return this.getStatus()
}

this.consecutiveFailures += 1
this.ready = false
this.setState('unhealthy')
this.lastError = this.openCodeServerManager.getLastStartupError() ?? 'OpenCode health check failed'
this.lastError = this.openCodeServerManager.getLastStartupError() ?? 'OpenCode process is not running'

if (respectThreshold && this.consecutiveFailures < this.failureThreshold) {
return this.getStatus()
Expand Down Expand Up @@ -253,6 +276,20 @@ export class OpenCodeSupervisor {
return this.getStatus()
}

/**
 * Starts crash recovery after the OpenCode server process exits unexpectedly.
 *
 * Does nothing when watching is disabled, or when either this supervisor or
 * the server manager already has an operation in progress. Otherwise it marks
 * the supervisor unhealthy, records the last known error, and runs
 * `recover('crash')` inside a lifecycle operation.
 *
 * The promise returned here never rejects: any failure raised by the lifecycle
 * operation is caught and logged. This matters for callers that invoke the
 * method without awaiting it, where a rejection would otherwise surface as an
 * unhandled promise rejection.
 */
private async handleUnexpectedExit(): Promise<void> {
  if (!this.isWatchEnabled()) return
  if (this.operationInProgress || this.openCodeServerManager.isOperationInProgress()) return

  logger.warn('OpenCode server exited unexpectedly; starting recovery')
  try {
    await this.runLifecycleOperation(async () => {
      this.consecutiveFailures += 1
      this.ready = false
      this.setState('unhealthy')
      // Prefer the error captured by the server manager; fall back to a generic message.
      this.lastError = this.openCodeServerManager.getLastStartupError() ?? 'OpenCode server exited unexpectedly'
      return this.recover('crash')
    })
  } catch (error) {
    // Swallow after logging so a failed recovery cannot become an unhandled rejection.
    logger.error('OpenCode crash recovery failed:', error)
  }
}

private async runRecoveryAction(action: OpenCodeRecoveryAction): Promise<void> {
if (action === 'restart') {
await this.openCodeServerManager.restart()
Expand Down Expand Up @@ -352,6 +389,17 @@ export class OpenCodeSupervisor {

/**
 * Puts the supervisor into the healthy-and-ready state and clears all
 * failure and recovery bookkeeping.
 */
private markHealthy(): void {
  // Clear failure and recovery tracking.
  this.consecutiveFailures = 0
  this.attemptedRecoveryActions = []
  this.activeRecoveryAction = null
  this.lastError = null

  // Healthy and answering the readiness probe.
  this.ready = true
  this.state = 'healthy'

  // NOTE(review): presumably refreshes `updatedAt` — confirm against touch().
  this.touch()
}

private markBusy(): void {
this.state = 'healthy'
this.ready = false
this.lastError = null
this.activeRecoveryAction = null
this.attemptedRecoveryActions = []
Expand All @@ -361,6 +409,7 @@ export class OpenCodeSupervisor {

private recordFailure(error: unknown): void {
this.consecutiveFailures += 1
this.ready = false
this.lastError = error instanceof Error
? error.message
: this.openCodeServerManager.getLastStartupError() ?? 'Unknown OpenCode lifecycle error'
Expand Down
34 changes: 34 additions & 0 deletions backend/test/routes/health.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,40 @@ describe('Health Routes', () => {
expect(json.database).toBe('disconnected')
})

it('should return degraded status with opencode busy when supervisor reports healthy but not ready', async () => {
  // Database probe returns a row and the server manager reports no startup error.
  mockDb.prepare().get.mockReturnValue({ 1: 1 })
  ;(opencodeServerManager.getLastStartupError as ReturnType<typeof vi.fn>).mockReturnValueOnce(null)

  // Lifecycle snapshot for a supervisor that is healthy but not ready.
  const busyLifecycle = {
    state: 'healthy',
    healthy: true,
    ready: false,
    port: 5551,
    version: '1.0.0',
    minVersion: '1.0.137',
    versionSupported: true,
    lastError: null,
    activeRecoveryAction: null,
    attemptedRecoveryActions: [],
    nextRecoveryAction: null,
    failureCount: 0,
    watching: true,
    updatedAt: new Date().toISOString(),
  }

  // Stub supervisor whose checkNow resolves once with the snapshot above.
  const checkNow = vi.fn().mockResolvedValueOnce(busyLifecycle)
  const supervisorStub = { checkNow }

  const app = createHealthRoutes(mockDb, supervisorStub as any)
  const res = await app.fetch(new Request('http://localhost/'))
  const body = await res.json() as Record<string, unknown>

  // Still HTTP 200, but reported as degraded/busy rather than healthy.
  expect(res.status).toBe(200)
  expect(body.status).toBe('degraded')
  expect(body.opencode).toBe('busy')
  expect(body.opencodeReady).toBe(false)
})

it('should return 503 when health check throws an error', async () => {
mockDb.prepare().get.mockImplementationOnce(() => {
throw new Error('Database error')
Expand Down
Loading