diff --git a/.changeset/ready-endpoint-bg-bootstrap.md b/.changeset/ready-endpoint-bg-bootstrap.md new file mode 100644 index 0000000000..95f8ba6697 --- /dev/null +++ b/.changeset/ready-endpoint-bg-bootstrap.md @@ -0,0 +1,16 @@ +--- +"ensrainbow": minor +"@ensnode/ensrainbow-sdk": minor +"ensindexer": patch +--- + +ENSRainbow now starts its HTTP server immediately and downloads/validates its database in the background, instead of blocking container startup behind a netcat placeholder. + +- **New `GET /ready` endpoint**: returns `200 { status: "ok" }` once the database is attached, or `503 Service Unavailable` while ENSRainbow is still bootstrapping. `/health` is now a pure liveness probe that succeeds as soon as the HTTP server is listening. +- **503 responses for API routes during bootstrap**: `/v1/heal`, `/v1/labels/count`, and `/v1/config` return a structured `ServiceUnavailableError` (`errorCode: 503`) until the database is ready. +- **New Docker entrypoint**: the container now runs `pnpm run entrypoint` from the `apps/ensrainbow` working directory (implemented in Node via `tsx src/cli.ts entrypoint`), which replaces `scripts/entrypoint.sh` and the `netcat` workaround. +- **Graceful shutdown during bootstrap**: SIGTERM/SIGINT now abort an in-flight bootstrap. Spawned `download`/`tar` child processes are terminated (SIGTERM → SIGKILL after a 5s grace period) and any partially-opened LevelDB handle is closed before the HTTP server and DB-backed server shut down, so the container exits promptly without leaking child processes or LevelDB locks. +- **SDK client**: added `EnsRainbowApiClient.ready()`, plus `EnsRainbow.ReadyResponse` / `EnsRainbow.ServiceUnavailableError` types and `ErrorCode.ServiceUnavailable`. 
The client now throws a typed `EnsRainbowHttpError` (with structured `status` / `statusText` properties) from `ready()`, `health()`, and `config()` whenever the service responds with a non-2xx HTTP status, so callers can branch their retry/abort logic on the status without parsing message strings. +- **ENSIndexer**: `waitForEnsRainbowToBeReady` now polls `/ready` (via `ensRainbowClient.ready()`) instead of `/health`, so it correctly waits for the database to finish bootstrapping. It also aborts retries immediately on non-503 HTTP responses (e.g. `404` from a misconfigured `ENSRAINBOW_URL`, `500` from a broken instance) instead of blocking startup for ~1h, while still retrying on `503 Service Unavailable` and on transient network errors. + +**Migration**: if you previously polled `GET /health` to gate traffic on database readiness, switch to `GET /ready` (or `client.ready()`). `/health` is still available and still returns `200`, but it now indicates liveness only. diff --git a/apps/ensindexer/src/lib/ensrainbow/singleton.test.ts b/apps/ensindexer/src/lib/ensrainbow/singleton.test.ts new file mode 100644 index 0000000000..c438e92b11 --- /dev/null +++ b/apps/ensindexer/src/lib/ensrainbow/singleton.test.ts @@ -0,0 +1,57 @@ +import { describe, expect, it } from "vitest"; + +import "@/lib/__test__/mockLogger"; + +import { setupConfigMock } from "@/lib/__test__/mockConfig"; + +setupConfigMock(); + +import { EnsRainbowHttpError } from "@ensnode/ensrainbow-sdk"; + +import { shouldRetryReadinessCheck } from "./singleton"; + +/** + * `shouldRetryReadinessCheck` is the heart of the readiness-check retry policy used by + * `waitForEnsRainbowToBeReady`. The integration with `p-retry` is a thin wiring (passing this + * predicate into `pRetry({ shouldRetry })`), so we exhaustively unit-test the predicate here + * rather than running the full retry loop with fake timers (which is fragile against `p-retry` + * internals and module-cache resets). 
+ */ +describe("shouldRetryReadinessCheck", () => { + it("retries on EnsRainbowHttpError with status 503 (still bootstrapping)", () => { + const error = new EnsRainbowHttpError("not ready", 503, "Service Unavailable"); + expect(shouldRetryReadinessCheck(error)).toBe(true); + }); + + it("aborts on EnsRainbowHttpError with status 404 (likely misconfigured base URL)", () => { + const error = new EnsRainbowHttpError("not found", 404, "Not Found"); + expect(shouldRetryReadinessCheck(error)).toBe(false); + }); + + it("aborts on EnsRainbowHttpError with status 500 (server error)", () => { + const error = new EnsRainbowHttpError("boom", 500, "Internal Server Error"); + expect(shouldRetryReadinessCheck(error)).toBe(false); + }); + + it("aborts on EnsRainbowHttpError with status 502 (bad gateway)", () => { + const error = new EnsRainbowHttpError("bad gateway", 502, "Bad Gateway"); + expect(shouldRetryReadinessCheck(error)).toBe(false); + }); + + it("aborts on EnsRainbowHttpError with status 401 (auth misconfiguration)", () => { + const error = new EnsRainbowHttpError("unauthorized", 401, "Unauthorized"); + expect(shouldRetryReadinessCheck(error)).toBe(false); + }); + + it("retries on plain Error (network/DNS/ECONNREFUSED), since these are transient during cold start", () => { + expect(shouldRetryReadinessCheck(new TypeError("fetch failed"))).toBe(true); + expect(shouldRetryReadinessCheck(new Error("connect ECONNREFUSED 127.0.0.1:3223"))).toBe(true); + }); + + it("retries on non-Error rejection values (defensive fallback)", () => { + expect(shouldRetryReadinessCheck("string error")).toBe(true); + expect(shouldRetryReadinessCheck(undefined)).toBe(true); + expect(shouldRetryReadinessCheck(null)).toBe(true); + expect(shouldRetryReadinessCheck({ message: "weird" })).toBe(true); + }); +}); diff --git a/apps/ensindexer/src/lib/ensrainbow/singleton.ts b/apps/ensindexer/src/lib/ensrainbow/singleton.ts index c6e11a2100..9462b7bb86 100644 --- 
a/apps/ensindexer/src/lib/ensrainbow/singleton.ts +++ b/apps/ensindexer/src/lib/ensrainbow/singleton.ts @@ -3,7 +3,7 @@ import config from "@/config"; import { secondsToMilliseconds } from "date-fns"; import pRetry from "p-retry"; -import { EnsRainbowApiClient } from "@ensnode/ensrainbow-sdk"; +import { EnsRainbowApiClient, EnsRainbowHttpError } from "@ensnode/ensrainbow-sdk"; import { logger } from "@/lib/logger"; @@ -96,6 +96,28 @@ export function waitForEnsRainbowToBeHealthy(): Promise { */ let waitForEnsRainbowToBeReadyPromise: Promise | undefined; +/** + * Determine whether a readiness-check failure should be retried. + * + * Retry policy: + * - HTTP 503 (`ServiceUnavailable`) — ENSRainbow is still bootstrapping. Retryable. + * - Any other `EnsRainbowHttpError` (e.g. 404, 500) — almost certainly indicates a + * misconfigured `ENSRAINBOW_URL`, a broken instance, or a routing/ingress issue. These do + * not fix themselves over the course of an hour, so we abort fast to surface the + * configuration/outage problem instead of stalling startup for ~60 minutes. + * - Anything else (network errors like `ECONNREFUSED`/DNS failures, JSON parse errors, + * etc.) — retryable. These are common during cold start, before the ENSRainbow HTTP + * server has bound its port. + * + * Exported for testing. + */ +export function shouldRetryReadinessCheck(error: unknown): boolean { + if (error instanceof EnsRainbowHttpError) { + return error.status === 503; + } + return true; +} + /** * Wait for ENSRainbow to be ready * @@ -103,10 +125,15 @@ let waitForEnsRainbowToBeReadyPromise: Promise | undefined; * * Note: It may take 30+ minutes for the ENSRainbow instance to become ready in * a cold start scenario. We use retries with a fixed interval between attempts - * for the ENSRainbow health check to allow for ample time for ENSRainbow to - * become ready. + * for the ENSRainbow readiness check to allow for ample time for bootstrap to + * complete. + * + * Non-503 HTTP failures (e.g. 
404 misrouting, 500 server errors) abort retries + * immediately via {@link shouldRetryReadinessCheck}, so configuration/outage + * problems surface quickly instead of being masked by an hour of retries. * - * @throws When ENSRainbow fails to become ready after all configured retry attempts. + * @throws When ENSRainbow fails to become ready after all configured retry attempts, + * or as soon as a non-retryable error (e.g. non-503 HTTP status) is encountered. * This error will trigger termination of the ENSIndexer process. */ export function waitForEnsRainbowToBeReady(): Promise { @@ -119,30 +146,30 @@ export function waitForEnsRainbowToBeReady(): Promise { ensRainbowInstance: ensRainbowUrl.href, }); - waitForEnsRainbowToBeReadyPromise = pRetry( - // TODO: replace this count check with an explicit `ready()` method in ENSRainbow Client. - async () => { - const { count } = await ensRainbowClient.count(); - - if (count === 0) { - throw new Error("ENSRainbow instance is not ready yet."); - } - }, - { - retries: 60, // This allows for a total of over 1 hour of retries with 1 minute between attempts. - minTimeout: secondsToMilliseconds(60), - maxTimeout: secondsToMilliseconds(60), - onFailedAttempt: ({ attemptNumber, retriesLeft }) => { - logger.warn({ - msg: `ENSRainbow readiness check failed`, - attempt: attemptNumber, - retriesLeft, - ensRainbowInstance: ensRainbowUrl.href, - advice: `This might be due to ENSRainbow having a cold start, which can take 30+ minutes.`, - }); - }, + waitForEnsRainbowToBeReadyPromise = pRetry(async () => ensRainbowClient.ready(), { + retries: 60, // This allows for a total of over 1 hour of retries with 1 minute between attempts. 
+ minTimeout: secondsToMilliseconds(60), + maxTimeout: secondsToMilliseconds(60), + shouldRetry: ({ error }) => shouldRetryReadinessCheck(error), + onFailedAttempt: ({ error, attemptNumber, retriesLeft }) => { + const willAbort = !shouldRetryReadinessCheck(error); + const isHttpError = error instanceof EnsRainbowHttpError; + logger.warn({ + msg: willAbort + ? `ENSRainbow readiness check failed with a non-retryable error; aborting retries` + : `ENSRainbow readiness check failed`, + attempt: attemptNumber, + retriesLeft, + // Always surface the error on abort or final attempt; otherwise keep logs concise. + error: willAbort || retriesLeft === 0 ? error : undefined, + httpStatus: isHttpError ? error.status : undefined, + ensRainbowInstance: ensRainbowUrl.href, + advice: willAbort + ? `This usually indicates a misconfigured ENSRAINBOW_URL, a broken ENSRainbow instance, or an ingress/routing issue. Verify the URL points at a healthy ENSRainbow server.` + : `This might be due to ENSRainbow still bootstrapping its database, which can take 30+ minutes during a cold start.`, + }); }, - ) + }) .then(() => { logger.info({ msg: `ENSRainbow instance is ready`, @@ -150,14 +177,23 @@ export function waitForEnsRainbowToBeReady(): Promise { }); }) .catch((error) => { + const errorMessage = error instanceof Error ? error.message : "Unknown error"; + const isHttpError = error instanceof EnsRainbowHttpError; + const isAbort = isHttpError && error.status !== 503; + logger.error({ - msg: `ENSRainbow readiness check failed after multiple attempts`, + msg: isAbort + ? `ENSRainbow readiness check aborted due to non-retryable HTTP error` + : `ENSRainbow readiness check failed after multiple attempts`, error, + httpStatus: isHttpError ? 
error.status : undefined, ensRainbowInstance: ensRainbowUrl.href, }); - // Throw the error to terminate the ENSIndexer process due to the failed health check of a critical dependency - throw error; + // Throw the error to terminate the ENSIndexer process due to the failed readiness check of a critical dependency + throw new Error(errorMessage, { + cause: error instanceof Error ? error : undefined, + }); }); return waitForEnsRainbowToBeReadyPromise; diff --git a/apps/ensindexer/src/lib/public-config-builder/public-config-builder.test.ts b/apps/ensindexer/src/lib/public-config-builder/public-config-builder.test.ts index c52966478f..b34c7ffd6b 100644 --- a/apps/ensindexer/src/lib/public-config-builder/public-config-builder.test.ts +++ b/apps/ensindexer/src/lib/public-config-builder/public-config-builder.test.ts @@ -230,9 +230,49 @@ describe("PublicConfigBuilder", () => { expect(result).toBe(customConfig); expect(result.isSubgraphCompatible).toBe(false); }); + + it("awaits readiness before fetching ENSRainbow config", async () => { + const callOrder: string[] = []; + const ensRainbowClientMock = { + config: vi.fn().mockImplementation(async () => { + callOrder.push("config"); + return mockEnsRainbowConfig; + }), + } as unknown as EnsRainbow.ApiClient; + const waitForReady = vi.fn().mockImplementation(async () => { + callOrder.push("wait"); + }); + + setupStandardMocks(); + const mockPublicConfig = createMockPublicConfig(); + vi.mocked(validateEnsIndexerPublicConfig).mockReturnValue(mockPublicConfig); + + const builder = new PublicConfigBuilder(ensRainbowClientMock, waitForReady); + const result = await builder.getPublicConfig(); + + expect(waitForReady).toHaveBeenCalledTimes(1); + expect(ensRainbowClientMock.config).toHaveBeenCalledTimes(1); + expect(callOrder).toEqual(["wait", "config"]); + expect(result).toBe(mockPublicConfig); + }); }); describe("getPublicConfig() - error handling", () => { + it("throws when readiness check fails and does not call config()", async () 
=> { + const readinessError = new Error("ENSRainbow not ready"); + const ensRainbowClientMock = { + config: vi.fn(), + } as unknown as EnsRainbow.ApiClient; + const waitForReady = vi.fn().mockRejectedValue(readinessError); + + const builder = new PublicConfigBuilder(ensRainbowClientMock, waitForReady); + + await expect(builder.getPublicConfig()).rejects.toThrow(readinessError); + expect(waitForReady).toHaveBeenCalledTimes(1); + expect(ensRainbowClientMock.config).not.toHaveBeenCalled(); + expect(validateEnsIndexerPublicConfig).not.toHaveBeenCalled(); + }); + it("throws when ENSRainbow client config() fails", async () => { // Arrange const ensRainbowError = new Error("ENSRainbow service unavailable"); diff --git a/apps/ensindexer/src/lib/public-config-builder/public-config-builder.ts b/apps/ensindexer/src/lib/public-config-builder/public-config-builder.ts index 9b132ddcee..a894a66be8 100644 --- a/apps/ensindexer/src/lib/public-config-builder/public-config-builder.ts +++ b/apps/ensindexer/src/lib/public-config-builder/public-config-builder.ts @@ -19,6 +19,15 @@ export class PublicConfigBuilder { */ private ensRainbowClient: EnsRainbow.ApiClient; + /** + * One-time async readiness hook awaited before the first + * `ensRainbowClient.config()` invocation, so callers don't race ENSRainbow's + * background bootstrap. Defaults to a no-op for callers that don't need to + * gate on readiness (e.g. tests or environments where ENSRainbow is already + * known to be ready). + */ + private waitForEnsRainbowReady: () => Promise; + /** * Immutable ENSIndexer Public Config * @@ -29,9 +38,15 @@ export class PublicConfigBuilder { /** * @param ensRainbowClient ENSRainbow Client instance used to fetch ENSRainbow Public Config + * @param waitForEnsRainbowReady One-time async readiness hook awaited before the first + * `ensRainbowClient.config()` invocation. Defaults to a no-op. 
*/ - constructor(ensRainbowClient: EnsRainbow.ApiClient) { + constructor( + ensRainbowClient: EnsRainbow.ApiClient, + waitForEnsRainbowReady: () => Promise = async () => {}, + ) { this.ensRainbowClient = ensRainbowClient; + this.waitForEnsRainbowReady = waitForEnsRainbowReady; } /** @@ -47,6 +62,8 @@ export class PublicConfigBuilder { */ async getPublicConfig(): Promise { if (typeof this.immutablePublicConfig === "undefined") { + await this.waitForEnsRainbowReady(); + const [versionInfo, ensRainbowPublicConfig] = await Promise.all([ this.getEnsIndexerVersionInfo(), // TODO: remove dependency on ENSRainbow by dropping `ensRainbowPublicConfig` from `EnsIndexerPublicConfig`. diff --git a/apps/ensindexer/src/lib/public-config-builder/singleton.ts b/apps/ensindexer/src/lib/public-config-builder/singleton.ts index 606fa7ca85..5252be2910 100644 --- a/apps/ensindexer/src/lib/public-config-builder/singleton.ts +++ b/apps/ensindexer/src/lib/public-config-builder/singleton.ts @@ -1,4 +1,7 @@ -import { ensRainbowClient } from "@/lib/ensrainbow/singleton"; +import { ensRainbowClient, waitForEnsRainbowToBeReady } from "@/lib/ensrainbow/singleton"; import { PublicConfigBuilder } from "@/lib/public-config-builder/public-config-builder"; -export const publicConfigBuilder = new PublicConfigBuilder(ensRainbowClient); +export const publicConfigBuilder = new PublicConfigBuilder( + ensRainbowClient, + waitForEnsRainbowToBeReady, +); diff --git a/apps/ensrainbow/Dockerfile b/apps/ensrainbow/Dockerfile index b4b4736c7f..bd0f719832 100644 --- a/apps/ensrainbow/Dockerfile +++ b/apps/ensrainbow/Dockerfile @@ -1,14 +1,10 @@ # Runtime image for ENSRainbow FROM node:24-slim AS runtime -# Install only essential system dependencies for runtime -# netcat-openbsd: Used during container initialization to keep the service port open -# while the database is being downloaded and validated (which can take up to 20 minutes). 
-# Without a listener on the port during this phase, Render's health checks fail and orchestration -# systems may mark the container as unhealthy or restart it prematurely. See scripts/entrypoint.sh for implementation details. -# Note: The netcat listener only keeps the port open and accepts connections; it does not respond -# to HTTP requests, so it will not work with Docker HEALTHCHECK commands that expect HTTP responses. See https://github.com/namehash/ensnode/issues/1610 -RUN apt-get update && apt-get install -y wget tar netcat-openbsd && rm -rf /var/lib/apt/lists/* +# Install only essential system dependencies for runtime. +# `wget` and `tar` are required by scripts/download-prebuilt-database.sh, which the in-process +# entrypoint spawns to fetch the pre-built database archive. +RUN apt-get update && apt-get install -y wget tar && rm -rf /var/lib/apt/lists/* # Set up pnpm ENV PNPM_HOME="/pnpm" @@ -34,16 +30,18 @@ COPY apps/ensrainbow/tsconfig.json apps/ensrainbow/ COPY apps/ensrainbow/vitest.config.ts apps/ensrainbow/ # Make scripts executable -RUN chmod +x /app/apps/ensrainbow/scripts/entrypoint.sh RUN chmod +x /app/apps/ensrainbow/scripts/download-prebuilt-database.sh # Set environment variables ENV NODE_ENV=production -# PORT will be used by entrypoint.sh, defaulting to 3223 if not set at runtime -# DB_SCHEMA_VERSION, LABEL_SET_ID, LABEL_SET_VERSION must be provided at runtime to the entrypoint +# PORT is consumed by the entrypoint command, defaulting to 3223 if not set at runtime. +# DB_SCHEMA_VERSION, LABEL_SET_ID, LABEL_SET_VERSION must be provided at runtime to the entrypoint. 
-# Default port, can be overridden by PORT env var for the entrypoint/serve command +# Default port, can be overridden by PORT env var for the entrypoint command EXPOSE 3223 -# Set the entrypoint -ENTRYPOINT ["/app/apps/ensrainbow/scripts/entrypoint.sh"] +# The entrypoint binds the HTTP server immediately (so /health and /ready respond while the +# database is still being downloaded) and runs download + validation in the background. +# See src/commands/entrypoint-command.ts for implementation details. +WORKDIR /app/apps/ensrainbow +ENTRYPOINT ["pnpm", "run", "entrypoint"] diff --git a/apps/ensrainbow/package.json b/apps/ensrainbow/package.json index 9fd554c957..cc7ff06267 100644 --- a/apps/ensrainbow/package.json +++ b/apps/ensrainbow/package.json @@ -13,6 +13,7 @@ "homepage": "https://github.com/namehash/ensnode/tree/main/apps/ensrainbow", "scripts": { "serve": "tsx src/cli.ts serve", + "entrypoint": "tsx src/cli.ts entrypoint", "ingest": "tsx src/cli.ts ingest", "ingest-ensrainbow": "tsx src/cli.ts ingest-ensrainbow", "validate": "tsx src/cli.ts validate", diff --git a/apps/ensrainbow/scripts/entrypoint.sh b/apps/ensrainbow/scripts/entrypoint.sh deleted file mode 100644 index 8bcc767ef0..0000000000 --- a/apps/ensrainbow/scripts/entrypoint.sh +++ /dev/null @@ -1,158 +0,0 @@ -#!/bin/bash -set -euo pipefail - -# Default values (can be overridden by environment variables) -DB_SCHEMA_VERSION="${DB_SCHEMA_VERSION:-}" -LABEL_SET_ID="${LABEL_SET_ID:-}" -LABEL_SET_VERSION="${LABEL_SET_VERSION:-}" -PORT="${PORT:-3223}" -DATA_DIR_NAME="data" # Name of the data directory within /app/apps/ensrainbow -APP_DIR="/app/apps/ensrainbow" -FINAL_DATA_DIR="${APP_DIR}/${DATA_DIR_NAME}" -DOWNLOAD_TEMP_DIR="${DOWNLOAD_TEMP_DIR:-/tmp/ensrainbow_download_temp}" -MARKER_FILE="${FINAL_DATA_DIR}/ensrainbow_db_ready" - -# Path for the data subdirectory, relative to APP_DIR. 
-# This assumes data is in ${APP_DIR}/${DATA_DIR_NAME}/data-${LABEL_SET_ID}/ -DB_SUBDIR_PATH="${DATA_DIR_NAME}/data-${LABEL_SET_ID}_${LABEL_SET_VERSION}" - -# Ensure required variables for download are set if we might download -if [ ! -f "${MARKER_FILE}" ]; then - if [ -z "$DB_SCHEMA_VERSION" ] || [ -z "$LABEL_SET_ID" ] || [ -z "$LABEL_SET_VERSION" ]; then - echo "Error: DB_SCHEMA_VERSION, LABEL_SET_ID, and LABEL_SET_VERSION environment variables must be set for initial ENSRainbow database download." - exit 1 - fi -fi - -echo "ENSRainbow Startup Script" -echo "-------------------------" -echo "Database Schema Version: $DB_SCHEMA_VERSION" -echo "Label Set ID: $LABEL_SET_ID" -echo "Label Set Version: $LABEL_SET_VERSION" -echo "Target Port: $PORT" -echo "Application Directory: $APP_DIR" -echo "Final Data Directory: $FINAL_DATA_DIR" -echo "Marker File: $MARKER_FILE" -echo "Database Directory: $DB_SUBDIR_PATH" -echo "-------------------------" - -# Start netcat listener in background -# See Dockerfile for documentation on why netcat is used during initialization -echo "Starting netcat listener on port ${PORT}..." -nc -lk -p "${PORT}" & -NC_PID=$! - -# Function to cleanup netcat on exit -cleanup_nc() { - if [ -n "${NC_PID:-}" ] && kill -0 "${NC_PID}" 2>/dev/null; then - echo "Killing netcat listener (PID: ${NC_PID})..." - kill "${NC_PID}" 2>/dev/null || true - wait "${NC_PID}" 2>/dev/null || true - fi -} - -# Register cleanup function to run on script exit -trap cleanup_nc EXIT - -# Change to the application directory for pnpm commands -cd "${APP_DIR}" - -# Check if data directory and marker file exist and if data is valid -if [ -d "${FINAL_DATA_DIR}" ] && [ -f "${MARKER_FILE}" ]; then - echo "Existing data directory and marker file found at ${FINAL_DATA_DIR}." - echo "Running database validation (lite) on existing data in ${DB_SUBDIR_PATH}..." - if pnpm run validate:lite --data-dir "${DB_SUBDIR_PATH}"; then - echo "Existing database is valid. 
Skipping download and extraction." - else - echo "Existing database validation failed. Will attempt to re-download." - echo "Cleaning up existing data directory before re-download..." - rm -rf "${FINAL_DATA_DIR}/"* # Remove potentially corrupt data - # The marker file is implicitly removed with FINAL_DATA_DIR - fi -fi - -# If marker file doesn't exist (meaning data is not ready or was cleared) -if [ ! -f "${MARKER_FILE}" ]; then - echo "Database not found or not ready. Proceeding with download and extraction." - - # 1. Ensure required variables for download are set (double check, crucial if logic path leads here) - if [ -z "$DB_SCHEMA_VERSION" ] || [ -z "$LABEL_SET_ID" ] || [ -z "$LABEL_SET_VERSION" ]; then - echo "Critical Error: DB_SCHEMA_VERSION, LABEL_SET_ID, and LABEL_SET_VERSION must be set to download the database." - exit 1 - fi - - # 2. Clean up any existing data and prepare directories - echo "Preparing directories for download..." - rm -rf "${FINAL_DATA_DIR}"/* # Ensure clean state if previous attempt failed mid-way - mkdir -p "${FINAL_DATA_DIR}" - rm -rf "${DOWNLOAD_TEMP_DIR}" # Clean up temp dir from previous runs if any - mkdir -p "${DOWNLOAD_TEMP_DIR}" - - # 3. Download the database archive - echo "Downloading pre-built ENSRainbowdatabase from labelset server (Schema: $DB_SCHEMA_VERSION, Label Set ID: - $LABEL_SET_ID, Label Set Version: $LABEL_SET_VERSION)..." - if ! OUT_DIR="${DOWNLOAD_TEMP_DIR}" \ - ENSRAINBOW_LABELSET_SERVER_URL="${ENSRAINBOW_LABELSET_SERVER_URL:-}" \ - "${APP_DIR}/scripts/download-prebuilt-database.sh" "$DB_SCHEMA_VERSION" "$LABEL_SET_ID" "$LABEL_SET_VERSION"; then - echo "Error: Failed to download database." - ls -R "${DOWNLOAD_TEMP_DIR}" # List contents for debugging - rm -rf "${DOWNLOAD_TEMP_DIR}" - exit 1 - fi - - DB_ARCHIVE_BASENAME="${LABEL_SET_ID}_${LABEL_SET_VERSION}.tgz" - DB_ARCHIVE_PATH="${DOWNLOAD_TEMP_DIR}/databases/${DB_SCHEMA_VERSION}/${DB_ARCHIVE_BASENAME}" - - if [ ! 
-f "$DB_ARCHIVE_PATH" ]; then - echo "Error: Expected database archive file not found at $DB_ARCHIVE_PATH after download attempt." - ls -R "${DOWNLOAD_TEMP_DIR}" - rm -rf "${DOWNLOAD_TEMP_DIR}" - exit 1 - fi - echo "Database archive downloaded to: $DB_ARCHIVE_PATH" - - # 4. Extract the database archive - echo "Extracting database archive..." - if ! tar -xzf "${DB_ARCHIVE_PATH}" -C "${FINAL_DATA_DIR}" --strip-components=1; then - echo "Error: Failed to extract database archive." - rm -f "${DB_ARCHIVE_PATH}" - rm -rf "${DOWNLOAD_TEMP_DIR}" - exit 1 - fi - echo "Database archive extracted to ${FINAL_DATA_DIR}" - - # 5. Clean up downloaded archive and temporary directory - echo "Cleaning up downloaded files..." - rm -f "${DB_ARCHIVE_PATH}" - rm -rf "${DOWNLOAD_TEMP_DIR}" - echo "Cleanup complete." - - # 6. Validate the newly extracted database - echo "Running database validation (lite) on newly extracted data in ${DB_SUBDIR_PATH}..." - if pnpm run validate:lite --data-dir "${DB_SUBDIR_PATH}"; then - echo "Newly extracted database is valid." - # Create marker file upon successful download, extraction, and validation - echo "Creating marker file: ${MARKER_FILE}" - touch "${MARKER_FILE}" - else - echo "Error: Newly extracted database validation failed! Data may be corrupted." - echo "Please check logs and database archive source. The marker file will not be created." - # Depending on policy, you might want to exit 1 here or clean up FINAL_DATA_DIR - exit 1 # Exit if validation fails to prevent running with bad data - fi -fi # End of download and extraction block - -# 7. Kill netcat before starting the server -echo "Stopping netcat listener before starting server..." -# Remove the EXIT trap before manual cleanup to avoid double cleanup on signals. -trap - EXIT -cleanup_nc -# Clear the PID now that we've manually cleaned up -NC_PID="" - -# 8. Start the ENSRainbow server -echo "Starting ENSRainbow server on port ${PORT} using data from ${APP_DIR}/${DB_SUBDIR_PATH}..." 
-echo "Sleeping for 2 seconds to allow netcat to release the port..." -sleep 2 -# pnpm commands were run from APP_DIR, ensure serve also sees --data-dir correctly -exec pnpm run serve --port "${PORT}" --data-dir "${DB_SUBDIR_PATH}" diff --git a/apps/ensrainbow/src/cli.ts b/apps/ensrainbow/src/cli.ts index 1ac7c08e57..904a0deecb 100644 --- a/apps/ensrainbow/src/cli.ts +++ b/apps/ensrainbow/src/cli.ts @@ -11,11 +11,12 @@ import type { ArgumentsCamelCase, Argv } from "yargs"; import { hideBin } from "yargs/helpers"; import yargs from "yargs/yargs"; -import { buildLabelSetId } from "@ensnode/ensnode-sdk"; +import { buildLabelSetId, buildLabelSetVersion } from "@ensnode/ensnode-sdk"; import { PortNumberSchema } from "@ensnode/ensnode-sdk/internal"; import { type ConvertSqlCommandCliArgs, convertCommand } from "@/commands/convert-command-sql"; import { type ConvertCsvCommandCliArgs, convertCsvCommand } from "@/commands/convert-csv-command"; +import { entrypointCommand } from "@/commands/entrypoint-command"; import { type IngestProtobufCommandCliArgs, ingestProtobufCommand, @@ -28,6 +29,22 @@ export interface CLIOptions { exitProcess?: boolean; } +/** + * yargs-parsed argument shape for the `entrypoint` command. + * + * This interface represents the post-coercion values consumed by the handler. + * Runtime coercion is performed by yargs via `buildLabelSetId` and + * `buildLabelSetVersion`. 
+ */ +interface EntrypointCommandCliArgs { + port: number; + "data-dir": string; + "db-schema-version": number; + "label-set-id": string; + "label-set-version": number; + "download-temp-dir"?: string; +} + export function createCLI(options: CLIOptions = {}) { const { exitProcess = true } = options; @@ -111,6 +128,71 @@ export function createCLI(options: CLIOptions = {}) { await serverCommand(serveCommandConfig); }, ) + .command( + "entrypoint", + "Start the ENS Rainbow API server immediately and bootstrap the database in the background", + (yargs: Argv) => { + return yargs + .option("port", { + type: "number", + description: "Port to listen on (overrides PORT env var if both are set)", + default: envConfig.port, + coerce: (port: number) => { + const result = PortNumberSchema.safeParse(port); + if (!result.success) { + const firstError = result.error.issues[0]; + throw new Error(`Invalid port: ${firstError?.message ?? "invalid port number"}`); + } + return result.data; + }, + }) + .option("data-dir", { + type: "string", + description: "Directory containing LevelDB data", + default: envConfig.dataDir, + }) + .option("db-schema-version", { + type: "number", + description: + "Expected database schema version (falls back to DB_SCHEMA_VERSION env var)", + default: envConfig.dbSchemaVersion, + }) + .option("label-set-id", { + type: "string", + description: "Label set id to download (falls back to LABEL_SET_ID env var)", + default: process.env.LABEL_SET_ID, + demandOption: !process.env.LABEL_SET_ID, + }) + .coerce("label-set-id", buildLabelSetId) + .option("label-set-version", { + type: "number", + description: + "Label set version to download (falls back to LABEL_SET_VERSION env var)", + default: process.env.LABEL_SET_VERSION, + demandOption: !process.env.LABEL_SET_VERSION, + }) + .coerce("label-set-version", buildLabelSetVersion) + .option("download-temp-dir", { + type: "string", + description: + "Temporary directory used to stage downloaded archives before extraction " 
+ + "(defaults to /.download-temp)", + default: process.env.DOWNLOAD_TEMP_DIR, + }); + }, + async (argv: ArgumentsCamelCase) => { + const dataDir = parseDataDirFromCli(argv["data-dir"]); + await entrypointCommand({ + port: argv.port, + dataDir, + dbSchemaVersion: argv["db-schema-version"], + labelSetId: argv["label-set-id"], + labelSetVersion: argv["label-set-version"], + downloadTempDir: argv["download-temp-dir"], + labelsetServerUrl: process.env.ENSRAINBOW_LABELSET_SERVER_URL, + }); + }, + ) .command( "validate", "Validate the integrity of the LevelDB database", diff --git a/apps/ensrainbow/src/commands/entrypoint-command.test.ts b/apps/ensrainbow/src/commands/entrypoint-command.test.ts new file mode 100644 index 0000000000..6c3f3d4cd5 --- /dev/null +++ b/apps/ensrainbow/src/commands/entrypoint-command.test.ts @@ -0,0 +1,285 @@ +import { EventEmitter } from "node:events"; +import { existsSync } from "node:fs"; +import { mkdir, mkdtemp, rm, writeFile } from "node:fs/promises"; +import { tmpdir } from "node:os"; +import { dirname, join } from "node:path"; + +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; + +import { buildLabelSetId, buildLabelSetVersion } from "@ensnode/ensnode-sdk"; +import { type EnsRainbow, StatusCode } from "@ensnode/ensrainbow-sdk"; + +import type { AbsolutePath, DbSchemaVersion } from "@/config/types"; +import { DB_SCHEMA_VERSION, ENSRainbowDB } from "@/lib/database"; +import { DbNotReadyError, ENSRainbowServer } from "@/lib/server"; + +let closeHttpServerImpl: undefined | ((server: unknown) => Promise); +vi.mock("@/utils/http-server", async () => { + const actual = await vi.importActual("@/utils/http-server"); + return { + ...actual, + closeHttpServer: async (server: unknown) => { + if (closeHttpServerImpl) return closeHttpServerImpl(server); + return actual.closeHttpServer(server as never); + }, + }; +}); + +import { + __TESTING__, + DB_READY_MARKER_FILENAME, + type EntrypointCommandHandle, + entrypointCommand, +} 
from "./entrypoint-command"; + +let spawnImpl: + | undefined + | (( + command: string, + args: string[], + options: { stdio: "inherit"; env: Record }, + ) => any); + +vi.mock("node:child_process", () => { + return { + spawn: ( + command: string, + args: string[], + options: { stdio: "inherit"; env: Record }, + ) => { + if (!spawnImpl) { + throw new Error("spawnImpl not set in test"); + } + return spawnImpl(command, args, options); + }, + }; +}); + +/** + * These tests exercise the idempotent bootstrap path of the entrypoint command, where the marker + * file and a valid on-disk database already exist. We do not exercise the actual download script + * here (it requires network + a labelset server). + */ +describe("entrypointCommand (existing DB on disk)", () => { + const labelSetId = buildLabelSetId("entrypoint-test"); + const labelSetVersion = buildLabelSetVersion(0); + const port = 3226; + const endpoint = `http://localhost:${port}`; + + let testDataDir: string; + let markerFile: string; + let handle: EntrypointCommandHandle | undefined; + + beforeEach(async () => { + testDataDir = await mkdtemp(join(tmpdir(), "ensrainbow-test-entrypoint-")); + const dbSubdir = join(testDataDir, `data-${labelSetId}_${labelSetVersion}`); + markerFile = join(testDataDir, DB_READY_MARKER_FILENAME); + + // Seed a valid-looking database and marker so the entrypoint skips the download step. 
+ const db = await ENSRainbowDB.create(dbSubdir); + await db.setPrecalculatedRainbowRecordCount(0); + await db.markIngestionFinished(); + await db.setLabelSetId(labelSetId); + await db.setHighestLabelSetVersion(labelSetVersion); + await db.close(); + + await writeFile(markerFile, ""); + }); + + afterEach(async () => { + if (handle) { + await handle.close(); + handle = undefined; + } + await rm(testDataDir, { recursive: true, force: true }); + }); + + it("starts the HTTP server immediately and marks /ready after attaching the existing DB", async () => { + handle = await entrypointCommand({ + port, + dataDir: testDataDir as AbsolutePath, + dbSchemaVersion: DB_SCHEMA_VERSION as DbSchemaVersion, + labelSetId, + labelSetVersion, + registerSignalHandlers: false, + }); + + // /health should respond as soon as entrypointCommand returns (HTTP server is already bound). + const healthRes = await fetch(`${endpoint}/health`); + expect(healthRes.status).toBe(200); + const healthData = (await healthRes.json()) as EnsRainbow.HealthResponse; + expect(healthData).toEqual({ status: "ok" }); + await handle.bootstrapComplete; + + const readyRes = await fetch(`${endpoint}/ready`); + expect(readyRes.status).toBe(200); + + const configRes = await fetch(`${endpoint}/v1/config`); + expect(configRes.status).toBe(200); + const configData = (await configRes.json()) as EnsRainbow.ENSRainbowPublicConfig; + expect(configData.serverLabelSet.labelSetId).toBe(labelSetId); + expect(configData.serverLabelSet.highestLabelSetVersion).toBe(labelSetVersion); + + const countRes = await fetch(`${endpoint}/v1/labels/count`); + expect(countRes.status).toBe(200); + const countData = (await countRes.json()) as EnsRainbow.CountResponse; + expect(countData).toMatchObject({ status: StatusCode.Success, count: 0 }); + + // Marker should still be present after a successful idempotent attach. 
+ expect(existsSync(markerFile)).toBe(true); + }); +}); + +describe("entrypointCommand (signal handlers)", () => { + const labelSetId = buildLabelSetId("entrypoint-signal-test"); + const labelSetVersion = buildLabelSetVersion(0); + const port = 3227; + + let testDataDir: string; + + beforeEach(async () => { + testDataDir = await mkdtemp(join(tmpdir(), "ensrainbow-test-entrypoint-signals-")); + const dbSubdir = join(testDataDir, `data-${labelSetId}_${labelSetVersion}`); + const markerFile = join(testDataDir, DB_READY_MARKER_FILENAME); + + const db = await ENSRainbowDB.create(dbSubdir); + await db.setPrecalculatedRainbowRecordCount(0); + await db.markIngestionFinished(); + await db.setLabelSetId(labelSetId); + await db.setHighestLabelSetVersion(labelSetVersion); + await db.close(); + + await writeFile(markerFile, ""); + }); + + afterEach(async () => { + closeHttpServerImpl = undefined; + await rm(testDataDir, { recursive: true, force: true }); + }); + + it("wraps SIGTERM/SIGINT handlers so shutdown failures don't become unhandled rejections", async () => { + closeHttpServerImpl = async () => { + throw new Error("closeHttpServer failed"); + }; + + let sigtermHandler: undefined | (() => void); + const onceSpy = vi.spyOn(process, "once").mockImplementation((( + event: string, + listener: (...args: any[]) => void, + ) => { + if (event === "SIGTERM") sigtermHandler = listener as () => void; + // Delegate to the original implementation so other listeners still work. 
+ return EventEmitter.prototype.once.call(process, event, listener); + }) as typeof process.once); + + const unhandledRejection = vi.fn(); + process.once("unhandledRejection", unhandledRejection); + + let localHandle: EntrypointCommandHandle | undefined; + try { + localHandle = await entrypointCommand({ + port, + dataDir: testDataDir as AbsolutePath, + dbSchemaVersion: DB_SCHEMA_VERSION as DbSchemaVersion, + labelSetId, + labelSetVersion, + // Leave registerSignalHandlers enabled (default true) + }); + + expect(sigtermHandler).toBeTypeOf("function"); + sigtermHandler?.(); + // Ensure shutdown chain has settled before asserting on unhandled rejections. + await localHandle.close().catch(() => {}); + + expect(unhandledRejection).not.toHaveBeenCalled(); + } finally { + process.removeListener("unhandledRejection", unhandledRejection); + onceSpy.mockRestore(); + if (localHandle) { + await localHandle.close().catch(() => {}); + } + } + }); +}); + +describe("ENSRainbowServer (pending state smoke test)", () => { + it("createPending returns a server with isReady() === false and heal throwing DbNotReadyError", async () => { + const server = ENSRainbowServer.createPending(); + + expect(server.isReady()).toBe(false); + expect(server.serverLabelSet).toBeUndefined(); + + await expect( + server.heal("0x0000000000000000000000000000000000000000000000000000000000000000", { + labelSetId: undefined, + }), + ).rejects.toBeInstanceOf(DbNotReadyError); + }); +}); + +describe("downloadAndExtractDatabase (stale dbSubdir cleanup)", () => { + const dbSchemaVersion = DB_SCHEMA_VERSION as DbSchemaVersion; + const labelSetId = buildLabelSetId("entrypoint-extract-test"); + const labelSetVersion = buildLabelSetVersion(0); + + let testDataDir: string; + let downloadTempDir: string; + let dbSubdir: string; + + beforeEach(async () => { + testDataDir = await mkdtemp(join(tmpdir(), "ensrainbow-test-entrypoint-extract-")); + downloadTempDir = join(testDataDir, ".download-temp"); + dbSubdir = 
join(testDataDir, `data-${labelSetId}_${labelSetVersion}`); + await mkdir(dbSubdir, { recursive: true }); + await writeFile(join(dbSubdir, "STALE_FILE"), "stale"); + }); + + afterEach(async () => { + await rm(testDataDir, { recursive: true, force: true }); + }); + + it("removes existing dbSubdir before spawning tar", async () => { + let tarSawDbSubdir = true; + spawnImpl = (command: string) => { + const child = new EventEmitter() as any; + child.exitCode = null; + child.signalCode = null; + child.kill = () => true; + + queueMicrotask(async () => { + try { + if (command === "bash") { + const archivePath = join( + downloadTempDir, + "databases", + String(dbSchemaVersion), + `${labelSetId}_${labelSetVersion}.tgz`, + ); + await mkdir(dirname(archivePath), { recursive: true }); + await writeFile(archivePath, "not-a-real-tarball"); + } else if (command === "tar") { + tarSawDbSubdir = existsSync(dbSubdir); + } + + child.exitCode = 0; + child.emit("exit", 0, null); + } catch (error) { + child.emit("error", error); + } + }); + + return child; + }; + + await __TESTING__.downloadAndExtractDatabase({ + dataDir: testDataDir, + dbSchemaVersion, + labelSetId, + labelSetVersion, + downloadTempDir, + signal: new AbortController().signal, + }); + + expect(tarSawDbSubdir).toBe(false); + }); +}); diff --git a/apps/ensrainbow/src/commands/entrypoint-command.ts b/apps/ensrainbow/src/commands/entrypoint-command.ts new file mode 100644 index 0000000000..8e657eb47f --- /dev/null +++ b/apps/ensrainbow/src/commands/entrypoint-command.ts @@ -0,0 +1,458 @@ +import { spawn } from "node:child_process"; +import { existsSync } from "node:fs"; +import { mkdir, rm, writeFile } from "node:fs/promises"; +import { dirname, join, resolve } from "node:path"; +import { fileURLToPath } from "node:url"; + +import { serve } from "@hono/node-server"; + +import { stringifyConfig } from "@ensnode/ensnode-sdk/internal"; +import type { EnsRainbow } from "@ensnode/ensrainbow-sdk"; + +import { 
buildEnsRainbowPublicConfig } from "@/config/public"; +import type { AbsolutePath, DbConfig, DbSchemaVersion } from "@/config/types"; +import { createApi } from "@/lib/api"; +import { ENSRainbowDB } from "@/lib/database"; +import { buildDbConfig, ENSRainbowServer } from "@/lib/server"; +import { closeHttpServer } from "@/utils/http-server"; +import { logger } from "@/utils/logger"; + +/** + * Grace period given to a spawned child process after SIGTERM before we escalate to SIGKILL + * during shutdown. + */ +const CHILD_PROCESS_KILL_GRACE_MS = 5_000; + +class BootstrapAbortedError extends Error { + constructor() { + super("ENSRainbow bootstrap aborted due to shutdown"); + this.name = "BootstrapAbortedError"; + } +} + +export interface EntrypointCommandOptions { + port: number; + dataDir: AbsolutePath; + dbSchemaVersion: DbSchemaVersion; + labelSetId: string; + labelSetVersion: number; + /** + * Temporary directory used to stage downloaded archives before extraction. + * Defaults to `/.download-temp`. + */ + downloadTempDir?: string; + /** + * Labelset server URL override. If unset, the download script uses its default. + */ + labelsetServerUrl?: string | undefined; + /** + * Whether to register SIGTERM/SIGINT shutdown handlers. Defaults to `true`. + * Tests should pass `false` to avoid leaking handlers across cases. + */ + registerSignalHandlers?: boolean; +} + +/** + * Handle returned by {@link entrypointCommand}. + */ +export interface EntrypointCommandHandle { + /** + * Resolves when bootstrap finishes or is aborted by shutdown. + * Never rejects: non-abort failures terminate the process via `process.exit(1)`. + */ + readonly bootstrapComplete: Promise; + close(): Promise; +} + +/** + * Name of the marker file written to `dataDir` once the database has been successfully + * downloaded, extracted, and validated. Matches the name used by the legacy `entrypoint.sh` + * so existing volumes remain compatible. 
+ */ +export const DB_READY_MARKER_FILENAME = "ensrainbow_db_ready"; + +/** + * Starts HTTP immediately, bootstraps DB in the background, and wires graceful shutdown. + */ +export async function entrypointCommand( + options: EntrypointCommandOptions, +): Promise { + logger.info("ENSRainbow running with config:"); + logger.info(stringifyConfig(options, { pretty: true })); + + logger.info( + `ENSRainbow entrypoint starting HTTP server on port ${options.port} ` + + `(database will be bootstrapped in the background)`, + ); + + const ensRainbowServer = ENSRainbowServer.createPending(); + + let cachedPublicConfig: EnsRainbow.ENSRainbowPublicConfig | null = null; + let cachedDbConfig: DbConfig | null = null; + const app = createApi( + ensRainbowServer, + () => cachedPublicConfig, + () => cachedDbConfig, + ); + + const httpServer = serve({ + fetch: app.fetch, + port: options.port, + }); + + // Shared abort signal for `close()` and bootstrap work. + const bootstrapAborter = new AbortController(); + + // Tracks bootstrap task settlement so `close()` can await cleanup. + let signalBootstrapSettled!: () => void; + const bootstrapSettled = new Promise((resolvePromise) => { + signalBootstrapSettled = resolvePromise; + }); + + // Track signal listeners so close() can detach them when invoked programmatically + // (e.g., from tests). `process.once` only auto-removes after firing, so a manual close() + // would otherwise leak listeners until the process receives a signal. + let signalHandler: (() => void) | undefined; + + // Cache the in-flight shutdown so all callers (signal handler, programmatic close()) await + // the same work. A boolean guard would let later callers resolve immediately while the + // first close() is still tearing down resources. 
+ let closePromise: Promise | undefined; + const close = (): Promise => { + if (closePromise) return closePromise; + + closePromise = (async () => { + logger.info("Shutting down server..."); + + if (signalHandler) { + process.removeListener("SIGTERM", signalHandler); + process.removeListener("SIGINT", signalHandler); + signalHandler = undefined; + } + + bootstrapAborter.abort(); + // Wait for bootstrap cleanup before closing shared resources. + await bootstrapSettled; + + let shutdownError: unknown; + + try { + await closeHttpServer(httpServer); + } catch (error) { + shutdownError = error; + logger.error(error, "Failed to close HTTP server during shutdown"); + } + + try { + await ensRainbowServer.close(); + } catch (error) { + if (shutdownError === undefined) { + shutdownError = error; + } + logger.error(error, "Failed to close ENSRainbow server/database during shutdown"); + } + + if (shutdownError !== undefined) { + throw shutdownError; + } + + logger.info("Server shutdown complete"); + })(); + + return closePromise; + }; + + if (options.registerSignalHandlers !== false) { + signalHandler = () => { + // Node does not await signal handlers; swallow errors to avoid unhandled rejections. + void close().catch(() => {}); + }; + + process.once("SIGTERM", signalHandler); + process.once("SIGINT", signalHandler); + } + + const bootstrapComplete = new Promise((resolvePromise) => { + // Defer bootstrap so the HTTP server starts accepting requests first. + setTimeout(() => { + runDbBootstrap(options, ensRainbowServer, bootstrapAborter.signal) + .then(({ publicConfig, dbConfig }) => { + cachedDbConfig = dbConfig; + cachedPublicConfig = publicConfig; + logger.info( + "ENSRainbow database bootstrap complete. 
Service is ready to serve heal requests.", + ); + resolvePromise(); + }) + .catch((error) => { + if (error instanceof BootstrapAbortedError || bootstrapAborter.signal.aborted) { + logger.info("ENSRainbow database bootstrap aborted due to shutdown"); + resolvePromise(); + return; + } + logger.error(error, "ENSRainbow database bootstrap failed - exiting"); + process.exit(1); + }) + .finally(() => { + signalBootstrapSettled(); + }); + }, 0); + }); + + return { bootstrapComplete, close }; +} + +/** + * Idempotent DB bootstrap pipeline. + * + * If marker + DB are present, reuse them; otherwise download + extract. + * Returns the public config and DB config for the attached DB. + */ +async function runDbBootstrap( + options: EntrypointCommandOptions, + ensRainbowServer: ENSRainbowServer, + signal: AbortSignal, +): Promise<{ publicConfig: EnsRainbow.ENSRainbowPublicConfig; dbConfig: DbConfig }> { + const { dataDir, dbSchemaVersion, labelSetId, labelSetVersion } = options; + const downloadTempDir = options.downloadTempDir ?? join(dataDir, ".download-temp"); + const markerFile = join(dataDir, DB_READY_MARKER_FILENAME); + const dbSubdir = join(dataDir, `data-${labelSetId}_${labelSetVersion}`); + + await mkdir(dataDir, { recursive: true }); + + if (existsSync(markerFile) && existsSync(dbSubdir)) { + logger.info( + `Found existing ENSRainbow marker at ${markerFile}; attempting to open existing database at ${dbSubdir}`, + ); + // Track DB ownership so cleanup chooses the correct close path. + let existingDb: ENSRainbowDB | undefined; + let existingDbAttached = false; + try { + throwIfAborted(signal); + existingDb = await ENSRainbowDB.open(dbSubdir); + throwIfAborted(signal); + await ensRainbowServer.attachDb(existingDb); + existingDbAttached = true; + const dbConfig = await buildDbConfig(ensRainbowServer); + return { publicConfig: buildEnsRainbowPublicConfig(dbConfig), dbConfig }; + } catch (error) { + // Always release any opened DB handle/lock first, even when aborting. 
This prevents + // a leaked LevelDB lock when SIGTERM races a non-abort failure (e.g. attachDb throws + // while signal.aborted has just become true), since the previous abort-first rethrow + // skipped cleanup entirely. + if (existingDbAttached) { + try { + await ensRainbowServer.close(); + } catch (closeError) { + logger.warn( + closeError, + "Failed to close server while falling back to re-download; continuing", + ); + } + } else if (existingDb !== undefined) { + await safeClose(existingDb); + } + + if (error instanceof BootstrapAbortedError || signal.aborted) { + throw error; + } + + await rm(dbSubdir, { recursive: true, force: true }); + logger.warn( + error, + "Existing ENSRainbow database failed to open or validate; re-downloading from scratch", + ); + // Fall through to re-download. + } + } + + throwIfAborted(signal); + await downloadAndExtractDatabase({ + dataDir, + dbSchemaVersion, + labelSetId, + labelSetVersion, + downloadTempDir, + labelsetServerUrl: options.labelsetServerUrl, + signal, + }); + throwIfAborted(signal); + + logger.info(`Opening newly extracted database at ${dbSubdir}`); + const db = await ENSRainbowDB.open(dbSubdir); + let dbAttached = false; + try { + if (signal.aborted) { + throw new BootstrapAbortedError(); + } + + await ensRainbowServer.attachDb(db); + dbAttached = true; + + if (signal.aborted) { + throw new BootstrapAbortedError(); + } + + // Write marker only after a successful attach. 
+ await writeFile(markerFile, ""); + + const dbConfig = await buildDbConfig(ensRainbowServer); + return { publicConfig: buildEnsRainbowPublicConfig(dbConfig), dbConfig }; + } catch (error) { + if (!dbAttached) { + await safeClose(db); + } else if (error instanceof BootstrapAbortedError || signal.aborted) { + try { + await ensRainbowServer.close(); + } catch (closeError) { + logger.warn( + closeError, + "Failed to close server while aborting after DB attach; continuing", + ); + } + } + throw error; + } +} + +function throwIfAborted(signal: AbortSignal): void { + if (signal.aborted) { + throw new BootstrapAbortedError(); + } +} + +async function safeClose(db: ENSRainbowDB): Promise { + try { + await db.close(); + } catch (error) { + logger.warn(error, "Failed to close partially-opened ENSRainbow database during shutdown"); + } +} + +interface DownloadAndExtractParams { + dataDir: string; + dbSchemaVersion: DbSchemaVersion; + labelSetId: string; + labelSetVersion: number; + downloadTempDir: string; + labelsetServerUrl?: string | undefined; + signal: AbortSignal; +} + +async function downloadAndExtractDatabase(params: DownloadAndExtractParams): Promise { + const { dataDir, dbSchemaVersion, labelSetId, labelSetVersion, downloadTempDir, signal } = params; + + // Clean stale state from previous aborted attempts. Async fs ops keep the event loop + // responsive so /health and /ready continue to answer probes during heavy disk I/O. + await rm(downloadTempDir, { recursive: true, force: true }); + await mkdir(downloadTempDir, { recursive: true }); + + const downloadScript = resolveDownloadScriptPath(); + logger.info( + `Downloading ENSRainbow database (schema=${dbSchemaVersion}, id=${labelSetId}, version=${labelSetVersion}) via ${downloadScript}`, + ); + + await spawnChild( + "bash", + [downloadScript, String(dbSchemaVersion), labelSetId, String(labelSetVersion)], + { + OUT_DIR: downloadTempDir, + ...(params.labelsetServerUrl + ? 
{ ENSRAINBOW_LABELSET_SERVER_URL: params.labelsetServerUrl } + : {}), + }, + signal, + ); + + const archivePath = join( + downloadTempDir, + "databases", + String(dbSchemaVersion), + `${labelSetId}_${labelSetVersion}.tgz`, + ); + if (!existsSync(archivePath)) { + throw new Error( + `Expected database archive file not found at ${archivePath} after download completed`, + ); + } + + logger.info(`Extracting ${archivePath} into ${dataDir}`); + await mkdir(dataDir, { recursive: true }); + // Ensure extraction target is clean; tar does not delete stale partial files. + const dbSubdir = join(dataDir, `data-${labelSetId}_${labelSetVersion}`); + await rm(dbSubdir, { recursive: true, force: true }); + await spawnChild("tar", ["-xzf", archivePath, "-C", dataDir, "--strip-components=1"], {}, signal); + + await rm(downloadTempDir, { recursive: true, force: true }); +} + +export const __TESTING__ = { + downloadAndExtractDatabase, +}; + +function resolveDownloadScriptPath(): string { + const here = dirname(fileURLToPath(import.meta.url)); + // From `src/commands` or `dist/commands`, go up two levels to app root. + return resolve(here, "..", "..", "scripts", "download-prebuilt-database.sh"); +} + +function spawnChild( + command: string, + args: string[], + extraEnv: Record, + signal: AbortSignal, +): Promise { + return new Promise((resolvePromise, reject) => { + if (signal.aborted) { + reject(new BootstrapAbortedError()); + return; + } + + const child = spawn(command, args, { + stdio: "inherit", + env: { ...process.env, ...extraEnv }, + }); + + // On abort: SIGTERM first, then SIGKILL after a grace period. 
+ let killTimer: NodeJS.Timeout | undefined; + const onAbort = () => { + if (child.exitCode !== null || child.signalCode !== null) return; + child.kill("SIGTERM"); + killTimer = setTimeout(() => { + if (child.exitCode === null && child.signalCode === null) { + child.kill("SIGKILL"); + } + }, CHILD_PROCESS_KILL_GRACE_MS); + killTimer.unref(); + }; + signal.addEventListener("abort", onAbort, { once: true }); + + const cleanup = () => { + signal.removeEventListener("abort", onAbort); + if (killTimer) clearTimeout(killTimer); + }; + + child.on("error", (err) => { + cleanup(); + reject(err); + }); + child.on("exit", (code, exitSignal) => { + cleanup(); + if (signal.aborted) { + reject(new BootstrapAbortedError()); + return; + } + if (code === 0) { + resolvePromise(); + return; + } + reject( + new Error( + `Command '${command} ${args.join(" ")}' exited with ${ + exitSignal ? `signal ${exitSignal}` : `code ${code}` + }`, + ), + ); + }); + }); +} diff --git a/apps/ensrainbow/src/commands/server-command.test.ts b/apps/ensrainbow/src/commands/server-command.test.ts index 82b93931e0..ed4834a2ce 100644 --- a/apps/ensrainbow/src/commands/server-command.test.ts +++ b/apps/ensrainbow/src/commands/server-command.test.ts @@ -1,4 +1,6 @@ import { promises as fs } from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; import { serve } from "@hono/node-server"; import { asLiteralLabel, labelhashLiteralLabel } from "enssdk"; @@ -11,6 +13,7 @@ import { buildEnsRainbowPublicConfig } from "@/config/public"; import { createApi } from "@/lib/api"; import { ENSRainbowDB } from "@/lib/database"; import { buildDbConfig, ENSRainbowServer } from "@/lib/server"; +import { closeHttpServer } from "@/utils/http-server"; describe("Server Command Tests", () => { let db: ENSRainbowDB; @@ -35,7 +38,11 @@ describe("Server Command Tests", () => { const ensRainbowServer = await ENSRainbowServer.init(db); const dbConfig = await buildDbConfig(ensRainbowServer); const 
publicConfig = buildEnsRainbowPublicConfig(dbConfig); - app = createApi(ensRainbowServer, publicConfig, dbConfig); + app = createApi( + ensRainbowServer, + () => publicConfig, + () => dbConfig, + ); // Start the server on a different port than what ENSRainbow defaults to server = serve({ @@ -57,7 +64,7 @@ describe("Server Command Tests", () => { afterAll(async () => { // Cleanup try { - if (server) await server.close(); + if (server) await closeHttpServer(server); if (db) await db.close(); await fs.rm(TEST_DB_DIR, { recursive: true, force: true }); } catch (error) { @@ -128,6 +135,15 @@ describe("Server Command Tests", () => { }); }); + describe("GET /ready", () => { + it("should return ok status when the server has an attached database", async () => { + const response = await fetch(`http://localhost:${nonDefaultPort}/ready`); + expect(response.status).toBe(200); + const data = (await response.json()) as EnsRainbow.ReadyResponse; + expect(data).toEqual({ status: "ok" } satisfies EnsRainbow.ReadyResponse); + }); + }); + describe("GET /v1/labels/count", () => { it("should return count snapshot from startup (from dbConfig.recordsCount)", async () => { // Count is fixed at server start; changing the DB does not affect the response @@ -176,6 +192,120 @@ describe("Server Command Tests", () => { }); }); + describe("Pending server (no DB attached yet)", () => { + const pendingPort = 3225; + let pendingApp: Hono; + let pendingServer: ReturnType; + let pendingEnsRainbowServer: ENSRainbowServer; + let pendingPublicConfig: EnsRainbow.ENSRainbowPublicConfig | null; + let pendingDbConfig: Awaited> | null; + + beforeAll(async () => { + pendingEnsRainbowServer = ENSRainbowServer.createPending(); + pendingPublicConfig = null; + pendingDbConfig = null; + pendingApp = createApi( + pendingEnsRainbowServer, + () => pendingPublicConfig, + () => pendingDbConfig, + ); + pendingServer = serve({ + fetch: pendingApp.fetch, + port: pendingPort, + }); + }); + + afterAll(async () => { + try { + 
if (pendingServer) await closeHttpServer(pendingServer); + await pendingEnsRainbowServer.close(); + } catch (error) { + console.error("Pending server cleanup failed:", error); + } + }); + + it("GET /health returns 200 immediately without a DB", async () => { + const response = await fetch(`http://localhost:${pendingPort}/health`); + expect(response.status).toBe(200); + const data = (await response.json()) as EnsRainbow.HealthResponse; + expect(data).toEqual({ status: "ok" } satisfies EnsRainbow.HealthResponse); + }); + + it("GET /ready returns 503 while the DB is not attached", async () => { + const response = await fetch(`http://localhost:${pendingPort}/ready`); + expect(response.status).toBe(503); + const data = (await response.json()) as EnsRainbow.ServiceUnavailableError; + expect(data.status).toBe(StatusCode.Error); + expect(data.errorCode).toBe(ErrorCode.ServiceUnavailable); + }); + + it("GET /v1/heal/:labelhash returns 503 while the DB is not attached", async () => { + const someLabelhash = labelhashLiteralLabel(asLiteralLabel("test")); + const response = await fetch(`http://localhost:${pendingPort}/v1/heal/${someLabelhash}`); + expect(response.status).toBe(503); + const data = (await response.json()) as EnsRainbow.ServiceUnavailableError; + expect(data.errorCode).toBe(ErrorCode.ServiceUnavailable); + }); + + it("GET /v1/labels/count and /v1/config return 503 while the DB is not attached", async () => { + const [countRes, configRes] = await Promise.all([ + fetch(`http://localhost:${pendingPort}/v1/labels/count`), + fetch(`http://localhost:${pendingPort}/v1/config`), + ]); + expect(countRes.status).toBe(503); + expect(configRes.status).toBe(503); + }); + + it("After attachDb, /ready returns 200 and /v1/heal serves labels", async () => { + const attachDataDir = await fs.mkdtemp( + join(tmpdir(), "ensrainbow-test-server-pending-attach-"), + ); + + const attachDb = await ENSRainbowDB.create(attachDataDir); + try { + await 
attachDb.setPrecalculatedRainbowRecordCount(1); + await attachDb.markIngestionFinished(); + await attachDb.setLabelSetId("pending-test"); + await attachDb.setHighestLabelSetVersion(0); + await attachDb.addRainbowRecord("pending-label", 0); + + await pendingEnsRainbowServer.attachDb(attachDb); + pendingDbConfig = await buildDbConfig(pendingEnsRainbowServer); + pendingPublicConfig = buildEnsRainbowPublicConfig(pendingDbConfig); + + const readyRes = await fetch(`http://localhost:${pendingPort}/ready`); + expect(readyRes.status).toBe(200); + + const labelhash = labelhashLiteralLabel(asLiteralLabel("pending-label")); + const healRes = await fetch(`http://localhost:${pendingPort}/v1/heal/${labelhash}`); + expect(healRes.status).toBe(200); + const healData = (await healRes.json()) as EnsRainbow.HealResponse; + expect(healData).toEqual({ + status: StatusCode.Success, + label: "pending-label", + } satisfies EnsRainbow.HealSuccess); + + const configRes = await fetch(`http://localhost:${pendingPort}/v1/config`); + expect(configRes.status).toBe(200); + const configData = (await configRes.json()) as EnsRainbow.ENSRainbowPublicConfig; + expect(configData.serverLabelSet.labelSetId).toBe("pending-test"); + expect(configData.serverLabelSet.highestLabelSetVersion).toBe(0); + + const countRes = await fetch(`http://localhost:${pendingPort}/v1/labels/count`); + expect(countRes.status).toBe(200); + const countData = (await countRes.json()) as EnsRainbow.CountResponse; + expect(countData).toEqual({ + status: StatusCode.Success, + count: 1, + timestamp: expect.any(String), + } satisfies EnsRainbow.CountSuccess); + } finally { + await pendingEnsRainbowServer.close(); + await fs.rm(attachDataDir, { recursive: true, force: true }); + } + }); + }); + describe("CORS headers for /v1/* routes", () => { it("should return CORS headers for /v1/* routes", async () => { const validLabel = "test-label"; diff --git a/apps/ensrainbow/src/commands/server-command.ts 
b/apps/ensrainbow/src/commands/server-command.ts index f5c940044a..e8ad9a4f4d 100644 --- a/apps/ensrainbow/src/commands/server-command.ts +++ b/apps/ensrainbow/src/commands/server-command.ts @@ -8,6 +8,7 @@ import { buildEnsRainbowPublicConfig } from "@/config/public"; import { createApi } from "@/lib/api"; import { ENSRainbowDB } from "@/lib/database"; import { buildDbConfig, ENSRainbowServer } from "@/lib/server"; +import { closeHttpServer } from "@/utils/http-server"; import { logger } from "@/utils/logger"; export type ServerCommandOptions = ServeCommandConfig; @@ -30,7 +31,11 @@ export async function serverCommand(options: ServerCommandOptions): Promise publicConfig, + () => dbConfig, + ); const httpServer = serve({ fetch: app.fetch, @@ -38,20 +43,48 @@ export async function serverCommand(options: ServerCommandOptions): Promise | undefined; const shutdown = async () => { + if (shutdownPromise) { + return shutdownPromise; + } + logger.info("Shutting down server..."); - try { - await httpServer.close(); - await db.close(); + shutdownPromise = (async () => { + let hadShutdownError = false; + + try { + await closeHttpServer(httpServer); + } catch (error) { + hadShutdownError = true; + logger.error(error, "Failed to close HTTP server during shutdown"); + } finally { + try { + await db.close(); + } catch (error) { + hadShutdownError = true; + logger.error(error, "Failed to close database during shutdown"); + } + } + + if (hadShutdownError) { + process.exitCode = 1; + logger.error("Server shutdown completed with errors"); + return; + } + logger.info("Server shutdown complete"); - } catch (error) { - logger.error(error, "Error during shutdown:"); - throw error; - } + })(); + + return shutdownPromise; }; - process.on("SIGTERM", shutdown); - process.on("SIGINT", shutdown); + process.on("SIGTERM", () => { + void shutdown(); + }); + process.on("SIGINT", () => { + void shutdown(); + }); } catch (error) { await db.close(); throw error; diff --git 
a/apps/ensrainbow/src/lib/api.ts b/apps/ensrainbow/src/lib/api.ts index ca590d0554..a4c8b8498d 100644 --- a/apps/ensrainbow/src/lib/api.ts +++ b/apps/ensrainbow/src/lib/api.ts @@ -13,17 +13,49 @@ import { import { type EnsRainbow, ErrorCode, StatusCode } from "@ensnode/ensrainbow-sdk"; import type { DbConfig } from "@/config/types"; -import type { ENSRainbowServer } from "@/lib/server"; +import { DbNotReadyError, type ENSRainbowServer } from "@/lib/server"; import { getErrorMessage } from "@/utils/error-utils"; import { logger } from "@/utils/logger"; +/** + * Supplier of the current public config for the API. + * + * Returns `null` while the server is still bootstrapping its database. Once the database is + * attached, the supplier returns the final `ENSRainbowPublicConfig` (cached by the caller). + */ +export type PublicConfigSupplier = () => EnsRainbow.ENSRainbowPublicConfig | null; + +/** + * Like {@link PublicConfigSupplier}, but yields the {@link DbConfig} snapshot. + */ +export type DbConfigSupplier = () => DbConfig | null; + +/** + * Shared 503 response body for endpoints that require the database to be ready. + */ +const BOOTSTRAPPING_MESSAGE = "ENSRainbow is still bootstrapping its database"; + +function buildServiceUnavailableBody( + message: string = BOOTSTRAPPING_MESSAGE, +): EnsRainbow.ServiceUnavailableError { + return { + status: StatusCode.Error, + error: message, + errorCode: ErrorCode.ServiceUnavailable, + }; +} + /** * Creates and configures the ENS Rainbow API routes. + * + * When `publicConfigSupplier` (or `dbConfigSupplier`) returns `null`, routes that depend on the + * database respond with HTTP 503 so that clients polling `/ready` can wait for the bootstrap to + * complete. 
*/ export function createApi( server: ENSRainbowServer, - publicConfig: EnsRainbow.ENSRainbowPublicConfig, - dbConfig: DbConfig, + publicConfigSupplier: PublicConfigSupplier, + dbConfigSupplier: DbConfigSupplier, ): Hono { const api = new Hono(); @@ -39,6 +71,10 @@ export function createApi( ); api.get("/v1/heal/:labelhash", async (c: HonoContext) => { + if (!server.isReady()) { + return c.json(buildServiceUnavailableBody(), 503); + } + const labelhash = c.req.param("labelhash") as `0x${string}`; const labelSetVersionParam = c.req.query("label_set_version"); @@ -79,8 +115,16 @@ export function createApi( ); } - const result = await server.heal(labelhash, clientLabelSet); - return c.json(result, result.errorCode); + try { + const result = await server.heal(labelhash, clientLabelSet); + return c.json(result, result.errorCode); + } catch (error) { + // Handle readiness races during shutdown. + if (error instanceof DbNotReadyError) { + return c.json(buildServiceUnavailableBody(), 503); + } + throw error; + } }); api.get("/health", (c: HonoContext) => { @@ -88,7 +132,21 @@ export function createApi( return c.json(result); }); + api.get("/ready", (c: HonoContext) => { + // Require both DB attach and config publication to avoid a transient false-ready state. 
+ if (!server.isReady() || publicConfigSupplier() === null) { + return c.json(buildServiceUnavailableBody(), 503); + } + const result: EnsRainbow.ReadyResponse = { status: "ok" }; + return c.json(result); + }); + api.get("/v1/labels/count", (c: HonoContext) => { + const dbConfig = dbConfigSupplier(); + if (dbConfig === null) { + return c.json(buildServiceUnavailableBody(), 503); + } + const countResponse: EnsRainbow.CountSuccess = { status: StatusCode.Success, count: dbConfig.recordsCount, @@ -98,6 +156,10 @@ export function createApi( }); api.get("/v1/config", (c: HonoContext) => { + const publicConfig = publicConfigSupplier(); + if (publicConfig === null) { + return c.json(buildServiceUnavailableBody(), 503); + } return c.json(publicConfig); }); diff --git a/apps/ensrainbow/src/lib/server.ts b/apps/ensrainbow/src/lib/server.ts index 6892869e75..c3cca1ef1a 100644 --- a/apps/ensrainbow/src/lib/server.ts +++ b/apps/ensrainbow/src/lib/server.ts @@ -18,9 +18,11 @@ import { logger } from "@/utils/logger"; /** * Reads label set and record count from an initialized ENSRainbowServer. - * @throws Error if the record count cannot be read from the database. + * @throws Error if the server is not ready or the record count cannot be read from the database. */ export async function buildDbConfig(server: ENSRainbowServer): Promise { + const { serverLabelSet } = server.requireReady(); + const countResult = await server.labelCount(); if (countResult.status === StatusCode.Error) { throw new Error( @@ -29,24 +31,47 @@ export async function buildDbConfig(server: ENSRainbowServer): Promise } return { - serverLabelSet: server.serverLabelSet, + serverLabelSet, recordsCount: countResult.count, }; } +/** + * Thrown when a handler needs the database but the server has not finished bootstrapping yet. + * + * HTTP routes map this to a 503 Service Unavailable response so that clients polling `/ready` + * can retry instead of treating it as a fatal server error. 
+ */ +export class DbNotReadyError extends Error { + constructor(message = "ENSRainbow is still bootstrapping its database") { + super(message); + this.name = "DbNotReadyError"; + Object.setPrototypeOf(this, DbNotReadyError.prototype); + } +} + export class ENSRainbowServer { - private readonly db: ENSRainbowDB; - public readonly serverLabelSet: EnsRainbowServerLabelSet; + private db: ENSRainbowDB | undefined; + private _serverLabelSet: EnsRainbowServerLabelSet | undefined; - private constructor(db: ENSRainbowDB, serverLabelSet: EnsRainbowServerLabelSet) { + private constructor(db?: ENSRainbowDB, serverLabelSet?: EnsRainbowServerLabelSet) { this.db = db; - this.serverLabelSet = serverLabelSet; + this._serverLabelSet = serverLabelSet; } /** - * Creates a new ENSRainbowServer instance - * @param db The ENSRainbowDB instance - * @param logLevel Optional log level + * The label set of the attached database. Only defined once the server is ready. + */ + public get serverLabelSet(): EnsRainbowServerLabelSet | undefined { + return this._serverLabelSet; + } + + public isReady(): boolean { + return this.db !== undefined && this._serverLabelSet !== undefined; + } + + /** + * Creates a new ENSRainbowServer instance with an already-opened database. * @throws Error if a "lite" validation of the database fails */ public static async init(db: ENSRainbowDB): Promise { @@ -62,6 +87,58 @@ export class ENSRainbowServer { return new ENSRainbowServer(db, serverLabelSet); } + /** + * Creates a new ENSRainbowServer in a "pending" state without a database attached. + * + * The HTTP server can start serving `/health` and `/ready` immediately while a background task + * downloads and validates the database. Once ready, call {@link attachDb} to transition the + * server into its ready state. + */ + public static createPending(): ENSRainbowServer { + return new ENSRainbowServer(); + } + + /** + * Attaches a validated database to a previously-pending server instance, making it ready. 
+ * + * @throws Error if the server already has a database attached or if the database fails lite validation. + */ + public async attachDb(db: ENSRainbowDB): Promise { + if (this.db !== undefined) { + throw new Error("ENSRainbowServer already has a database attached"); + } + + if (!(await db.validate({ lite: true }))) { + throw new Error("Database is in an invalid state"); + } + + this._serverLabelSet = await db.getLabelSet(); + this.db = db; + } + + private requireDb(): ENSRainbowDB { + if (this.db === undefined) { + throw new DbNotReadyError(); + } + return this.db; + } + + /** + * Returns both ready-state values or throws if the server is not ready. + * + * Centralizes the invariant established by {@link attachDb}: once `db` is attached, + * `_serverLabelSet` must also be present. + */ + public requireReady(): { db: ENSRainbowDB; serverLabelSet: EnsRainbowServerLabelSet } { + const db = this.requireDb(); + if (this._serverLabelSet === undefined) { + throw new Error( + "ENSRainbowServer invariant violation: database is attached but server label set is missing", + ); + } + return { db, serverLabelSet: this._serverLabelSet }; + } + /** * Determines if a versioned rainbow record should be treated as unhealable * based on the client's label set version requirements, ignoring the label set ID. 
@@ -81,6 +158,8 @@ export class ENSRainbowServer { labelHash: LabelHash, clientLabelSet: EnsRainbowClientLabelSet, ): Promise { + const { db, serverLabelSet } = this.requireReady(); + let labelHashBytes: ByteArray; try { labelHashBytes = labelHashToBytes(labelHash); @@ -94,7 +173,7 @@ export class ENSRainbowServer { } try { - validateSupportedLabelSetAndVersion(this.serverLabelSet, clientLabelSet); + validateSupportedLabelSetAndVersion(serverLabelSet, clientLabelSet); } catch (error) { logger.info(getErrorMessage(error)); return { @@ -105,7 +184,7 @@ export class ENSRainbowServer { } try { - const versionedRainbowRecord = await this.db.getVersionedRainbowRecord(labelHashBytes); + const versionedRainbowRecord = await db.getVersionedRainbowRecord(labelHashBytes); if ( versionedRainbowRecord === null || ENSRainbowServer.needToSimulateAsUnhealable(versionedRainbowRecord, clientLabelSet) @@ -138,8 +217,9 @@ export class ENSRainbowServer { } async labelCount(): Promise { + const db = this.requireDb(); try { - const precalculatedCount = await this.db.getPrecalculatedRainbowRecordCount(); + const precalculatedCount = await db.getPrecalculatedRainbowRecordCount(); return { status: StatusCode.Success, count: precalculatedCount, @@ -162,4 +242,18 @@ export class ENSRainbowServer { } satisfies EnsRainbow.CountServerError; } } + + /** + * Closes the attached database (if any). Safe to call on a pending server. + * + * Resets readiness before awaiting DB close so new handlers fail fast with + * `DbNotReadyError` instead of racing an in-progress teardown. 
+ */ + async close(): Promise { + const capturedDb = this.db; + this.db = undefined; + this._serverLabelSet = undefined; + if (capturedDb === undefined) return; + await capturedDb.close(); + } } diff --git a/apps/ensrainbow/src/utils/http-server.ts b/apps/ensrainbow/src/utils/http-server.ts new file mode 100644 index 0000000000..cbb9628dcc --- /dev/null +++ b/apps/ensrainbow/src/utils/http-server.ts @@ -0,0 +1,32 @@ +import type { Server as HttpServer } from "node:http"; +import type { Http2SecureServer, Http2Server } from "node:http2"; + +/** + * Promisified wrapper around Node's callback-based `http.Server.close()`. + * + * `@hono/node-server`'s `serve()` returns a plain Node `http.Server | Http2Server | + * Http2SecureServer`, whose `close(callback?)` is NOT promise-returning (it returns `this`). + * Directly `await`ing `httpServer.close()` therefore resolves immediately against a + * non-thenable server object, so in-flight requests would be racing any subsequent teardown + * (DB close, etc.). This wrapper makes the await actually wait until all active connections + * have finished, or `close` has errored. + */ +export function closeHttpServer( + server: HttpServer | Http2Server | Http2SecureServer, +): Promise { + return new Promise((resolve, reject) => { + server.close((error) => { + if (error) { + // `server.close()` is idempotent from a lifecycle perspective. If shutdown races with + // another close path and the server is already stopped, treat this as a no-op. 
+ if ("code" in error && error.code === "ERR_SERVER_NOT_RUNNING") { + resolve(); + return; + } + reject(error); + return; + } + resolve(); + }); + }); +} diff --git a/docs/ensnode.io/src/content/docs/ensrainbow/concepts/glossary.mdx b/docs/ensnode.io/src/content/docs/ensrainbow/concepts/glossary.mdx index f8fdebc85e..1b23c19bb9 100644 --- a/docs/ensnode.io/src/content/docs/ensrainbow/concepts/glossary.mdx +++ b/docs/ensnode.io/src/content/docs/ensrainbow/concepts/glossary.mdx @@ -51,7 +51,7 @@ Non-negative integer that monotonically increases when new labelhash-to-label ma ## Healable Count -Total number of labels that can currently be healed by the running server. Exposed via `/count`. +Total number of labels that can currently be healed by the running server. Exposed via `/v1/labels/count`. **Example:** `7 892 001` @@ -61,7 +61,7 @@ High-level outcome of an API call – either `success` or `error`. ## Error Code -HTTP-style numeric code describing the error (`400`, `404`, `500`). +HTTP-style numeric code describing the error (`400`, `404`, `500`, `503`). ## Rainbow Table diff --git a/docs/ensnode.io/src/content/docs/ensrainbow/index.mdx b/docs/ensnode.io/src/content/docs/ensrainbow/index.mdx index 83b73ca930..e59e09e090 100644 --- a/docs/ensnode.io/src/content/docs/ensrainbow/index.mdx +++ b/docs/ensnode.io/src/content/docs/ensrainbow/index.mdx @@ -92,7 +92,7 @@ Get type-safe integration with built-in error handling and caching. Perfect for /> ### Deploy with Docker -Run your own ENSRainbow instance for control, privacy, or high-volume usage. Starts quickly with test data. +Run your own ENSRainbow instance for control, privacy, or high-volume usage. The HTTP server starts immediately; poll `/ready` to gate traffic until the database bootstrap completes. 
{ it("should return a positive health check", async () => { mockFetch.mockResolvedValueOnce({ + ok: true, + status: 200, + statusText: "OK", json: () => Promise.resolve({ status: "ok", @@ -271,6 +275,80 @@ describe("EnsRainbowApiClient", () => { } satisfies EnsRainbow.HealthResponse); }); + describe("ready", () => { + it("should resolve when the server is ready (HTTP 200)", async () => { + mockFetch.mockResolvedValueOnce({ + ok: true, + status: 200, + json: () => + Promise.resolve({ + status: "ok", + } satisfies EnsRainbow.ReadyResponse), + }); + + const response = await client.ready(); + + expect(mockFetch).toHaveBeenCalledWith(new URL("/ready", DEFAULT_ENSRAINBOW_URL)); + expect(response).toEqual({ + status: "ok", + } satisfies EnsRainbow.ReadyResponse); + }); + + it("should throw an EnsRainbowHttpError carrying status 503 when the server is not ready yet", async () => { + mockFetch.mockResolvedValueOnce({ + ok: false, + status: 503, + statusText: "Service Unavailable", + }); + + const error = await client.ready().catch((e: unknown) => e); + expect(error).toBeInstanceOf(EnsRainbowHttpError); + expect((error as EnsRainbowHttpError).status).toBe(503); + expect((error as EnsRainbowHttpError).statusText).toBe("Service Unavailable"); + expect((error as EnsRainbowHttpError).message).toMatch(/503/); + }); + + it("should throw an EnsRainbowHttpError carrying the original status for non-503 failures (e.g. 
404 misrouting)", async () => { + mockFetch.mockResolvedValueOnce({ + ok: false, + status: 404, + statusText: "Not Found", + }); + + const error = await client.ready().catch((e: unknown) => e); + expect(error).toBeInstanceOf(EnsRainbowHttpError); + expect((error as EnsRainbowHttpError).status).toBe(404); + expect((error as EnsRainbowHttpError).message).toMatch(/404/); + }); + + it("should throw an EnsRainbowHttpError for HTTP 500 server errors", async () => { + mockFetch.mockResolvedValueOnce({ + ok: false, + status: 500, + statusText: "Internal Server Error", + }); + + const error = await client.ready().catch((e: unknown) => e); + expect(error).toBeInstanceOf(EnsRainbowHttpError); + expect((error as EnsRainbowHttpError).status).toBe(500); + }); + }); + + describe("health", () => { + it("should throw an EnsRainbowHttpError on non-2xx responses with the original status", async () => { + mockFetch.mockResolvedValueOnce({ + ok: false, + status: 502, + statusText: "Bad Gateway", + }); + + const error = await client.health().catch((e: unknown) => e); + expect(error).toBeInstanceOf(EnsRainbowHttpError); + expect((error as EnsRainbowHttpError).status).toBe(502); + expect((error as EnsRainbowHttpError).statusText).toBe("Bad Gateway"); + }); + }); + describe("config", () => { it("should request /v1/config and return public config on success", async () => { const configData: EnsRainbow.ENSRainbowPublicConfig = { @@ -294,10 +372,12 @@ describe("EnsRainbowApiClient", () => { expect(response).toEqual(configData); }); - it("should throw with fallback message when error body is not valid JSON", async () => { - mockFetch.mockResolvedValueOnce({ ok: false, statusText: "Not Found" }); + it("should throw an EnsRainbowHttpError with the original status when the response is not OK", async () => { + mockFetch.mockResolvedValueOnce({ ok: false, status: 404, statusText: "Not Found" }); - await expect(client.config()).rejects.toThrow(/Not Found/); + const error = await 
client.config().catch((e: unknown) => e); + expect(error).toBeInstanceOf(EnsRainbowHttpError); + expect((error as EnsRainbowHttpError).status).toBe(404); }); }); }); @@ -382,4 +462,14 @@ describe("HealResponse cacheability", () => { expect(isCacheableHealResponse(response)).toBe(false); }); + + it("should consider HealServiceUnavailableError responses not cacheable", async () => { + const response: EnsRainbow.HealServiceUnavailableError = { + status: StatusCode.Error, + error: "ENSRainbow is still bootstrapping its database", + errorCode: ErrorCode.ServiceUnavailable, + }; + + expect(isCacheableHealResponse(response)).toBe(false); + }); }); diff --git a/packages/ensrainbow-sdk/src/client.ts b/packages/ensrainbow-sdk/src/client.ts index 3144a35b6c..97b45f7d50 100644 --- a/packages/ensrainbow-sdk/src/client.ts +++ b/packages/ensrainbow-sdk/src/client.ts @@ -11,6 +11,40 @@ import { import { DEFAULT_ENSRAINBOW_URL, ErrorCode, StatusCode } from "./consts"; +/** + * Error thrown by {@link EnsRainbowApiClient} methods when the ENSRainbow service responds + * with a non-2xx HTTP status code. + * + * Carries the HTTP status code as a structured property (rather than only embedding it in the + * error message) so callers can branch their retry/abort logic on the status — e.g. retry on + * `503 Service Unavailable` while ENSRainbow bootstraps, but abort immediately on `404`/`500`, + * which usually indicate a misconfigured base URL or a hard server failure. + * + * Network-level failures (DNS, ECONNREFUSED, fetch parse errors) are *not* wrapped in this + * class — they propagate as their original `Error` (typically a `TypeError` from `fetch`), + * because such failures are commonly transient during cold start and should remain retryable + * by callers. + */ +export class EnsRainbowHttpError extends Error { + readonly name = "EnsRainbowHttpError"; + + /** + * The HTTP status code returned by the ENSRainbow service. 
+ */ + readonly status: number; + + /** + * The HTTP status text returned by the ENSRainbow service, if any. + */ + readonly statusText: string; + + constructor(message: string, status: number, statusText = "") { + super(message); + this.status = status; + this.statusText = statusText; + } +} + export namespace EnsRainbow { export type ApiClientOptions = EnsRainbowApiClientOptions; @@ -32,6 +66,14 @@ export namespace EnsRainbow { health(): Promise; + /** + * Check whether the ENSRainbow service has finished bootstrapping and is ready to serve requests. + * + * Throws when the service is not ready (e.g. 503 while the database is still being downloaded + * or validated) so callers can retry. + */ + ready(): Promise; + getOptions(): Readonly; } @@ -43,6 +85,23 @@ export namespace EnsRainbow { status: "ok"; } + /** + * Response returned by `GET /ready` when the ENSRainbow service is ready to serve requests. + */ + export interface ReadyResponse { + status: "ok"; + } + + /** + * Generic error shape used by endpoints that return 503 Service Unavailable while the + * database is still bootstrapping (downloading, extracting, or validating). + */ + export interface ServiceUnavailableError { + status: typeof StatusCode.Error; + error: string; + errorCode: typeof ErrorCode.ServiceUnavailable; + } + export interface BaseHealResponse { status: Status; label?: Label | never; @@ -81,17 +140,29 @@ export namespace EnsRainbow { errorCode: typeof ErrorCode.BadRequest; } + export interface HealServiceUnavailableError + extends BaseHealResponse { + status: typeof StatusCode.Error; + label?: never; + error: string; + errorCode: typeof ErrorCode.ServiceUnavailable; + } + export type HealResponse = | HealSuccess | HealNotFoundError | HealServerError - | HealBadRequestError; + | HealBadRequestError + | HealServiceUnavailableError; export type HealError = Exclude; /** - * Server errors should not be cached. + * Server errors and transient bootstrap errors should not be cached. 
*/ - export type CacheableHealResponse = Exclude; + export type CacheableHealResponse = Exclude< + HealResponse, + HealServerError | HealServiceUnavailableError + >; export interface BaseCountResponse { status: Status; @@ -120,7 +191,16 @@ export namespace EnsRainbow { errorCode: typeof ErrorCode.ServerError; } - export type CountResponse = CountSuccess | CountServerError; + export interface CountServiceUnavailableError + extends BaseCountResponse { + status: typeof StatusCode.Error; + count?: never; + timestamp?: never; + error: string; + errorCode: typeof ErrorCode.ServiceUnavailable; + } + + export type CountResponse = CountSuccess | CountServerError | CountServiceUnavailableError; /** * Complete public configuration object for ENSRainbow. @@ -352,17 +432,77 @@ export class EnsRainbowApiClient implements EnsRainbow.ApiClient { async health(): Promise { const response = await fetch(new URL("/health", this.options.endpointUrl)); + if (!response.ok) { + throw new EnsRainbowHttpError( + `ENSRainbow health check failed (HTTP ${response.status}${ + response.statusText ? ` ${response.statusText}` : "" + })`, + response.status, + response.statusText, + ); + } + return response.json() as Promise; } + /** + * Check whether the ENSRainbow service is ready (database is downloaded, validated, and open). + * + * Unlike {@link EnsRainbowApiClient.health}, which is a pure liveness probe that succeeds as soon + * as the HTTP server is accepting requests, `ready()` only resolves once the service has finished + * bootstrapping its database. Clients that require a usable database (e.g. ENSIndexer) should + * poll this method instead of `health()` during startup. + * + * @throws {EnsRainbowHttpError} if the service responds with a non-2xx status. The thrown + * error carries the HTTP `status` so callers can distinguish the retryable bootstrap case + * (`503 Service Unavailable`) from likely-non-retryable misconfiguration / server failures + * (e.g. 
`404`, `500`) and abort retries early in the latter cases. + * @throws Network/fetch errors (DNS, ECONNREFUSED, etc.) propagate as their original error + * type and should generally remain retryable, since they are common during cold start before + * the ENSRainbow HTTP server has bound its port. + */ + async ready(): Promise { + const response = await fetch(new URL("/ready", this.options.endpointUrl)); + + if (!response.ok) { + const statusSuffix = `HTTP ${response.status}${ + response.statusText ? ` ${response.statusText}` : "" + }`; + + if (response.status === 503) { + throw new EnsRainbowHttpError( + `ENSRainbow readiness check: service not ready yet (${statusSuffix})`, + response.status, + response.statusText, + ); + } + + throw new EnsRainbowHttpError( + `ENSRainbow readiness check failed (${statusSuffix}). This usually indicates a non-readiness issue (e.g. wrong base URL, misrouting, or a server error).`, + response.status, + response.statusText, + ); + } + + return response.json() as Promise; + } + /** * Get the public configuration of the ENSRainbow service. + * + * @throws {EnsRainbowHttpError} if the service responds with a non-2xx status. */ async config(): Promise { const response = await fetch(new URL("/v1/config", this.options.endpointUrl)); if (!response.ok) { - throw new Error(`Failed to fetch ENSRainbow config: ${response.statusText}`); + throw new EnsRainbowHttpError( + `Failed to fetch ENSRainbow config: HTTP ${response.status}${ + response.statusText ? 
` ${response.statusText}` : "" + }`, + response.status, + response.statusText, + ); } return response.json() as Promise; @@ -408,5 +548,9 @@ export const isHealError = ( export const isCacheableHealResponse = ( response: EnsRainbow.HealResponse, ): response is EnsRainbow.CacheableHealResponse => { - return response.status === StatusCode.Success || response.errorCode !== ErrorCode.ServerError; + if (response.status === StatusCode.Success) return true; + return ( + response.errorCode !== ErrorCode.ServerError && + response.errorCode !== ErrorCode.ServiceUnavailable + ); }; diff --git a/packages/ensrainbow-sdk/src/consts.ts b/packages/ensrainbow-sdk/src/consts.ts index 480d7753eb..6cb8590727 100644 --- a/packages/ensrainbow-sdk/src/consts.ts +++ b/packages/ensrainbow-sdk/src/consts.ts @@ -9,4 +9,5 @@ export const ErrorCode = { BadRequest: 400, NotFound: 404, ServerError: 500, + ServiceUnavailable: 503, } as const;