From 1d18c95e59afd7d037bfbdc8f9975a416d00ca83 Mon Sep 17 00:00:00 2001 From: Nick Ficano Date: Fri, 12 Jun 2026 08:15:32 -0400 Subject: [PATCH] fix(client): abandon timed-out WebSocket handshakes without blocking (#113) The connect timeout path called HttpClient.close(), which waits for the abandoned handshake to complete; against a server that accepts the TCP connection but never finishes the HTTP upgrade, the JDK client retries the GET on EOF, turning the configured timeout into an indefinite hang. The timeout path now cancels the handshake stage and uses shutdownNow(), so connect() throws promptly. Regression test connects to a silent accept-and-discard server with a 500ms timeout and bounds the wall clock. Closes #113 Co-Authored-By: Claude Fable 5 --- .../dev/arcp/client/WebSocketTransport.java | 6 +- .../client/WebSocketConnectTimeoutTest.java | 58 +++++++++++++++++++ 2 files changed, 63 insertions(+), 1 deletion(-) create mode 100644 arcp-client/src/test/java/dev/arcp/client/WebSocketConnectTimeoutTest.java diff --git a/arcp-client/src/main/java/dev/arcp/client/WebSocketTransport.java b/arcp-client/src/main/java/dev/arcp/client/WebSocketTransport.java index c6434d4..8d9ccbd 100644 --- a/arcp-client/src/main/java/dev/arcp/client/WebSocketTransport.java +++ b/arcp-client/src/main/java/dev/arcp/client/WebSocketTransport.java @@ -127,8 +127,12 @@ public void onError(WebSocket webSocket, Throwable error) { } throw new IllegalStateException("WebSocket connect failed: " + cause, cause); } catch (java.util.concurrent.TimeoutException e) { + // Abandon the in-flight handshake without waiting on it: close() blocks until the operation + // completes, and against a server that accepts but never finishes the upgrade the JDK client + // retries on EOF — close() would turn the configured timeout into an indefinite hang (#113). + stage.cancel(true); try { - httpClient.close(); + httpClient.shutdownNow(); } catch (RuntimeException ignored) { // best-effort } diff --git a/arcp-client/src/test/java/dev/arcp/client/WebSocketConnectTimeoutTest.java b/arcp-client/src/test/java/dev/arcp/client/WebSocketConnectTimeoutTest.java new file mode 100644 index 0000000..075b43c --- /dev/null +++ b/arcp-client/src/test/java/dev/arcp/client/WebSocketConnectTimeoutTest.java @@ -0,0 +1,58 @@ +package dev.arcp.client; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +import dev.arcp.core.wire.ArcpMapper; +import java.net.ServerSocket; +import java.net.Socket; +import java.net.URI; +import java.time.Duration; +import java.util.Map; +import java.util.concurrent.atomic.AtomicBoolean; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.Timeout; + +/** + * #113: a server that accepts the TCP connection but never completes the HTTP upgrade must not turn + * the configured connect timeout into an indefinite hang — the JDK client retries on EOF, and + * {@code HttpClient.close()} waits for the abandoned handshake. + */ +class WebSocketConnectTimeoutTest { + + @Test + @Timeout(15) + void connectThrowsPromptlyAgainstSilentServer() throws Exception { + AtomicBoolean running = new AtomicBoolean(true); + try (ServerSocket silent = new ServerSocket(0)) { + Thread accepter = + Thread.ofVirtual() + .start( + () -> { + while (running.get()) { + try { + Socket s = silent.accept(); + // Read and discard the upgrade request; never respond. + s.getInputStream().read(new byte[4096]); + } catch (java.io.IOException e) { + return; + } + } + }); + URI uri = URI.create("ws://127.0.0.1:" + silent.getLocalPort() + "/arcp"); + long start = System.nanoTime(); + assertThatThrownBy( + () -> + WebSocketTransport.connect( + uri, Map.of(), ArcpMapper.shared(), Duration.ofMillis(500))) + .isInstanceOf(IllegalStateException.class) + .hasMessageContaining("timed out"); + long elapsedMillis = (System.nanoTime() - start) / 1_000_000; + // The hang fixed by #113 was unbounded; allow generous slack for slow CI but require the + // call to return in the same order of magnitude as the configured 500ms timeout. + assertThat(elapsedMillis).isLessThan(10_000); + running.set(false); + accepter.interrupt(); + } + } +}