Skip to content

Commit fd34191

Browse files
authored
http: document and validate options.path when it's in absolute-form
When `options.path` passed to `http.request()` contains an absolute URL, `http.request` has been sending it directly as the request target in the HTTP 1.1 message. If the receiving server is a proxy, the proxy server typically forwards the request to the destination specified in the request target and ignores the Host header. This means eventually the request can be forwarded to a destination that is not consistent with `options.host`, depending on how the receiving server behaves. Mimatched Host header and request target also violates RFC 9112 Section 3.2, which we have been entirely leaving to the users to verify. This patch documents this behavior and warns that the user needs to ensure the `path`, `option` and `headers` conform to the RFC. If the receiving server is known to be a proxy server because the request is routed by Node.js' built-in HTTP proxy support, we now do a best-effort check to verify that the authority in `options.path` (if absolute), Host headers and `options.host` agree at request construction time. Node.js will give up on the require target rewriting and throw an error when they don't match at request construction. Signed-off-by: Joyee Cheung <joyeec9h3@gmail.com> PR-URL: #64108 Reviewed-By: Matteo Collina <matteo.collina@gmail.com> Reviewed-By: Tim Perry <pimterry@gmail.com> Reviewed-By: Ulises Gascón <ulisesgascongonzalez@gmail.com>
1 parent 9c9c06f commit fd34191

7 files changed

Lines changed: 500 additions & 34 deletions

doc/api/http.md

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4096,6 +4096,18 @@ changes:
40964096
E.G. `'/index.html?page=12'`. An exception is thrown when the request path
40974097
contains illegal characters. Currently, only spaces are rejected but that
40984098
may change in the future. **Default:** `'/'`.
4099+
The content in `path` is sent as the [request target][] in the HTTP 1.1 message.
4100+
When `path` is an absolute URL, this means the request target in the message in [absolute form][].
4101+
If the receiving server is a proxy, the server typically forwards the request to the
4102+
destination specified in the request target, and ignores the `Host` header.
4103+
The user needs to make sure that `path`, `host` and the Host headers conform to the
4104+
requirement of the [request target][] in the HTTP specification.
4105+
When the receiving server is known to be a proxy because the request is routed through
4106+
[Built-in Proxy Support][], `http.request` will additionally perform a best-effort
4107+
check to see that the `host` option or `Host` in `headers` agrees with the authority
4108+
in `path` during the initial construction of the request. It gives up rewriting the
4109+
request target for proxying and throws an error if they don't match at request
4110+
construction time, though there won't be checks for later header mutations done by the user.
40994111
* `port` {number} Port of remote server. **Default:** `defaultPort` if set,
41004112
else `80`.
41014113
* `protocol` {string} Protocol to use. **Default:** `'http:'`.
@@ -4792,5 +4804,7 @@ const agent2 = new http.Agent({ proxyEnv: process.env });
47924804
[`writable.destroyed`]: stream.md#writabledestroyed
47934805
[`writable.uncork()`]: stream.md#writableuncork
47944806
[`writable.write()`]: stream.md#writablewritechunk-encoding-callback
4807+
[absolute form]: https://datatracker.ietf.org/doc/html/rfc9112#section-3.2.2
47954808
[information event]: #event-information
47964809
[initial delay]: net.md#socketsetkeepaliveenable-initialdelay-interval-count
4810+
[request target]: https://datatracker.ietf.org/doc/html/rfc9112#section-3.2

lib/_http_client.js

Lines changed: 167 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,7 @@ const {
8484
validateInteger,
8585
validateBoolean,
8686
validateOneOf,
87+
validatePort,
8788
validateString,
8889
} = require('internal/validators');
8990
const { getTimerDuration } = require('internal/timers');
@@ -139,15 +140,134 @@ class HTTPClientAsyncResource {
139140
}
140141
}
141142

143+
// The only documented shape is [k, v, k, v, ...]. Here we also accept [[k, v], [k, v], ...].
144+
// for backward compatibility, and reject others. Also reject if there are duplicate Host entries.
145+
// Returns the Host header value, or undefined if absent.
146+
function getHostFromHeaderArray(headers) {
147+
let host;
148+
const isPairs = headers.length > 0 && ArrayIsArray(headers[0]);
149+
if (isPairs) {
150+
for (let i = 0; i < headers.length; i++) {
151+
const entry = headers[i];
152+
if (!ArrayIsArray(entry)) {
153+
throw new ERR_INVALID_ARG_VALUE(`options.headers[${i}]`, typeof entry,
154+
'must be an array when headers is passed as an array of pairs');
155+
}
156+
if (`${entry[0]}`.toLowerCase() === 'host') {
157+
if (host !== undefined) {
158+
throw new ERR_INVALID_ARG_VALUE('options.headers', '(redacted)',
159+
'must not contain duplicate Host headers');
160+
}
161+
host = `${entry[1]}`;
162+
}
163+
}
164+
} else {
165+
for (let i = 0; i + 1 < headers.length; i += 2) {
166+
if (`${headers[i]}`.toLowerCase() === 'host') {
167+
if (host !== undefined) {
168+
throw new ERR_INVALID_ARG_VALUE('options.headers', '(redacted)',
169+
'must not contain duplicate Host headers');
170+
}
171+
host = `${headers[i + 1]}`;
172+
}
173+
}
174+
}
175+
return host;
176+
}
177+
178+
function authoritiesMatch(canonicalHost, hostFromHeader) {
179+
let parsed;
180+
try {
181+
parsed = new URL(`http://${hostFromHeader}`);
182+
} catch {
183+
return false;
184+
}
185+
if (parsed.username || parsed.password ||
186+
parsed.pathname !== '/' || parsed.search || parsed.hash) {
187+
return false;
188+
}
189+
return parsed.host === canonicalHost;
190+
}
191+
192+
// https://datatracker.ietf.org/doc/html/rfc9112#section-3.2
193+
// When the request target is in absolute-form, ensure it is consistent with
194+
// the request authority: same scheme, no userinfo, and an authority
195+
// component agree with options.host[:port].
196+
function validateRequestAuthority(pathOption, proxyAuthority, userHostHeader, headerArray) {
197+
validatePort(proxyAuthority.port, 'options.port', true);
198+
pathOption = `${pathOption}`;
199+
const requestBase = new URL(`http://${proxyAuthority.host}`);
200+
requestBase.port = proxyAuthority.port;
201+
202+
const result = { requestBase };
203+
if (headerArray !== undefined) {
204+
const host = getHostFromHeaderArray(headerArray);
205+
// Since we don't mutate the header array to normalize the Host value, unlike
206+
// in the case of other shapes of headers provided, we check that it is identical
207+
// to the authority from the requestBase.
208+
if (host !== undefined && host !== requestBase.host) {
209+
throw new ERR_INVALID_ARG_VALUE(
210+
'Host in options.headers', host,
211+
`must match the request authority (${requestBase.host})`);
212+
}
213+
} else if (userHostHeader !== undefined) {
214+
if (!authoritiesMatch(requestBase.host, userHostHeader)) {
215+
throw new ERR_INVALID_ARG_VALUE(
216+
'Host in options.headers', userHostHeader,
217+
`must match the request authority (${requestBase.host})`);
218+
}
219+
}
220+
221+
// Per RFC 9112 Section 3.2, if request target is in absolute-form its authority
222+
// must agree with the request authority.
223+
let requestURL;
224+
let isAbsoluteForm = false;
225+
try {
226+
requestURL = new URL(pathOption);
227+
isAbsoluteForm = true;
228+
} catch {
229+
if (pathOption.charCodeAt(0) !== 0x2F) {
230+
throw new ERR_INVALID_ARG_VALUE(
231+
'options.path', pathOption, 'must be in absolute-form or start with /');
232+
}
233+
requestURL = new URL(requestBase.origin + pathOption);
234+
}
235+
result.requestURL = requestURL;
236+
if (!isAbsoluteForm) {
237+
return result;
238+
}
239+
240+
if (requestURL.username || requestURL.password) {
241+
requestURL.username = '';
242+
requestURL.password = '';
243+
throw new ERR_INVALID_ARG_VALUE(
244+
'options.path', requestURL.href, 'must not contain userinfo, use options.auth instead');
245+
}
246+
247+
if (requestURL.protocol !== 'http:') {
248+
throw new ERR_INVALID_ARG_VALUE(
249+
'options.path', requestURL.protocol, 'must use http: scheme when specified as an absolute URL');
250+
}
251+
252+
if (requestBase.host !== requestURL.host) {
253+
throw new ERR_INVALID_ARG_VALUE(
254+
'options.path', requestURL, `must match the request authority (${requestBase.host})`);
255+
}
256+
257+
return result;
258+
}
259+
142260
// When proxying a HTTP request, the following needs to be done:
143-
// https://datatracker.ietf.org/doc/html/rfc7230#section-5.3.2
261+
// https://datatracker.ietf.org/doc/html/rfc9112#section-3.2.2
144262
// 1. Rewrite the request path to absolute-form.
145263
// 2. Add proxy-connection and proxy-authorization headers appropriately.
146264
//
147265
// This function checks whether the request should be rewritten for proxying
148266
// and modifies the headers as well as req.path if necessary.
149267
// The handling of the proxy server connection is done in createConnection.
150-
function rewriteForProxiedHttp(req, reqOptions) {
268+
// It also validates that the Host header and absolute-form path authority match the
269+
// request authority specified by reqOptions.
270+
function rewriteForProxiedHttp(req, reqOptions, proxyAuthority, userHostHeader, headerArray) {
151271
if (req._header) {
152272
debug('request._header is already sent, skipping rewriteForProxiedHttp', reqOptions);
153273
return false;
@@ -165,6 +285,25 @@ function rewriteForProxiedHttp(req, reqOptions) {
165285
if (!shouldUseProxy) {
166286
return false;
167287
}
288+
289+
// Per RFC 9112 Section 3.2.2, we don't need to rewrite CONNECT or OPTIONS * requests.
290+
let requestURL;
291+
if (req.method !== 'CONNECT' && !(req.method === 'OPTIONS' && req.path === '*')) {
292+
// Validate Host header values agree with the request authority before mutating req,
293+
// so a rejected request doesn't leave proxy-* headers stuck on the outgoing header store.
294+
// XXX(joyeecheung): This validates whether the request conforms to the RFC, but here
295+
// we only do it for proxied requests for backward compatibility. For non-proxied requests,
296+
// ensuring thst the request is well formed has been entirely left to the user.
297+
const result = validateRequestAuthority(req.path, proxyAuthority, userHostHeader, headerArray);
298+
if (headerArray === undefined) {
299+
const currentHost = req.getHeader('host');
300+
if (currentHost !== undefined && currentHost !== result.requestBase.host) {
301+
req.setHeader('Host', result.requestBase.host);
302+
}
303+
}
304+
requestURL = result.requestURL;
305+
}
306+
168307
// Add proxy headers.
169308
const { auth, href } = agent[kProxyConfig];
170309
if (auth) {
@@ -176,15 +315,10 @@ function rewriteForProxiedHttp(req, reqOptions) {
176315
req.setHeader('proxy-connection', 'close');
177316
}
178317

179-
// Convert the path to absolute-form.
180-
// https://datatracker.ietf.org/doc/html/rfc7230#section-5.3.2
181-
const requestHost = req.getHeader('host') || 'localhost';
182-
const requestBase = `http://${requestHost}`;
183-
const requestURL = new URL(req.path, requestBase);
184-
if (reqOptions.port) {
185-
requestURL.port = reqOptions.port;
318+
if (requestURL !== undefined) {
319+
// Convert the path to absolute-form. The authority is built from options.
320+
req.path = requestURL.href;
186321
}
187-
req.path = requestURL.href;
188322
debug(`updated request for HTTP proxy ${href} with ${req.path} `, req[kOutHeaders]);
189323
return true;
190324
};
@@ -360,6 +494,21 @@ function ClientRequest(input, options, cb) {
360494
}
361495
}
362496

497+
let hostHeaderFromOptions = host;
498+
// For the Host header, ensure that IPv6 addresses are enclosed
499+
// in square brackets, as defined by URI formatting
500+
// https://tools.ietf.org/html/rfc3986#section-3.2.2
501+
const posColon = hostHeaderFromOptions.indexOf(':');
502+
if (posColon !== -1 &&
503+
hostHeaderFromOptions.includes(':', posColon + 1) &&
504+
hostHeaderFromOptions.charCodeAt(0) !== 91/* '[' */) {
505+
hostHeaderFromOptions = `[${hostHeaderFromOptions}]`;
506+
}
507+
const proxyAuthority = { host: hostHeaderFromOptions, port };
508+
509+
if (port && +port !== defaultPort) {
510+
hostHeaderFromOptions += ':' + port;
511+
}
363512
const headersArray = ArrayIsArray(options.headers);
364513
if (!headersArray) {
365514
if (options.headers) {
@@ -372,23 +521,12 @@ function ClientRequest(input, options, cb) {
372521
}
373522
}
374523

375-
if (host && !this.getHeader('host') && setHost) {
376-
let hostHeader = host;
377-
378-
// For the Host header, ensure that IPv6 addresses are enclosed
379-
// in square brackets, as defined by URI formatting
380-
// https://tools.ietf.org/html/rfc3986#section-3.2.2
381-
const posColon = hostHeader.indexOf(':');
382-
if (posColon !== -1 &&
383-
hostHeader.includes(':', posColon + 1) &&
384-
hostHeader.charCodeAt(0) !== 91/* '[' */) {
385-
hostHeader = `[${hostHeader}]`;
386-
}
524+
// Save the Host header before the implicit auto-set below, so the
525+
// proxy validator can tell user-explicit values from Node-generated ones.
526+
const userHostHeader = this.getHeader('host');
387527

388-
if (port && +port !== defaultPort) {
389-
hostHeader += ':' + port;
390-
}
391-
this.setHeader('Host', hostHeader);
528+
if (host && !this.getHeader('host') && setHost) {
529+
this.setHeader('Host', hostHeaderFromOptions);
392530
}
393531

394532
if (options.auth && !this.getHeader('Authorization')) {
@@ -401,14 +539,14 @@ function ClientRequest(input, options, cb) {
401539
throw new ERR_HTTP_HEADERS_SENT('render');
402540
}
403541

404-
rewriteForProxiedHttp(this, optsWithoutSignal);
542+
rewriteForProxiedHttp(this, optsWithoutSignal, proxyAuthority, userHostHeader);
405543
this._storeHeader(this.method + ' ' + this.path + ' HTTP/1.1\r\n',
406544
this[kOutHeaders]);
407545
} else {
408-
rewriteForProxiedHttp(this, optsWithoutSignal);
546+
rewriteForProxiedHttp(this, optsWithoutSignal, proxyAuthority, userHostHeader);
409547
}
410548
} else {
411-
rewriteForProxiedHttp(this, optsWithoutSignal);
549+
rewriteForProxiedHttp(this, optsWithoutSignal, proxyAuthority, undefined, options.headers);
412550
this._storeHeader(this.method + ' ' + this.path + ' HTTP/1.1\r\n',
413551
options.headers);
414552
}
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
// This tests that proxied HTTP requests succeed end-to-end when the
2+
// absolute-form request-target matches the request authority derived from options.
3+
4+
import * as common from '../common/index.mjs';
5+
import assert from 'node:assert';
6+
import http from 'node:http';
7+
import { once } from 'events';
8+
import { createProxyServer } from '../common/proxy-server.js';
9+
10+
const server = http.createServer(common.mustCall((req, res) => {
11+
res.end('Hello world');
12+
}, 6));
13+
server.on('error', common.mustNotCall());
14+
server.listen(0);
15+
await once(server, 'listening');
16+
17+
const { proxy, logs } = createProxyServer();
18+
proxy.listen(0);
19+
await once(proxy, 'listening');
20+
21+
const port = server.address().port;
22+
const serverHost = `localhost:${port}`;
23+
const requestUrl = `http://${serverHost}/test`;
24+
25+
const agent = new http.Agent({
26+
proxyEnv: {
27+
HTTP_PROXY: `http://localhost:${proxy.address().port}`,
28+
},
29+
});
30+
31+
async function roundTrip(options) {
32+
const req = http.request({ agent, ...options });
33+
req.end();
34+
const [res] = await once(req, 'response');
35+
res.setEncoding('utf8');
36+
let body = '';
37+
for await (const chunk of res) body += chunk;
38+
assert.strictEqual(body, 'Hello world');
39+
}
40+
41+
const baseAbsolute = { host: 'localhost', port, path: requestUrl };
42+
43+
const options = [
44+
// No user-supplied headers.
45+
baseAbsolute,
46+
// Object form with an explicit Host that matches.
47+
{ ...baseAbsolute, headers: { Host: serverHost } },
48+
// Flat array form.
49+
{ ...baseAbsolute, headers: ['Host', serverHost] },
50+
// Array-of-pairs form.
51+
{ ...baseAbsolute, headers: [['Host', serverHost]] },
52+
// Contains defaultPort that matches options.port.
53+
{ host: 'localhost', port, defaultPort: port, path: '/test' },
54+
// Stringifiable non-string path object.
55+
{ host: 'localhost', port, path: { toString() { return '/test'; } } },
56+
];
57+
58+
for (const opts of options) {
59+
await roundTrip(opts);
60+
// Check what the proxy server received.
61+
const log = logs.pop();
62+
assert.strictEqual(logs.length, 0);
63+
assert.strictEqual(log.method, 'GET');
64+
assert.strictEqual(log.url, requestUrl);
65+
assert.strictEqual(log.headers.host, serverHost);
66+
}
67+
68+
server.close();
69+
proxy.close();
70+
agent.destroy();

0 commit comments

Comments
 (0)