From 3d2b0d4d5648e39cec6c6af713e8666bdb7e054c Mon Sep 17 00:00:00 2001 From: Dhruv Aggarwal Date: Fri, 5 Jun 2026 22:38:00 +0530 Subject: [PATCH 1/3] Add Playwright proxy samples --- samples/playwright-proxy-tests/.env.example | 9 + samples/playwright-proxy-tests/.gitignore | 3 + samples/playwright-proxy-tests/README.md | 278 ++++++++++++++++++ .../playwright-proxy-tests/connectOverCdp.mjs | 109 +++++++ samples/playwright-proxy-tests/package.json | 16 + .../playwrightConnect.mjs | 124 ++++++++ .../proxy-server/.dockerignore | 3 + .../proxy-server/Dockerfile | 26 ++ .../proxy-server/README.md | 215 ++++++++++++++ .../proxy-server/deploy-azure.ps1 | 111 +++++++ .../proxy-server/package.json | 15 + .../proxy-server/server.mjs | 78 +++++ .../pwwSessionClient.mjs | 73 +++++ samples/playwright-proxy-tests/rawCdp.mjs | 198 +++++++++++++ 14 files changed, 1258 insertions(+) create mode 100644 samples/playwright-proxy-tests/.env.example create mode 100644 samples/playwright-proxy-tests/.gitignore create mode 100644 samples/playwright-proxy-tests/README.md create mode 100644 samples/playwright-proxy-tests/connectOverCdp.mjs create mode 100644 samples/playwright-proxy-tests/package.json create mode 100644 samples/playwright-proxy-tests/playwrightConnect.mjs create mode 100644 samples/playwright-proxy-tests/proxy-server/.dockerignore create mode 100644 samples/playwright-proxy-tests/proxy-server/Dockerfile create mode 100644 samples/playwright-proxy-tests/proxy-server/README.md create mode 100644 samples/playwright-proxy-tests/proxy-server/deploy-azure.ps1 create mode 100644 samples/playwright-proxy-tests/proxy-server/package.json create mode 100644 samples/playwright-proxy-tests/proxy-server/server.mjs create mode 100644 samples/playwright-proxy-tests/pwwSessionClient.mjs create mode 100644 samples/playwright-proxy-tests/rawCdp.mjs diff --git a/samples/playwright-proxy-tests/.env.example b/samples/playwright-proxy-tests/.env.example new file mode 100644 index 0000000..bec6b4b 
--- /dev/null +++ b/samples/playwright-proxy-tests/.env.example @@ -0,0 +1,9 @@ +# Microsoft Playwright Workspaces (PWW) — region + workspace ID +# Get these from the Azure portal under your Playwright Workspaces resource. +PLAYWRIGHT_SERVICE_URL="wss://<region>.api.playwright.microsoft.com/playwrightworkspaces/<workspace-id>/browsers" +PLAYWRIGHT_SERVICE_ACCESS_TOKEN="" + +# Authenticated forward proxy (deploy from ./proxy-server, or use your own). +PROXY_SERVER="http://<proxy-host>:8080" +PROXY_USERNAME="" +PROXY_PASSWORD="" diff --git a/samples/playwright-proxy-tests/.gitignore b/samples/playwright-proxy-tests/.gitignore new file mode 100644 index 0000000..2e8157a --- /dev/null +++ b/samples/playwright-proxy-tests/.gitignore @@ -0,0 +1,3 @@ +node_modules/ +.env +*.log diff --git a/samples/playwright-proxy-tests/README.md b/samples/playwright-proxy-tests/README.md new file mode 100644 index 0000000..483b247 --- /dev/null +++ b/samples/playwright-proxy-tests/README.md @@ -0,0 +1,278 @@ +# Playwright Workspaces + Authenticated HTTP Proxy — Samples + +Three runnable Node.js samples showing how to route a remote Chromium on +**Microsoft Playwright Workspaces (PWW)** through an **authenticated outbound +HTTP proxy**, plus the proxy itself (deployable to Azure with one script). + +The samples are deliberately small (one file each, no test framework, no +abstraction layer) so you can read the entire request flow end to end and +copy the parts you need into your own code. 
+ +--- + +## Folder layout + +``` +playwright-proxy-tests/ +├── README.md ← you are here +├── .env.example ← copy to .env and fill in +├── package.json ← installs playwright + dotenv +├── connectOverCdp.mjs ← Sample 1: Playwright over CDP (recommended) +├── rawCdp.mjs ← Sample 2: hand-rolled CDP JSON-RPC +├── playwrightConnect.mjs ← Sample 3: Playwright native wire protocol +├── pwwSessionClient.mjs ← helper: gets a CDP wss:// URL from PWW +└── proxy-server/ ← the authenticated proxy (deploy to Azure) + ├── server.mjs + ├── Dockerfile + ├── deploy-azure.ps1 + ├── package.json + └── README.md +``` + +--- + +## The three samples at a glance + +The three samples are **not** identical demos. They differ in (a) what wire +protocol talks to the remote browser and (b) how much of the proxy auth dance +*you* have to write. They also differ in which steps they run: + +| File | Wire protocol to PWW | Proxy auth handled by | Direct step? | Proxied step? | Private origin step? | +| --- | --- | --- | :---: | :---: | :---: | +| [connectOverCdp.mjs](connectOverCdp.mjs) | CDP (`chromium.connectOverCDP`) | Playwright (internal `Fetch.*`) | yes | yes | yes | +| [rawCdp.mjs](rawCdp.mjs) | CDP (raw WebSocket JSON-RPC) | **You — `Fetch.enable` + `Fetch.continueWithAuth`** | no | yes | yes | +| [playwrightConnect.mjs](playwrightConnect.mjs) | Playwright native wire protocol (`chromium.connect`) | PWW server-side (you never see the 407) | yes | yes | yes | + +--- + +## What each sample actually demonstrates + +### 1. 
`connectOverCdp.mjs` — recommended + +Connects with `chromium.connectOverCDP()` against a one-shot PWW CDP endpoint, +then runs: + +| Step | Context | URL | Expected output | +| ---: | --- | --- | --- | +| 1 | `browser.newContext()` (no proxy) | `https://api.ipify.org` | the PWW container's egress IP | +| 2 | `browser.newContext({ proxy })` | `https://api.ipify.org` | the **proxy's** egress IP | +| 3 | same proxied context | `http://intranet.local:9090` | JSON from the private origin | + +The proxy `407` is handled inside Playwright — your code is just +`newContext({ proxy: { server, username, password } })`. + +### 2. `rawCdp.mjs` — see the protocol explicitly + +No Playwright. Opens a raw WebSocket to the PWW CDP endpoint and writes every +JSON-RPC frame by hand. This is the path to use if you need to **drive PWW +from a non-Node client** (any language with a WebSocket library) or if you're +debugging exactly what Playwright is sending. + +Setup frames (in order): + +``` +Target.createBrowserContext { proxyServer } → browserContextId +Target.createTarget { browserContextId, url } → targetId +Target.attachToTarget { targetId, flatten:true } → sessionId (all subsequent frames carry this) +Page.enable (so we can await Page.loadEventFired) +Runtime.enable (so we can Runtime.evaluate) +Fetch.enable { handleAuthRequests:true, patterns:[*] } (you now own the auth) +``` + +Then it runs two steps through that one proxied session: + +| Step | URL | Expected output | +| ---: | --- | --- | +| 1 | `https://api.ipify.org` | the proxy's egress IP | +| 2 | `http://intranet.local:9090` | JSON from the private origin | + +Run with `$env:CDP_DEBUG=1` to print every frame the script sends (`>>`) and +receives (`<<`). + +### 3. `playwrightConnect.mjs` — PWW does it all + +Connects with `chromium.connect()` against the **PWW service URL** (not a CDP +URL). The connection uses Playwright's native wire protocol over WebSocket, +authenticated with `Authorization: Bearer `. 
No CDP frames cross your +laptop's network — PWW relays everything on the server side. + +Runs the same three steps as Sample 1. The observable behaviour is identical; +the difference is purely the on-the-wire protocol and where the auth dance +happens (PWW relays it for you). + +--- + +## You have to authenticate twice + +Every sample performs **two** independent authentications. They are unrelated +and easy to confuse: + +1. **To the remote browser host (PWW).** A Bearer access token, sent in the + `Authorization` header (Sample 3) or carried in the wss URL (Samples 1 and 2). + - Sample 1 (`connectOverCdp.mjs`) — the token is in the wss URL Playwright + gets from the PWW REST API via [`pwwSessionClient.mjs`](pwwSessionClient.mjs). + - Sample 2 (`rawCdp.mjs`) — same wss URL, opened directly with `new WebSocket(...)`. + - Sample 3 (`playwrightConnect.mjs`) — passed via + `chromium.connect(url, { headers: { Authorization: 'Bearer ...' } })`. + - Lives in `PLAYWRIGHT_SERVICE_ACCESS_TOKEN` in `.env`. If this fails, + you get `401 Authentication failed`. + +2. **To the outbound HTTP proxy.** Standard HTTP Basic auth via the + `Proxy-Authorization: Basic <base64(username:password)>` header on every CONNECT / + request that traverses the proxy. If it fails, the proxy returns + `407 Proxy Authentication Required`. + - Lives in `PROXY_USERNAME` / `PROXY_PASSWORD` in `.env`. + - How this gets onto the wire is what differs across the three samples + (next section). + +--- + +## How the `407` is answered + +When a proxied request hits the proxy for the first time it gets +`407 Proxy Authentication Required` with `Proxy-Authenticate: Basic`. Someone +has to retry it with `Proxy-Authorization: Basic <base64(username:password)>`. Each +sample arranges that differently. 
+ +### Sample 1 & 3: Playwright handles it + +```js +const ctx = await browser.newContext({ + proxy: { server, username, password }, +}); +``` + +Playwright registers a `Fetch.enable { handleAuthRequests: true }` handler +internally and replies to every `Fetch.authRequired` event with your +credentials when the challenge is from a proxy, or cancels otherwise. Your +code has no callbacks, no event listeners, no protocol concerns. + +### Sample 2: you handle it (this is what makes raw CDP "raw") + +In raw CDP there is no abstraction — `Target.createBrowserContext { proxyServer }` +**only configures which proxy to talk to**, it does NOT configure credentials. +On the very first request, Chromium gets a 407 and stops. To get past it you +have to: + +1. Subscribe to the proxied session's `Fetch.*` events. +2. Enable interception with + `Fetch.enable { handleAuthRequests: true, patterns:[{urlPattern:'*'}] }`. +3. On each `Fetch.authRequired` event, decide based on `authChallenge.source`: + - `'Proxy'` → reply + `Fetch.continueWithAuth { response:'ProvideCredentials', username, password }`. + - `'Server'` → reply `Fetch.continueWithAuth { response:'CancelAuth' }` — + **do not send proxy creds to origin sites**; that would leak them to + any 401 site you visit. +4. On every non-auth `Fetch.requestPaused` event, reply + `Fetch.continueRequest { requestId }` so the request actually goes out. + +If you forget step 4, every request hangs because `Fetch.enable` pauses *all* +requests, not just auth-challenged ones. If you forget the `'Server'` branch +in step 3, you ship a credential-leakage bug. The full handler is in +[rawCdp.mjs](rawCdp.mjs). + +--- + +## Setup (one time) + +You need: + +- **Node.js 22+** (for the built-in global `WebSocket`, which is enabled by default only from Node 22, and built-in `fetch`). +- **A PWW workspace** + an access token (Azure portal → your Playwright Workspaces resource). 
+- **An authenticated HTTP proxy.** Either point at your own, or deploy the one + in [proxy-server/](proxy-server/) to Azure Container Instances — see + [proxy-server/README.md](proxy-server/README.md). If you use someone else's + proxy you won't get the `intranet.local:9090` step; the other two steps + still work. + +Then: + +```powershell +# 1) Install dependencies +cd playwright-proxy-tests +npm install + +# 2) Configure credentials +Copy-Item .env.example .env +# Edit .env and fill in: +# PLAYWRIGHT_SERVICE_URL, PLAYWRIGHT_SERVICE_ACCESS_TOKEN, +# PROXY_SERVER, PROXY_USERNAME, PROXY_PASSWORD +``` + +> `.env` is gitignored. Never commit real credentials. + +`.env` is shared by all three samples — each `.mjs` loads +`new URL('./.env', import.meta.url)` so it works from any cwd. + +--- + +## Running the samples + +```powershell +# Sample 1 — recommended high-level path +npm run sample:connect-over-cdp + +# Sample 2 — raw CDP. Add CDP_DEBUG=1 to print every frame: +npm run sample:raw-cdp +$env:CDP_DEBUG=1; node rawCdp.mjs; Remove-Item env:CDP_DEBUG + +# Sample 3 — Playwright native wire protocol. +# Add DEBUG=pw:* to see Playwright's outbound protocol frames: +npm run sample:playwright-connect +$env:DEBUG="pw:*"; node playwrightConnect.mjs 2>pw.log; Remove-Item env:DEBUG +``` + +Expected: in Samples 1 and 3, the direct step prints one IP, the proxied step +prints a different IP. In Sample 2, the single proxied step prints the same +IP as Samples 1 and 3's proxied step. All three private-origin steps print +the same JSON payload from the proxy container's loopback service. + +--- + +## Key facts worth knowing + +- **PWW exposes one CDP WebSocket per remote browser.** Multi-session work + (`Target.attachToTarget`) requires `flatten: true` so messages multiplex via + `sessionId` on that single socket. See + [crbug/40639208](https://issues.chromium.org/issues/40639208). 
+- **Per-context proxy is supported.** A Chromium context is bound to its + `proxyServer` at create time. This is why all three samples can mix a + direct context and a proxied context in the same browser (and Samples 1/3 + do exactly that). +- **Auth source matters.** On `Fetch.authRequired`, always check + `authChallenge.source`. Provide credentials only for `'Proxy'`; cancel + otherwise. Sending proxy credentials to an origin server is a leak. +- **`chromium.connect` vs `chromium.connectOverCDP`.** `connect` uses + Playwright's native protocol — PWW relays everything, your laptop never + speaks CDP. `connectOverCDP` opens a real CDP socket to the remote browser + — your machine speaks CDP directly. Either way Chromium-level features + (per-context proxy, etc.) behave the same. +- **Egress IPs.** + - PWW direct → an IP from the Microsoft-owned PWW egress range (varies by region). + - Via the proxy → the SNAT IP of your proxy container (printed by + `deploy-azure.ps1`). + +--- + +## Troubleshooting + +| Symptom | Likely cause | +| --- | --- | +| `Authentication failed. Check your access token.` | Expired or wrong `PLAYWRIGHT_SERVICE_ACCESS_TOKEN`. Regenerate in the portal. | +| `Invalid PLAYWRIGHT_SERVICE_URL format` | Must be `wss://<region>.api.playwright.microsoft.com/playwrightworkspaces/<workspace-id>/browsers`. | +| Script hangs on the first proxied navigation. | Most common: `PROXY_USERNAME` / `PROXY_PASSWORD` don't match what the proxy was deployed with — the proxy keeps returning 407. | +| `rawCdp.mjs` hangs even though direct CDP works. | You probably forgot to forward non-auth `Fetch.requestPaused` events with `Fetch.continueRequest`. `Fetch.enable` pauses **every** request. | +| Proxy creds appear on an origin site. | You're answering `Fetch.authRequired` with `ProvideCredentials` regardless of `authChallenge.source`. Gate on `=== 'Proxy'`. | +| Hangs on `intranet.local:9090`. 
| You're not using the proxy in [proxy-server/](proxy-server/) (or its container's `/etc/hosts` wasn't patched — see Dockerfile). | +| Sample 1/3 hangs on `newContext`. | Network can't reach PWW. Check corporate firewall lets `*.api.playwright.microsoft.com:443` through. | +| `connectOverCdp.mjs` works, `rawCdp.mjs` doesn't connect at all. | You called `Target.attachToTarget` without `flatten: true`. PWW's single-socket model requires flattened sessions. | + +--- + +## What to read next + +- [proxy-server/README.md](proxy-server/README.md) — how to deploy the proxy + to Azure Container Instances and why the `intranet.local:9090` private + origin is "proof" the tunnel works. +- The top-of-file docstring in each `.mjs` — recaps the demo steps and gives + copy-pasteable debug commands specific to that sample. diff --git a/samples/playwright-proxy-tests/connectOverCdp.mjs b/samples/playwright-proxy-tests/connectOverCdp.mjs new file mode 100644 index 0000000..5db1fbb --- /dev/null +++ b/samples/playwright-proxy-tests/connectOverCdp.mjs @@ -0,0 +1,109 @@ +/** + * connectOverCdp.mjs + * ============================================================================ + * Sample 1 of 3 — Playwright over CDP (recommended high-level path) + * ============================================================================ + * Drives a remote Chromium on Microsoft Playwright Workspaces (PWW) using + * Playwright's high-level `chromium.connectOverCDP()`. Playwright internally + * emits the same `Target.*` / `Fetch.*` CDP frames you can see in + * `rawCdp.mjs`, but you write a single `browser.newContext({ proxy })` call + * instead of handcrafting the protocol. + * + * This is the path most customers should use. + * + * Demo runs three steps: + * 1. DIRECT context -> https://api.ipify.org?format=json + * (shows the PWW container's public egress IP) + * 2. PROXIED context -> https://api.ipify.org?format=json + * (shows the PROXY's egress IP — request went through the proxy) + * 3. 
SAME proxied ctx -> http://intranet.local:9090/ + * (private origin only reachable through the proxy — proof the + * tunnel works end to end) + * + * Run: + * node connectOverCdp.mjs + * + * See the CDP frames Playwright is sending under the hood: + * $env:DEBUG = "pw:protocol"; node connectOverCdp.mjs *> cdp.log; Remove-Item env:DEBUG + * Select-String cdp.log -Pattern "createBrowserContext|Fetch.authRequired|continueWithAuth" -Context 0,1 + */ + +import { config } from 'dotenv'; +config({ path: new URL('./.env', import.meta.url) }); + +import { chromium } from 'playwright'; +import { getCdpEndpoint } from './pwwSessionClient.mjs'; + +// ─────────────────────────────────────────────────────────────────────────── // +// Config // +// ─────────────────────────────────────────────────────────────────────────── // + +const { PROXY_SERVER, PROXY_USERNAME, PROXY_PASSWORD } = process.env; + +const IPIFY_URL = 'https://api.ipify.org?format=json'; +const INTRANET_URL = 'http://intranet.local:9090/'; + +const PROXY = { + server: PROXY_SERVER, // e.g. http://:8080 + username: PROXY_USERNAME, + password: PROXY_PASSWORD, +}; + +// ─────────────────────────────────────────────────────────────────────────── // +// Connect to PWW over CDP // +// getCdpEndpoint() asks the PWW REST API for a one-shot wss:// URL. // +// ─────────────────────────────────────────────────────────────────────────── // + +const cdpEndpoint = await getCdpEndpoint(); +const browser = await chromium.connectOverCDP(cdpEndpoint); + +// ═════════════════════════════════════════════════════════════════════════ // +// STEP 1 — DIRECT context → ipify // +// No `proxy` option => Chromium reaches the internet directly from the // +// PWW container. ipify returns the container's public egress IP. 
// +// ═════════════════════════════════════════════════════════════════════════ // + +const directContext = await browser.newContext(); +const directPage = await directContext.newPage(); + +await directPage.goto(IPIFY_URL); +const directBody = await directPage.locator('body').innerText(); + +console.log('--- 1) DIRECT -> ipify ---'); +console.log(directBody); + +await directContext.close(); + +// ═════════════════════════════════════════════════════════════════════════ // +// STEP 2 — PROXIED context → ipify // +// Passing `proxy` to newContext makes Chromium route every request from // +// this context through the authenticated HTTP proxy. ipify now reports // +// the PROXY's egress IP, not the container's. // +// ═════════════════════════════════════════════════════════════════════════ // + +const proxiedContext = await browser.newContext({ proxy: PROXY }); +const proxiedPage1 = await proxiedContext.newPage(); + +await proxiedPage1.goto(IPIFY_URL); +const proxiedBody1 = await proxiedPage1.locator('body').innerText(); + +console.log('--- 2) PROXIED -> ipify ---'); +console.log(proxiedBody1); + +// ═════════════════════════════════════════════════════════════════════════ // +// STEP 3 — SAME proxied context → private intranet origin // +// `intranet.local:9090` is a loopback service running INSIDE the proxy // +// container. It is unreachable from the public internet. We can hit it // +// only because the proxy is tunnelling our CONNECT for that hostname. 
// +// ═════════════════════════════════════════════════════════════════════════ // + +const proxiedPage2 = await proxiedContext.newPage(); + +await proxiedPage2.goto(INTRANET_URL); +const proxiedBody2 = await proxiedPage2.locator('body').innerText(); + +console.log('--- 3) PROXIED -> intranet ---'); +console.log(proxiedBody2); + +await proxiedContext.close(); +await browser.close(); diff --git a/samples/playwright-proxy-tests/package.json b/samples/playwright-proxy-tests/package.json new file mode 100644 index 0000000..b663ffc --- /dev/null +++ b/samples/playwright-proxy-tests/package.json @@ -0,0 +1,16 @@ +{ + "name": "pww-proxy-samples", + "version": "1.0.0", + "type": "module", + "private": true, + "description": "Three working samples for using an authenticated HTTP proxy with Microsoft Playwright Workspaces remote Chromium.", + "scripts": { + "sample:connect-over-cdp": "node connectOverCdp.mjs", + "sample:raw-cdp": "node rawCdp.mjs", + "sample:playwright-connect": "node playwrightConnect.mjs" + }, + "dependencies": { + "dotenv": "^16.4.5", + "playwright": "^1.48.0" + } +} diff --git a/samples/playwright-proxy-tests/playwrightConnect.mjs b/samples/playwright-proxy-tests/playwrightConnect.mjs new file mode 100644 index 0000000..385b9b4 --- /dev/null +++ b/samples/playwright-proxy-tests/playwrightConnect.mjs @@ -0,0 +1,124 @@ +/** + * playwrightConnect.mjs + * ============================================================================ + * Sample 3 of 3 — Playwright's native wire protocol (no CDP on the wire) + * ============================================================================ + * Drives a remote Chromium on Microsoft Playwright Workspaces (PWW) via + * Playwright's NATIVE wire protocol (not CDP). `chromium.connect()` is + * pointed at the PWW service URL with a Bearer-token Authorization header. + * PWW relays `browser.newContext({ proxy })` to the remote Chromium and the + * proxy-auth handshake (Fetch.* frames) is performed entirely server-side. 
+ * From this laptop we never see a CDP frame. + * + * Demo runs three steps so the output lines up with the other two samples: + * 1. DIRECT context -> https://api.ipify.org?format=json + * (shows the LAPTOP's view of the egress IP — the PWW container) + * 2. PROXIED context -> https://api.ipify.org?format=json + * (shows the PROXY's egress IP — request went through the proxy) + * 3. SAME proxied ctx -> http://intranet.local:9090/ + * (private origin only reachable through the proxy) + * + * Run: + * node playwrightConnect.mjs + * + * See the Playwright wire-protocol frames being sent: + * $env:DEBUG="pw:*"; node playwrightConnect.mjs 2>pw.log; Remove-Item env:DEBUG + * Select-String pw.log -Pattern "newContext|setNetworkProxy|proxyServer" -Context 0,1 + */ + +import { config } from 'dotenv'; +config({ path: new URL('./.env', import.meta.url) }); + +import { randomUUID } from 'node:crypto'; +import { chromium } from 'playwright'; + +// ─────────────────────────────────────────────────────────────────────────── // +// Config // +// ─────────────────────────────────────────────────────────────────────────── // + +const { + PLAYWRIGHT_SERVICE_URL, // wss://.api.playwright.microsoft.com/... + PLAYWRIGHT_SERVICE_ACCESS_TOKEN, // PAT issued from the PWW portal + PROXY_SERVER, + PROXY_USERNAME, + PROXY_PASSWORD, +} = process.env; + +const IPIFY_URL = 'https://api.ipify.org?format=json'; +const INTRANET_URL = 'http://intranet.local:9090/'; + +const PROXY = { + server: PROXY_SERVER, + username: PROXY_USERNAME, + password: PROXY_PASSWORD, +}; + +// PWW wire-protocol query string. `runId` is a per-session UUID, `os` picks +// the remote container image, `api-version` pins the contract. 
+const API_VERSION = '2025-09-01'; +const OS_NAME = 'linux'; +const wsEndpoint = + `${PLAYWRIGHT_SERVICE_URL}` + + `?runId=${encodeURIComponent(randomUUID())}` + + `&os=${OS_NAME}` + + `&api-version=${API_VERSION}`; + +// ─────────────────────────────────────────────────────────────────────────── // +// Connect to PWW over the Playwright wire protocol // +// Auth is a Bearer token in the Authorization header — no CDP socket open. // +// ─────────────────────────────────────────────────────────────────────────── // + +const browser = await chromium.connect(wsEndpoint, { + headers: { Authorization: `Bearer ${PLAYWRIGHT_SERVICE_ACCESS_TOKEN}` }, + timeout: 3 * 60 * 1000, +}); + +// ═════════════════════════════════════════════════════════════════════════ // +// STEP 1 — DIRECT context → ipify // +// No `proxy` option => Chromium reaches the internet directly from the // +// PWW container. ipify returns the container's public egress IP. // +// ═════════════════════════════════════════════════════════════════════════ // + +const directContext = await browser.newContext(); +const directPage = await directContext.newPage(); + +await directPage.goto(IPIFY_URL); +const directBody = await directPage.locator('body').innerText(); + +console.log('--- 1) DIRECT -> ipify ---'); +console.log(directBody); + +await directContext.close(); + +// ═════════════════════════════════════════════════════════════════════════ // +// STEP 2 — PROXIED context → ipify // +// PWW relays the `proxy` setting to the remote browser; proxy 407 auth is // +// handled server-side. ipify now reports the PROXY's egress IP. 
// +// ═════════════════════════════════════════════════════════════════════════ // + +const proxiedContext = await browser.newContext({ proxy: PROXY }); +const proxiedPage1 = await proxiedContext.newPage(); + +await proxiedPage1.goto(IPIFY_URL); +const proxiedBody1 = await proxiedPage1.locator('body').innerText(); + +console.log('--- 2) PROXIED -> ipify ---'); +console.log(proxiedBody1); + +// ═════════════════════════════════════════════════════════════════════════ // +// STEP 3 — SAME proxied context → private intranet origin // +// `intranet.local:9090` is a loopback service running INSIDE the proxy // +// container. Only reachable because the proxy is tunnelling CONNECT for // +// that hostname. // +// ═════════════════════════════════════════════════════════════════════════ // + +const proxiedPage2 = await proxiedContext.newPage(); + +await proxiedPage2.goto(INTRANET_URL); +const proxiedBody2 = await proxiedPage2.locator('body').innerText(); + +console.log('--- 3) PROXIED -> intranet ---'); +console.log(proxiedBody2); + +await proxiedContext.close(); +await browser.close(); diff --git a/samples/playwright-proxy-tests/proxy-server/.dockerignore b/samples/playwright-proxy-tests/proxy-server/.dockerignore new file mode 100644 index 0000000..c81b8d3 --- /dev/null +++ b/samples/playwright-proxy-tests/proxy-server/.dockerignore @@ -0,0 +1,3 @@ +node_modules +npm-debug.log +.env diff --git a/samples/playwright-proxy-tests/proxy-server/Dockerfile b/samples/playwright-proxy-tests/proxy-server/Dockerfile new file mode 100644 index 0000000..d4326fb --- /dev/null +++ b/samples/playwright-proxy-tests/proxy-server/Dockerfile @@ -0,0 +1,26 @@ +# Dockerfile +# +# Builds the proxy container image. Runs server.mjs which exposes the +# authenticated forward proxy on :8080. Also injects `127.0.0.1 intranet.local` +# into /etc/hosts at start time so the proxy can resolve the friendly name to +# its in-container loopback origin (ACI doesn't let us mount /etc/hosts). 
+# +# Build + run locally: +# docker build -t pw-proxy . +# docker run --rm -e PROXY_USER=usr -e PROXY_PASS=pwd -p 8080:8080 pw-proxy + +FROM node:20-alpine +WORKDIR /app + +COPY package.json ./ +RUN npm install --omit=dev + +COPY server.mjs ./ + +ENV PORT=8080 +EXPOSE 8080 + +# `intranet.local` should resolve to loopback inside this container, so the +# proxy can tunnel CONNECT requests for that name to the in-container origin. +# We append to /etc/hosts at start time because ACI does not let us mount one. +CMD ["sh", "-c", "echo '127.0.0.1 intranet.local' >> /etc/hosts && node server.mjs"] diff --git a/samples/playwright-proxy-tests/proxy-server/README.md b/samples/playwright-proxy-tests/proxy-server/README.md new file mode 100644 index 0000000..7471db6 --- /dev/null +++ b/samples/playwright-proxy-tests/proxy-server/README.md @@ -0,0 +1,215 @@ +# Authenticated HTTP Proxy on Azure + +A minimal Node.js forward proxy with HTTP Basic auth, packaged for Azure +Container Instances (ACI). Sits between a browser (local, or on +Playwright Workspaces) and the public internet — or a private origin +bundled inside the same container. + +Two services run inside the container: + +| Service | Port | Reachable from | +| -------------------- | ---------------- | -------------------------------------------------------- | +| Authenticated proxy | `8080` (public) | anywhere on the internet (needs user/pass) | +| Private origin | `9090` (loopback)| only from inside the container — i.e. only via the proxy | + +The private origin is what makes the demo provable: `http://intranet.local:9090` +resolves to `127.0.0.1` **inside** the container only (see `Dockerfile`). +The public internet has no route to it. If a remote browser can read it, the +only way it got there is by tunneling through the proxy. 
+ +--- + +## Files + +| File | Purpose | +| --------------------- | -------------------------------------------------- | +| `server.mjs` | The proxy + the private origin (single process) | +| `Dockerfile` | Container image, injects `intranet.local` host | +| `package.json` | Deps: `proxy-chain` | +| `deploy-azure.ps1` | One-shot Azure deploy: ACR build → ACI run | +| `.dockerignore` | Skip `node_modules` etc. when building | + +--- + +## Prerequisites + +- An Azure subscription with permission to create ACR + ACI in a resource group. +- [Azure CLI](https://learn.microsoft.com/cli/azure/install-azure-cli) (`az --version`). +- (Optional, only for running locally) Node 18+. + +You do **not** need Docker installed — the image is built in the cloud by ACR. + +--- + +## Deploy to Azure (recommended) + +```powershell +az login + +.\deploy-azure.ps1 ` + -ResourceGroup "" ` + -ProxyUser "" ` + -ProxyPass "" +``` + +Optional flags: `-Location `, `-AppName `, `-Port ` (default `8080`). + +The script will: + +1. Reuse the resource group if it exists, else create it. +2. Create an Azure Container Registry, build the image in the cloud (`az acr build`). +3. Tear down any previous container with the same name. +4. Deploy to Azure Container Instances with a deterministic DNS label + (re-running the script gives you the same FQDN, so your `.env` doesn't churn). +5. Print the final FQDN, IP, port, and a ready-to-paste smoke test. + +Sample output: + +``` +============================================== + Proxy deployed. + FQDN : -..azurecontainer.io + IP : + Port : 8080 + User : +============================================== +``` + +Smoke test: + +```powershell +curl.exe -x "http://:@:8080" https://example.com -I +``` + +Expected: `HTTP/1.1 200 OK` from `example.com`, plus a line in the container +log: `[proxy] HTTPS -> example.com:443`. 
+ +--- + +## How clients use it + +Set these three values in the project root `.env` (see `../.env.example`): + +``` +PROXY_SERVER = http://<proxy-fqdn-or-ip>:8080 +PROXY_USERNAME = <ProxyUser> +PROXY_PASSWORD = <ProxyPass> +``` + +### Playwright (per-context) +```js +const context = await browser.newContext({ + proxy: { + server: process.env.PROXY_SERVER, + username: process.env.PROXY_USERNAME, + password: process.env.PROXY_PASSWORD, + }, +}); +``` + +### Raw CDP (`Target.createBrowserContext`) +```js +const { browserContextId } = await send('Target.createBrowserContext', { + proxyServer: process.env.PROXY_SERVER, +}); +// Authenticate via Fetch.enable + Fetch.continueWithAuth +``` + +### curl +```powershell +curl.exe -x "http://<user>:<pass>@<proxy-fqdn-or-ip>:8080" https://example.com +``` + +--- + +## Behaviour + +- **HTTP**: proxy receives the full request, forwards it, returns the response. + Logged as `[proxy] HTTP -> <host>:<port>`. +- **HTTPS**: client sends `CONNECT host:443`, proxy authenticates, opens a TCP + tunnel; all traffic after that is end-to-end TLS — the proxy only sees the + hostname. Logged as `[proxy] HTTPS -> <host>:<port>`. +- **Bad credentials**: returns `407 Proxy Authentication Required` with the + message `Bad username or password, please try again.` Nothing is forwarded. +- **No destination filtering**: once authenticated, any destination is allowed. + Treat the credentials as a shared secret — anyone with them can browse the + internet via your ACI egress IP. +- **Egress IP**: the public IP of the ACI instance (the script prints it). + Target sites see that IP, not the caller's IP, not Playwright Workspaces. + +--- + +## The internal "private origin" + +For demo / validation purposes only: + +- An `http.createServer` inside `server.mjs` listens on `127.0.0.1:9090`. +- The Dockerfile appends `127.0.0.1 intranet.local` to `/etc/hosts` inside the + container, so `http://intranet.local:9090` resolves to that loopback server + **only when DNS is performed inside the container**. 
+- When a remote browser requests `http://intranet.local:9090/` through the proxy, the + proxy resolves the hostname locally and forwards the request to itself (a plain-HTTP + proxied `GET`, not a `CONNECT` tunnel). The browser gets the JSON payload. +- Anyone trying to hit `http://intranet.local:9090` *without* going through the + proxy gets nothing — there is no public route. + +This is the proof: if a Playwright test running on remote Playwright +Workspaces browsers can read the JSON, the only path it could have taken is +through this proxy. + +--- + +## Run locally (optional, for development) + +```powershell +npm install +$env:PROXY_USER="usr"; $env:PROXY_PASS="pwd"; $env:PORT="8080" +node server.mjs +``` + +Test: + +```powershell +curl.exe -x "http://usr:pwd@127.0.0.1:8080" https://example.com -I +``` + +--- + +## Logs + +Stream container logs: +```powershell +az container logs -g <resource-group> -n pw-proxy --follow +``` + +You should see one line per request: +``` +[proxy] HTTPS 20.x.x.x -> example.com:443 +[proxy] HTTP 20.x.x.x -> intranet.local:9090 +[private] GET / from 127.0.0.1 +``` + +--- + +## Tear down + +```powershell +az container delete -g <resource-group> -n pw-proxy --yes +az acr list -g <resource-group> --query "[?starts_with(name,'pwproxyacr')].name" -o tsv | + ForEach-Object { az acr delete -g <resource-group> -n $_ --yes } +``` + +--- + +## Security notes + +- Credentials travel in the `Proxy-Authorization` header. The hop from client + to proxy is **plaintext HTTP** on port 8080 — anyone on the path can sniff + the proxy creds. For production, front this with TLS (e.g. Caddy / Nginx / + Application Gateway terminating HTTPS for the proxy URL itself, or run it + inside a VNet and access via Private Link). +- Rotate `ProxyPass` regularly; it's the only thing protecting your egress IP + from being used by strangers. +- The script passes `PROXY_PASS` to the container via `--secure-environment-variables` + (its value is not shown by `az container show`). It is still visible to anyone + with `Contributor` on the resource group. 
diff --git a/samples/playwright-proxy-tests/proxy-server/deploy-azure.ps1 b/samples/playwright-proxy-tests/proxy-server/deploy-azure.ps1 new file mode 100644 index 0000000..f18e880 --- /dev/null +++ b/samples/playwright-proxy-tests/proxy-server/deploy-azure.ps1 @@ -0,0 +1,111 @@ +# deploy-azure.ps1 +# +# Builds the proxy-server image in ACR and deploys it to Azure Container +# Instances with a public IP + DNS label. ACI is used (not Container Apps) +# because the HTTP CONNECT method used for HTTPS needs raw TCP passthrough +# on a public endpoint; ACI gives that on any port. +# +# Prereqs: az login +# +# Run: +# ./deploy-azure.ps1 -ResourceGroup my-rg -ProxyUser usr -ProxyPass '' +# +# Optional overrides: +# -Location default: existing RG location, else eastus +# -AcrName default: pwproxyacr +# -AppName default: pw-proxy +# -Port default: 8080 +# +# View live container logs: +# az container logs -g -n --follow + +param( + [Parameter(Mandatory=$true)][string]$ResourceGroup, + [string]$Location = "", + [string]$AcrName = "pwproxyacr$((Get-Random -Maximum 99999))", + [string]$AppName = "pw-proxy", + [int] $Port = 8080, + [Parameter(Mandatory=$true)][string]$ProxyUser, + [Parameter(Mandatory=$true)][string]$ProxyPass +) + +$ErrorActionPreference = "Stop" + +$rgExists = az group exists -n $ResourceGroup +if ($rgExists -eq "true") { + Write-Host "==> Using existing resource group $ResourceGroup" + if (-not $Location) { + $Location = az group show -n $ResourceGroup --query location -o tsv + } + Write-Host " (location: $Location)" +} else { + if (-not $Location) { $Location = "eastus" } + Write-Host "==> Creating resource group $ResourceGroup in $Location ..." + az group create -n $ResourceGroup -l $Location | Out-Null +} + +Write-Host "==> Creating ACR $AcrName ..." +az acr create -g $ResourceGroup -n $AcrName --sku Basic --admin-enabled true | Out-Null + +Write-Host "==> Building image in ACR ..." +az acr build -r $AcrName -t "pw-proxy:latest" . 
| Out-Null + +$acrServer = az acr show -n $AcrName --query loginServer -o tsv +$acrUser = az acr credential show -n $AcrName --query username -o tsv +$acrPass = az acr credential show -n $AcrName --query "passwords[0].value" -o tsv + +# Tear down any stale instance with the same name from a previous failed run. +# az writes "not found" to stderr which $ErrorActionPreference=Stop would treat +# as terminating; suppress both streams and rely on $LASTEXITCODE instead. +$existing = & { $ErrorActionPreference = 'Continue'; az container show -g $ResourceGroup -n $AppName 2>&1 | Out-Null; $LASTEXITCODE } +if ($existing -eq 0) { + Write-Host "==> Removing previous container instance $AppName ..." + az container delete -g $ResourceGroup -n $AppName --yes | Out-Null +} + +# DNS label must be globally unique within the region. +# Derive it deterministically from subscription + RG + app name so re-running +# this script reuses the same FQDN (no need to update .env after each deploy). +$subId = az account show --query id -o tsv +$hash = [BitConverter]::ToString( + [System.Security.Cryptography.SHA1]::Create().ComputeHash( + [System.Text.Encoding]::UTF8.GetBytes("$subId|$ResourceGroup|$AppName") + ) + ).Replace('-', '').Substring(0, 6).ToLower() +$dnsLabel = "$AppName-$hash".ToLower() + +Write-Host "==> Deploying ACI $AppName ($dnsLabel) ..." 
+az container create ` + -g $ResourceGroup ` + -n $AppName ` + --image "$acrServer/pw-proxy:latest" ` + --registry-login-server $acrServer ` + --registry-username $acrUser ` + --registry-password $acrPass ` + --os-type Linux ` + --cpu 1 --memory 1 ` + --ports $Port ` + --ip-address Public ` + --dns-name-label $dnsLabel ` + --environment-variables "PROXY_USER=$ProxyUser" "PORT=$Port" ` + --secure-environment-variables "PROXY_PASS=$ProxyPass" | Out-Null + +$fqdn = az container show -g $ResourceGroup -n $AppName --query "ipAddress.fqdn" -o tsv +$ip = az container show -g $ResourceGroup -n $AppName --query "ipAddress.ip" -o tsv + +Write-Host "" +Write-Host "==============================================" +Write-Host " Proxy deployed." +Write-Host " FQDN : $fqdn" +Write-Host " IP : $ip" +Write-Host " Port : $Port" +Write-Host " User : $ProxyUser" +Write-Host "==============================================" +Write-Host "" +Write-Host "Smoke test:" +Write-Host " curl.exe -x http://$($ProxyUser):$ProxyPass@$($fqdn):$Port https://example.com -I" +Write-Host "" +Write-Host "Set in your .env:" +Write-Host " PROXY_SERVER=http://$($fqdn):$Port" +Write-Host " PROXY_USERNAME=$ProxyUser" +Write-Host " PROXY_PASSWORD=" diff --git a/samples/playwright-proxy-tests/proxy-server/package.json b/samples/playwright-proxy-tests/proxy-server/package.json new file mode 100644 index 0000000..adba2f9 --- /dev/null +++ b/samples/playwright-proxy-tests/proxy-server/package.json @@ -0,0 +1,15 @@ +{ + "name": "pww-proxy-server", + "version": "1.0.0", + "type": "module", + "main": "server.mjs", + "scripts": { + "start": "node server.mjs" + }, + "engines": { + "node": ">=18" + }, + "dependencies": { + "proxy-chain": "^2.5.7" + } +} diff --git a/samples/playwright-proxy-tests/proxy-server/server.mjs b/samples/playwright-proxy-tests/proxy-server/server.mjs new file mode 100644 index 0000000..8378e49 --- /dev/null +++ b/samples/playwright-proxy-tests/proxy-server/server.mjs @@ -0,0 +1,78 @@ +/** + * 
server.mjs + * + * The proxy container's two services: + * 1. Public authenticated HTTP/HTTPS forward proxy on :PORT (default 8080), + * via `proxy-chain` with Basic auth (PROXY_USER / PROXY_PASS). + * 2. Private origin on 127.0.0.1:9090. Bound only to loopback so it is NOT + * reachable from the public internet, PWW, or your laptop — only the + * proxy itself can tunnel CONNECT traffic to it. The Dockerfile entry + * adds `127.0.0.1 intranet.local` to /etc/hosts so the friendly name + * resolves inside the container. + * + * Env: + * PORT listen port for the public proxy (default 8080) + * PROXY_USER Basic-auth username (required) + * PROXY_PASS Basic-auth password (required) + * + * Run locally: + * $env:PROXY_USER="usr"; $env:PROXY_PASS="pwd"; node server.mjs + * + * In production this is launched by Dockerfile + deploy-azure.ps1 on Azure + * Container Instances. Container stdout shows every CONNECT/GET (proxy log) + * and `[private] ...` lines whenever the private origin is hit. + */ + +import http from 'node:http'; +import { Server } from 'proxy-chain'; + +const PORT = parseInt(process.env.PORT || '8080', 10); +const USER = process.env.PROXY_USER; +const PASS = process.env.PROXY_PASS; +const PRIVATE_PORT = 9090; + +if (!USER || !PASS) { + console.error('FATAL: PROXY_USER and PROXY_PASS env vars must be set.'); + process.exit(1); +} + +// --- Private origin: 127.0.0.1:9090 (loopback only) --- +const privateOrigin = http.createServer((req, res) => { + console.log(`[private] ${req.method} ${req.url} from ${req.socket.remoteAddress}`); + res.writeHead(200, { 'Content-Type': 'application/json' }); + res.end(JSON.stringify({ + message: 'Hello from the PRIVATE origin behind the proxy.', + note: 'You are reading this because your CONNECT was tunneled by the proxy on 127.0.0.1:9090. 
The public internet cannot see this server.', + receivedHost: req.headers.host, + timestamp: new Date().toISOString(), + }, null, 2)); +}); +privateOrigin.listen(PRIVATE_PORT, '127.0.0.1', () => { + console.log(`Private origin listening on 127.0.0.1:${PRIVATE_PORT} (loopback only)`); +}); + +// --- Public auth proxy --- +const server = new Server({ + port: PORT, + verbose: false, + prepareRequestFunction: ({ request, username, password, hostname, port, isHttp }) => { + const ok = username === USER && password === PASS; + if (!ok) { + return { + requestAuthentication: true, + failMsg: 'Bad username or password, please try again.', + }; + } + const srcIp = request?.socket?.remoteAddress; + console.log(`[proxy] ${isHttp ? 'HTTP' : 'HTTPS'} ${srcIp} -> ${hostname}:${port}`); + return { upstreamProxyUrl: null }; + }, +}); + +server.listen(() => { + console.log(`Authenticated proxy listening on 0.0.0.0:${PORT}`); +}); + +server.on('requestFailed', ({ request, error }) => { + console.error(`[proxy] request failed: ${request?.url} - ${error.message}`); +}); diff --git a/samples/playwright-proxy-tests/pwwSessionClient.mjs b/samples/playwright-proxy-tests/pwwSessionClient.mjs new file mode 100644 index 0000000..1e64c1a --- /dev/null +++ b/samples/playwright-proxy-tests/pwwSessionClient.mjs @@ -0,0 +1,73 @@ +/** + * pwwSessionClient.mjs + * + * Tiny helper: asks Microsoft Playwright Workspaces (PWW) for a one-shot + * wss:// CDP endpoint, given PLAYWRIGHT_SERVICE_URL + PLAYWRIGHT_SERVICE_ACCESS_TOKEN. + * + * Imported by `rawCdp.mjs` and `connectOverCdp.mjs`. Not meant to be run directly. 
+ * + * Usage: + * import { getCdpEndpoint } from './pwwSessionClient.mjs'; + * const cdpUrl = await getCdpEndpoint(); + * const browser = await chromium.connectOverCDP(cdpUrl); + */ + +export class PlaywrightServiceError extends Error { + constructor(message) { + super(message); + this.name = 'PlaywrightServiceError'; + } +} + +function parseServiceUrl(url) { + const match = url.match(/wss:\/\/(\w+)\.api\.playwright\.microsoft\.com\/playwrightworkspaces\/([^/]+)\/browsers/); + if (!match) { + throw new PlaywrightServiceError( + `Invalid PLAYWRIGHT_SERVICE_URL format: ${url}\n` + + 'Expected: wss://.api.playwright.microsoft.com/playwrightworkspaces//browsers' + ); + } + return { region: match[1], workspaceId: match[2] }; +} + +export async function getCdpEndpoint(serviceUrl = null, accessToken = null, osName = 'Linux') { + serviceUrl = serviceUrl || process.env.PLAYWRIGHT_SERVICE_URL; + accessToken = accessToken || process.env.PLAYWRIGHT_SERVICE_ACCESS_TOKEN; + + if (!serviceUrl) { + throw new PlaywrightServiceError( + 'PLAYWRIGHT_SERVICE_URL environment variable is not set.\n' + + 'Expected: wss://.api.playwright.microsoft.com/playwrightworkspaces//browsers' + ); + } + if (!accessToken) { + throw new PlaywrightServiceError('PLAYWRIGHT_SERVICE_ACCESS_TOKEN environment variable is not set.'); + } + + const { region, workspaceId } = parseServiceUrl(serviceUrl); + + const apiUrl = `https://${region}.api.playwright.microsoft.com/playwrightworkspaces/${workspaceId}/browsers?os=${osName}&browser=chromium&playwrightVersion=cdp&shouldRedirect=false`; + + const headers = { + 'Authorization': `Bearer ${accessToken}`, + 'Accept': 'application/json', + }; + + const response = await fetch(apiUrl, { headers }); + + if (response.status === 401) { + throw new PlaywrightServiceError('Authentication failed. Check your access token.'); + } + if (response.status === 403) { + throw new PlaywrightServiceError('Access forbidden. 
Check your permissions.'); + } + if (!response.ok) { + const text = await response.text(); + throw new PlaywrightServiceError(`Failed to get browser endpoint: HTTP ${response.status}\n${text}`); + } + + const data = await response.json(); + const correlationId = new URL(data.sessionUrl).searchParams.get('correlationId'); + console.log('PWW session (correlationId):', correlationId); + return data.sessionUrl; +} diff --git a/samples/playwright-proxy-tests/rawCdp.mjs b/samples/playwright-proxy-tests/rawCdp.mjs new file mode 100644 index 0000000..9119de4 --- /dev/null +++ b/samples/playwright-proxy-tests/rawCdp.mjs @@ -0,0 +1,198 @@ +/** + * rawCdp.mjs + * ============================================================================ + * Sample 2 of 3 — Raw CDP (no Playwright, hand-rolled JSON-RPC) + * ============================================================================ + * Drives a remote Chromium on Microsoft Playwright Workspaces (PWW) over a + * CDP WebSocket by writing every `Target.*` / `Page.*` / `Fetch.*` frame by + * hand. This is what `connectOverCdp.mjs` emits under the hood — shown + * explicitly so the wire protocol is fully visible. + * + * The proxy-auth dance, expanded: + * Target.createBrowserContext { proxyServer } (context bound to proxy) + * Target.createTarget { browserContextId } (open a tab there) + * Target.attachToTarget { targetId, flatten } (get a sessionId) + * Page.enable / Runtime.enable (navigate + evaluate) + * Fetch.enable { handleAuthRequests: true } (intercept 407) + * Fetch.authRequired ← proxy responded 407 Proxy-Authenticate + * Fetch.continueWithAuth { ProvideCredentials, username, password } + * + * Demo runs two steps through the proxied session: + * 1. PROXIED session -> https://api.ipify.org?format=json + * (shows the PROXY's egress IP) + * 2. 
SAME proxied sess -> http://intranet.local:9090/ + * (private origin only reachable through the proxy) + * + * Run: + * node rawCdp.mjs + * + * Print every CDP frame sent (>>) and received (<<): + * $env:CDP_DEBUG=1; node rawCdp.mjs; Remove-Item env:CDP_DEBUG + */ + +import { config } from 'dotenv'; +config({ path: new URL('./.env', import.meta.url) }); + +import { getCdpEndpoint } from './pwwSessionClient.mjs'; + +// ─────────────────────────────────────────────────────────────────────────── // +// Config // +// ─────────────────────────────────────────────────────────────────────────── // + +const { PROXY_SERVER, PROXY_USERNAME, PROXY_PASSWORD } = process.env; + +const IPIFY_URL = 'https://api.ipify.org?format=json'; +const INTRANET_URL = 'http://intranet.local:9090/'; + +const DEBUG = process.env.CDP_DEBUG === '1'; +const trunc = (s, n = 200) => (s.length > n ? s.slice(0, n) + '…' : s); + +// ─────────────────────────────────────────────────────────────────────────── // +// Open the CDP WebSocket to PWW // +// getCdpEndpoint() asks the PWW REST API for a one-shot wss:// URL pointing // +// at a remote Chromium's CDP socket. // +// ─────────────────────────────────────────────────────────────────────────── // + +const ws = new WebSocket(await getCdpEndpoint()); +await new Promise((resolve, reject) => { ws.onopen = resolve; ws.onerror = reject; }); + +// ─────────────────────────────────────────────────────────────────────────── // +// Tiny JSON-RPC client over the WebSocket // +// CDP frames are JSON. Requests carry numeric `id`; responses echo it. // +// Anything without `id` is an event we broadcast to listeners. // +// ─────────────────────────────────────────────────────────────────────────── // + +let nextId = 0; +const pending = new Map(); // id -> resolver fn +const listeners = new Set(); // event handlers + +ws.onmessage = async (ev) => { + const text = typeof ev.data === 'string' ? 
ev.data : await ev.data.text(); + if (DEBUG) console.log('<<', trunc(text)); + const msg = JSON.parse(text); + if (msg.id != null) { + pending.get(msg.id)?.(msg); + pending.delete(msg.id); + } else { + for (const fn of listeners) fn(msg); + } +}; + +const send = (method, params = {}, sessionId) => + new Promise((resolve, reject) => { + const id = ++nextId; + pending.set(id, (m) => m.error + ? reject(new Error(`${method}: ${m.error.message}`)) + : resolve(m.result)); + const frame = JSON.stringify({ id, method, params, sessionId }); + if (DEBUG) console.log('>>', trunc(frame)); + ws.send(frame); + }); + +// Convenience: wait for a single event matching (sessionId, method). +const waitForEvent = (sessionId, method) => + new Promise((resolve) => { + const fn = (m) => { + if (m.sessionId === sessionId && m.method === method) { + listeners.delete(fn); + resolve(m); + } + }; + listeners.add(fn); + }); + +// ═════════════════════════════════════════════════════════════════════════ // +// ONE-TIME SETUP // +// ═════════════════════════════════════════════════════════════════════════ // + +// (a) Create the proxied browser context + a page inside it. +const { browserContextId } = await send('Target.createBrowserContext', { + proxyServer: PROXY_SERVER, +}); +const { targetId: proxiedTargetId } = await send('Target.createTarget', { + url: 'about:blank', + browserContextId, +}); + +// (b) Attach to the proxied target to get a sessionId for driving it. +// `flatten: true` is required so messages multiplex via sessionId on the +// single CDP socket PWW exposes per browser. +const { sessionId: proxiedSession } = await send('Target.attachToTarget', { targetId: proxiedTargetId, flatten: true }); + +// (c) Enable Page/Runtime so we can navigate + evaluate. +await send('Page.enable', {}, proxiedSession); +await send('Runtime.enable', {}, proxiedSession); + +// (d) Wire up proxy-auth interception. 
Fetch.enable pauses every request; +// our listener provides credentials on a Proxy 407 and forwards the rest. +await send('Fetch.enable', { + handleAuthRequests: true, + patterns: [{ urlPattern: '*' }], +}, proxiedSession); + +listeners.add((m) => { + if (m.sessionId !== proxiedSession) return; + + if (m.method === 'Fetch.authRequired') { + if (m.params.authChallenge.source === 'Proxy') { + // Proxy returned 407 Proxy-Authenticate. Provide creds; Chromium + // will retry the request with Proxy-Authorization: Basic ... + send('Fetch.continueWithAuth', { + requestId: m.params.requestId, + authChallengeResponse: { + response: 'ProvideCredentials', + username: PROXY_USERNAME, + password: PROXY_PASSWORD, + }, + }, proxiedSession); + } else { + // Origin server 401 — NEVER leak proxy creds to the target site. + send('Fetch.continueWithAuth', { + requestId: m.params.requestId, + authChallengeResponse: { response: 'CancelAuth' }, + }, proxiedSession); + } + } else if (m.method === 'Fetch.requestPaused') { + // Not an auth challenge: forward the request unchanged. + send('Fetch.continueRequest', { requestId: m.params.requestId }, proxiedSession); + } +}); + +// ═════════════════════════════════════════════════════════════════════════ // +// STEP 1 — PROXIED session → ipify // +// Triggers the Fetch.authRequired / continueWithAuth dance our listener // +// handles. ipify reports the PROXY's egress IP. 
// +// ═════════════════════════════════════════════════════════════════════════ // + +{ + const loaded = waitForEvent(proxiedSession, 'Page.loadEventFired'); + await send('Page.navigate', { url: IPIFY_URL }, proxiedSession); + await loaded; + + const { result } = await send('Runtime.evaluate', + { expression: 'document.body.innerText' }, proxiedSession); + + console.log('--- 1) PROXIED -> ipify ---'); + console.log(result.value); +} + +// ═════════════════════════════════════════════════════════════════════════ // +// STEP 2 — SAME proxied session → private intranet origin // +// intranet.local:9090 is a loopback service INSIDE the proxy container, // +// unreachable from the public internet. Works only because the proxy is // +// tunnelling CONNECT for that hostname. // +// ═════════════════════════════════════════════════════════════════════════ // + +{ + const loaded = waitForEvent(proxiedSession, 'Page.loadEventFired'); + await send('Page.navigate', { url: INTRANET_URL }, proxiedSession); + await loaded; + + const { result } = await send('Runtime.evaluate', + { expression: 'document.body.innerText' }, proxiedSession); + + console.log('--- 2) PROXIED -> intranet ---'); + console.log(result.value); +} + +ws.close(); From 59c0c25af4db6fa42adbef2df4232493adbe3ad0 Mon Sep 17 00:00:00 2001 From: Dhruv Aggarwal Date: Mon, 8 Jun 2026 15:45:45 +0530 Subject: [PATCH 2/3] Remove proxy setup from samples --- samples/playwright-proxy-tests/.env.example | 8 +- samples/playwright-proxy-tests/README.md | 65 +++--- .../playwright-proxy-tests/connectOverCdp.mjs | 38 ++-- .../playwrightConnect.mjs | 39 ++-- .../proxy-server/.dockerignore | 3 - .../proxy-server/Dockerfile | 26 --- .../proxy-server/README.md | 215 ------------------ .../proxy-server/deploy-azure.ps1 | 111 --------- .../proxy-server/package.json | 15 -- .../proxy-server/server.mjs | 78 ------- samples/playwright-proxy-tests/rawCdp.mjs | 23 +- 11 files changed, 88 insertions(+), 533 deletions(-) delete mode 100644 
samples/playwright-proxy-tests/proxy-server/.dockerignore delete mode 100644 samples/playwright-proxy-tests/proxy-server/Dockerfile delete mode 100644 samples/playwright-proxy-tests/proxy-server/README.md delete mode 100644 samples/playwright-proxy-tests/proxy-server/deploy-azure.ps1 delete mode 100644 samples/playwright-proxy-tests/proxy-server/package.json delete mode 100644 samples/playwright-proxy-tests/proxy-server/server.mjs diff --git a/samples/playwright-proxy-tests/.env.example b/samples/playwright-proxy-tests/.env.example index bec6b4b..261396c 100644 --- a/samples/playwright-proxy-tests/.env.example +++ b/samples/playwright-proxy-tests/.env.example @@ -3,7 +3,13 @@ PLAYWRIGHT_SERVICE_URL="wss://.api.playwright.microsoft.com/playwrightworkspaces//browsers" PLAYWRIGHT_SERVICE_ACCESS_TOKEN="" -# Authenticated forward proxy (deploy from ./proxy-server, or use your own). +# Authenticated forward proxy (point at your own). PROXY_SERVER="http://:8080" PROXY_USERNAME="" PROXY_PASSWORD="" + +# A URL reachable through your proxy (e.g. a private intranet origin, a +# service whitelisted by your proxy's egress IP, etc.). Each sample's final +# step fetches it through the proxied context to prove the proxy is tunnelling +# traffic end to end. Replace with one of your own. +PROXY_ONLY_URL="http://intranet.example/healthcheck" diff --git a/samples/playwright-proxy-tests/README.md b/samples/playwright-proxy-tests/README.md index 483b247..1937136 100644 --- a/samples/playwright-proxy-tests/README.md +++ b/samples/playwright-proxy-tests/README.md @@ -2,12 +2,15 @@ Three runnable Node.js samples showing how to route a remote Chromium on **Microsoft Playwright Workspaces (PWW)** through an **authenticated outbound -HTTP proxy**, plus the proxy itself (deployable to Azure with one script). +HTTP proxy** that you provide. 
The samples are deliberately small (one file each, no test framework, no abstraction layer) so you can read the entire request flow end to end and copy the parts you need into your own code. +> You bring your own authenticated forward proxy. These samples only show +> how to drive PWW through one — they do not deploy a proxy for you. + --- ## Folder layout @@ -20,13 +23,7 @@ playwright-proxy-tests/ ├── connectOverCdp.mjs ← Sample 1: Playwright over CDP (recommended) ├── rawCdp.mjs ← Sample 2: hand-rolled CDP JSON-RPC ├── playwrightConnect.mjs ← Sample 3: Playwright native wire protocol -├── pwwSessionClient.mjs ← helper: gets a CDP wss:// URL from PWW -└── proxy-server/ ← the authenticated proxy (deploy to Azure) - ├── server.mjs - ├── Dockerfile - ├── deploy-azure.ps1 - ├── package.json - └── README.md +└── pwwSessionClient.mjs ← helper: gets a CDP wss:// URL from PWW ``` --- @@ -37,12 +34,16 @@ The three samples are **not** identical demos. They differ in (a) what wire protocol talks to the remote browser and (b) how much of the proxy auth dance *you* have to write. They also differ in which steps they run: -| File | Wire protocol to PWW | Proxy auth handled by | Direct step? | Proxied step? | Private origin step? | +| File | Wire protocol to PWW | Proxy auth handled by | Direct step? | Proxied step? | Proxy-only URL step? | | --- | --- | --- | :---: | :---: | :---: | | [connectOverCdp.mjs](connectOverCdp.mjs) | CDP (`chromium.connectOverCDP`) | Playwright (internal `Fetch.*`) | yes | yes | yes | | [rawCdp.mjs](rawCdp.mjs) | CDP (raw WebSocket JSON-RPC) | **You — `Fetch.enable` + `Fetch.continueWithAuth`** | no | yes | yes | | [playwrightConnect.mjs](playwrightConnect.mjs) | Playwright native wire protocol (`chromium.connect`) | PWW server-side (you never see the 407) | yes | yes | yes | +The "proxy-only URL" step navigates to `PROXY_ONLY_URL` from `.env` (see +[Setup](#setup-one-time) below) — plug in any URL you want fetched through +the proxy. 
+ --- ## What each sample actually demonstrates @@ -56,7 +57,7 @@ then runs: | ---: | --- | --- | --- | | 1 | `browser.newContext()` (no proxy) | `https://api.ipify.org` | the PWW container's egress IP | | 2 | `browser.newContext({ proxy })` | `https://api.ipify.org` | the **proxy's** egress IP | -| 3 | same proxied context | `http://intranet.local:9090` | JSON from the private origin | +| 3 | same proxied context | `$PROXY_ONLY_URL` | whatever your URL returns | The proxy `407` is handled inside Playwright — your code is just `newContext({ proxy: { server, username, password } })`. @@ -79,12 +80,12 @@ Runtime.enable (so we can Runtime.eval Fetch.enable { handleAuthRequests:true, patterns:[*] } (you now own the auth) ``` -Then it runs two steps through that one proxied session: +Then it runs through that proxied session: | Step | URL | Expected output | | ---: | --- | --- | -| 1 | `https://api.ipify.org` | the proxy's egress IP | -| 2 | `http://intranet.local:9090` | JSON from the private origin | +| 1 | `https://api.ipify.org` | the proxy's egress IP | +| 2 | `$PROXY_ONLY_URL` | whatever your URL returns | Run with `$env:CDP_DEBUG=1` to print every frame the script sends (`>>`) and receives (`<<`). @@ -96,9 +97,9 @@ URL). The connection uses Playwright's native wire protocol over WebSocket, authenticated with `Authorization: Bearer `. No CDP frames cross your laptop's network — PWW relays everything on the server side. -Runs the same three steps as Sample 1. The observable behaviour is identical; -the difference is purely the on-the-wire protocol and where the auth dance -happens (PWW relays it for you). +Runs the same steps as Sample 1, including the `$PROXY_ONLY_URL` step. The +observable behaviour is identical; the difference is purely the on-the-wire +protocol and where the auth dance happens (PWW relays it for you). --- @@ -179,11 +180,14 @@ You need: - **Node.js 18+** (for built-in `WebSocket` and `fetch`). 
- **A PWW workspace** + an access token (Azure portal → your Playwright Workspaces resource). -- **An authenticated HTTP proxy.** Either point at your own, or deploy the one - in [proxy-server/](proxy-server/) to Azure Container Instances — see - [proxy-server/README.md](proxy-server/README.md). If you use someone else's - proxy you won't get the `intranet.local:9090` step; the other two steps - still work. +- **An authenticated HTTP proxy you control.** Point the samples at it via + `PROXY_SERVER` / `PROXY_USERNAME` / `PROXY_PASSWORD`. To prove the proxy is + actually in the request path, the samples compare the egress IP reported by + `api.ipify.org` with and without the proxy — they should differ. +- **A URL to fetch through the proxy.** Set `PROXY_ONLY_URL` in `.env` to + any hostname your proxy can reach — a private intranet origin, an + IP-allowlisted service, or even a public URL. Each sample's final step + navigates to it through the proxied context. Then: @@ -196,7 +200,7 @@ npm install Copy-Item .env.example .env # Edit .env and fill in: # PLAYWRIGHT_SERVICE_URL, PLAYWRIGHT_SERVICE_ACCESS_TOKEN, -# PROXY_SERVER, PROXY_USERNAME, PROXY_PASSWORD +# PROXY_SERVER, PROXY_USERNAME, PROXY_PASSWORD, PROXY_ONLY_URL ``` > `.env` is gitignored. Never commit real credentials. @@ -222,10 +226,11 @@ npm run sample:playwright-connect $env:DEBUG="pw:*"; node playwrightConnect.mjs 2>pw.log; Remove-Item env:DEBUG ``` -Expected: in Samples 1 and 3, the direct step prints one IP, the proxied step -prints a different IP. In Sample 2, the single proxied step prints the same -IP as Samples 1 and 3's proxied step. All three private-origin steps print -the same JSON payload from the proxy container's loopback service. +Expected: in Samples 1 and 3, the direct step prints one IP and the proxied +step prints a different IP (the proxy's egress IP). In Sample 2, the single +proxied IP step prints the same IP as Samples 1 and 3's proxied step. 
The +final step in each sample prints the body of `PROXY_ONLY_URL` fetched +through the proxied context. --- @@ -249,8 +254,7 @@ the same JSON payload from the proxy container's loopback service. (per-context proxy, etc.) behave the same. - **Egress IPs.** - PWW direct → an IP from the Microsoft-owned PWW egress range (varies by region). - - Via the proxy → the SNAT IP of your proxy container (printed by - `deploy-azure.ps1`). + - Via the proxy → the egress (SNAT) IP of your proxy. --- @@ -263,7 +267,7 @@ the same JSON payload from the proxy container's loopback service. | Script hangs on the first proxied navigation. | Most common: `PROXY_USERNAME` / `PROXY_PASSWORD` don't match what the proxy was deployed with — the proxy keeps returning 407. | | `rawCdp.mjs` hangs even though direct CDP works. | You probably forgot to forward non-auth `Fetch.requestPaused` events with `Fetch.continueRequest`. `Fetch.enable` pauses **every** request. | | Proxy creds appear on an origin site. | You're answering `Fetch.authRequired` with `ProvideCredentials` regardless of `authChallenge.source`. Gate on `=== 'Proxy'`. | -| Hangs on `intranet.local:9090`. | You're not using the proxy in [proxy-server/](proxy-server/) (or its container's `/etc/hosts` wasn't patched — see Dockerfile). | +| `PROXY_ONLY_URL` step hangs or 502s. | The URL isn't reachable through your proxy (DNS, ACL, or proxy isn't tunnelling CONNECT for that host). Try the URL from a client behind the proxy first. | | Sample 1/3 hangs on `newContext`. | Network can't reach PWW. Check corporate firewall lets `*.api.playwright.microsoft.com:443` through. | | `connectOverCdp.mjs` works, `rawCdp.mjs` doesn't connect at all. | You called `Target.attachToTarget` without `flatten: true`. PWW's single-socket model requires flattened sessions. | @@ -271,8 +275,5 @@ the same JSON payload from the proxy container's loopback service. 
## What to read next -- [proxy-server/README.md](proxy-server/README.md) — how to deploy the proxy - to Azure Container Instances and why the `intranet.local:9090` private - origin is "proof" the tunnel works. - The top-of-file docstring in each `.mjs` — recaps the demo steps and gives copy-pasteable debug commands specific to that sample. diff --git a/samples/playwright-proxy-tests/connectOverCdp.mjs b/samples/playwright-proxy-tests/connectOverCdp.mjs index 5db1fbb..78241b0 100644 --- a/samples/playwright-proxy-tests/connectOverCdp.mjs +++ b/samples/playwright-proxy-tests/connectOverCdp.mjs @@ -16,9 +16,9 @@ * (shows the PWW container's public egress IP) * 2. PROXIED context -> https://api.ipify.org?format=json * (shows the PROXY's egress IP — request went through the proxy) - * 3. SAME proxied ctx -> http://intranet.local:9090/ - * (private origin only reachable through the proxy — proof the - * tunnel works end to end) + * 3. SAME proxied ctx -> $PROXY_ONLY_URL + * (fetches a URL of your choice through the proxy — e.g. a private + * intranet origin only reachable via your proxy) * * Run: * node connectOverCdp.mjs @@ -38,10 +38,9 @@ import { getCdpEndpoint } from './pwwSessionClient.mjs'; // Config // // ─────────────────────────────────────────────────────────────────────────── // -const { PROXY_SERVER, PROXY_USERNAME, PROXY_PASSWORD } = process.env; +const { PROXY_SERVER, PROXY_USERNAME, PROXY_PASSWORD, PROXY_ONLY_URL } = process.env; -const IPIFY_URL = 'https://api.ipify.org?format=json'; -const INTRANET_URL = 'http://intranet.local:9090/'; +const IPIFY_URL = 'https://api.ipify.org?format=json'; const PROXY = { server: PROXY_SERVER, // e.g. 
http://:8080 @@ -82,28 +81,27 @@ await directContext.close(); // ═════════════════════════════════════════════════════════════════════════ // const proxiedContext = await browser.newContext({ proxy: PROXY }); -const proxiedPage1 = await proxiedContext.newPage(); +const proxiedPage = await proxiedContext.newPage(); -await proxiedPage1.goto(IPIFY_URL); -const proxiedBody1 = await proxiedPage1.locator('body').innerText(); +await proxiedPage.goto(IPIFY_URL); +const proxiedBody = await proxiedPage.locator('body').innerText(); console.log('--- 2) PROXIED -> ipify ---'); -console.log(proxiedBody1); +console.log(proxiedBody); // ═════════════════════════════════════════════════════════════════════════ // -// STEP 3 — SAME proxied context → private intranet origin // -// `intranet.local:9090` is a loopback service running INSIDE the proxy // -// container. It is unreachable from the public internet. We can hit it // -// only because the proxy is tunnelling our CONNECT for that hostname. // +// STEP 3 — SAME proxied context → a URL of your choice // +// Customer-supplied via PROXY_ONLY_URL in .env. Use any hostname that is // +// reachable through your proxy (private intranet, IP-allowlisted service, // +// etc.). 
// // ═════════════════════════════════════════════════════════════════════════ // -const proxiedPage2 = await proxiedContext.newPage(); +const proxyOnlyPage = await proxiedContext.newPage(); +await proxyOnlyPage.goto(PROXY_ONLY_URL); +const proxyOnlyBody = await proxyOnlyPage.locator('body').innerText(); -await proxiedPage2.goto(INTRANET_URL); -const proxiedBody2 = await proxiedPage2.locator('body').innerText(); - -console.log('--- 3) PROXIED -> intranet ---'); -console.log(proxiedBody2); +console.log(`--- 3) PROXIED -> ${PROXY_ONLY_URL} ---`); +console.log(proxyOnlyBody); await proxiedContext.close(); await browser.close(); diff --git a/samples/playwright-proxy-tests/playwrightConnect.mjs b/samples/playwright-proxy-tests/playwrightConnect.mjs index 385b9b4..bb9bc3f 100644 --- a/samples/playwright-proxy-tests/playwrightConnect.mjs +++ b/samples/playwright-proxy-tests/playwrightConnect.mjs @@ -10,13 +10,14 @@ * proxy-auth handshake (Fetch.* frames) is performed entirely server-side. * From this laptop we never see a CDP frame. * - * Demo runs three steps so the output lines up with the other two samples: + * Demo runs three steps: * 1. DIRECT context -> https://api.ipify.org?format=json - * (shows the LAPTOP's view of the egress IP — the PWW container) + * (shows the PWW container's public egress IP) * 2. PROXIED context -> https://api.ipify.org?format=json * (shows the PROXY's egress IP — request went through the proxy) - * 3. SAME proxied ctx -> http://intranet.local:9090/ - * (private origin only reachable through the proxy) + * 3. SAME proxied ctx -> $PROXY_ONLY_URL + * (fetches a URL of your choice through the proxy — e.g. 
a private + * intranet origin only reachable via your proxy) * * Run: * node playwrightConnect.mjs @@ -42,10 +43,10 @@ const { PROXY_SERVER, PROXY_USERNAME, PROXY_PASSWORD, + PROXY_ONLY_URL, } = process.env; -const IPIFY_URL = 'https://api.ipify.org?format=json'; -const INTRANET_URL = 'http://intranet.local:9090/'; +const IPIFY_URL = 'https://api.ipify.org?format=json'; const PROXY = { server: PROXY_SERVER, @@ -97,28 +98,26 @@ await directContext.close(); // ═════════════════════════════════════════════════════════════════════════ // const proxiedContext = await browser.newContext({ proxy: PROXY }); -const proxiedPage1 = await proxiedContext.newPage(); +const proxiedPage = await proxiedContext.newPage(); -await proxiedPage1.goto(IPIFY_URL); -const proxiedBody1 = await proxiedPage1.locator('body').innerText(); +await proxiedPage.goto(IPIFY_URL); +const proxiedBody = await proxiedPage.locator('body').innerText(); console.log('--- 2) PROXIED -> ipify ---'); -console.log(proxiedBody1); +console.log(proxiedBody); // ═════════════════════════════════════════════════════════════════════════ // -// STEP 3 — SAME proxied context → private intranet origin // -// `intranet.local:9090` is a loopback service running INSIDE the proxy // -// container. Only reachable because the proxy is tunnelling CONNECT for // -// that hostname. // +// STEP 3 — SAME proxied context → a URL of your choice // +// Customer-supplied via PROXY_ONLY_URL in .env. Use any hostname that is // +// reachable through your proxy. 
// // ═════════════════════════════════════════════════════════════════════════ // -const proxiedPage2 = await proxiedContext.newPage(); +const proxyOnlyPage = await proxiedContext.newPage(); +await proxyOnlyPage.goto(PROXY_ONLY_URL); +const proxyOnlyBody = await proxyOnlyPage.locator('body').innerText(); -await proxiedPage2.goto(INTRANET_URL); -const proxiedBody2 = await proxiedPage2.locator('body').innerText(); - -console.log('--- 3) PROXIED -> intranet ---'); -console.log(proxiedBody2); +console.log(`--- 3) PROXIED -> ${PROXY_ONLY_URL} ---`); +console.log(proxyOnlyBody); await proxiedContext.close(); await browser.close(); diff --git a/samples/playwright-proxy-tests/proxy-server/.dockerignore b/samples/playwright-proxy-tests/proxy-server/.dockerignore deleted file mode 100644 index c81b8d3..0000000 --- a/samples/playwright-proxy-tests/proxy-server/.dockerignore +++ /dev/null @@ -1,3 +0,0 @@ -node_modules -npm-debug.log -.env diff --git a/samples/playwright-proxy-tests/proxy-server/Dockerfile b/samples/playwright-proxy-tests/proxy-server/Dockerfile deleted file mode 100644 index d4326fb..0000000 --- a/samples/playwright-proxy-tests/proxy-server/Dockerfile +++ /dev/null @@ -1,26 +0,0 @@ -# Dockerfile -# -# Builds the proxy container image. Runs server.mjs which exposes the -# authenticated forward proxy on :8080. Also injects `127.0.0.1 intranet.local` -# into /etc/hosts at start time so the proxy can resolve the friendly name to -# its in-container loopback origin (ACI doesn't let us mount /etc/hosts). -# -# Build + run locally: -# docker build -t pw-proxy . -# docker run --rm -e PROXY_USER=usr -e PROXY_PASS=pwd -p 8080:8080 pw-proxy - -FROM node:20-alpine -WORKDIR /app - -COPY package.json ./ -RUN npm install --omit=dev - -COPY server.mjs ./ - -ENV PORT=8080 -EXPOSE 8080 - -# `intranet.local` should resolve to loopback inside this container, so the -# proxy can tunnel CONNECT requests for that name to the in-container origin. 
-# We append to /etc/hosts at start time because ACI does not let us mount one. -CMD ["sh", "-c", "echo '127.0.0.1 intranet.local' >> /etc/hosts && node server.mjs"] diff --git a/samples/playwright-proxy-tests/proxy-server/README.md b/samples/playwright-proxy-tests/proxy-server/README.md deleted file mode 100644 index 7471db6..0000000 --- a/samples/playwright-proxy-tests/proxy-server/README.md +++ /dev/null @@ -1,215 +0,0 @@ -# Authenticated HTTP Proxy on Azure - -A minimal Node.js forward proxy with HTTP Basic auth, packaged for Azure -Container Instances (ACI). Sits between a browser (local, or on -Playwright Workspaces) and the public internet — or a private origin -bundled inside the same container. - -Two services run inside the container: - -| Service | Port | Reachable from | -| -------------------- | ---------------- | -------------------------------------------------------- | -| Authenticated proxy | `8080` (public) | anywhere on the internet (needs user/pass) | -| Private origin | `9090` (loopback)| only from inside the container — i.e. only via the proxy | - -The private origin is what makes the demo provable: `http://intranet.local:9090` -resolves to `127.0.0.1` **inside** the container only (see `Dockerfile`). -The public internet has no route to it. If a remote browser can read it, the -only way it got there is by tunneling through the proxy. - ---- - -## Files - -| File | Purpose | -| --------------------- | -------------------------------------------------- | -| `server.mjs` | The proxy + the private origin (single process) | -| `Dockerfile` | Container image, injects `intranet.local` host | -| `package.json` | Deps: `proxy-chain` | -| `deploy-azure.ps1` | One-shot Azure deploy: ACR build → ACI run | -| `.dockerignore` | Skip `node_modules` etc. when building | - ---- - -## Prerequisites - -- An Azure subscription with permission to create ACR + ACI in a resource group. 
-- [Azure CLI](https://learn.microsoft.com/cli/azure/install-azure-cli) (`az --version`). -- (Optional, only for running locally) Node 18+. - -You do **not** need Docker installed — the image is built in the cloud by ACR. - ---- - -## Deploy to Azure (recommended) - -```powershell -az login - -.\deploy-azure.ps1 ` - -ResourceGroup "" ` - -ProxyUser "" ` - -ProxyPass "" -``` - -Optional flags: `-Location `, `-AppName `, `-Port ` (default `8080`). - -The script will: - -1. Reuse the resource group if it exists, else create it. -2. Create an Azure Container Registry, build the image in the cloud (`az acr build`). -3. Tear down any previous container with the same name. -4. Deploy to Azure Container Instances with a deterministic DNS label - (re-running the script gives you the same FQDN, so your `.env` doesn't churn). -5. Print the final FQDN, IP, port, and a ready-to-paste smoke test. - -Sample output: - -``` -============================================== - Proxy deployed. - FQDN : -..azurecontainer.io - IP : - Port : 8080 - User : -============================================== -``` - -Smoke test: - -```powershell -curl.exe -x "http://:@:8080" https://example.com -I -``` - -Expected: `HTTP/1.1 200 OK` from `example.com`, plus a line in the container -log: `[proxy] HTTPS -> example.com:443`. 
- ---- - -## How clients use it - -Set these three values in the project root `.env` (see `../.env.example`): - -``` -PROXY_SERVER = http://:8080 -PROXY_USERNAME = -PROXY_PASSWORD = -``` - -### Playwright (per-context) -```js -const context = await browser.newContext({ - proxy: { - server: process.env.PROXY_SERVER, - username: process.env.PROXY_USERNAME, - password: process.env.PROXY_PASSWORD, - }, -}); -``` - -### Raw CDP (`Target.createBrowserContext`) -```js -const { browserContextId } = await send('Target.createBrowserContext', { - proxyServer: process.env.PROXY_SERVER, -}); -// Authenticate via Fetch.enable + Fetch.continueWithAuth -``` - -### curl -```powershell -curl.exe -x "http://:@:8080" https://example.com -``` - ---- - -## Behaviour - -- **HTTP**: proxy receives the full request, forwards it, returns the response. - Logged as `[proxy] HTTP -> :`. -- **HTTPS**: client sends `CONNECT host:443`, proxy authenticates, opens a TCP - tunnel; all traffic after that is end-to-end TLS — the proxy only sees the - hostname. Logged as `[proxy] HTTPS -> :`. -- **Bad credentials**: returns `407 Proxy Authentication Required` with the - message `Bad username or password, please try again.` Nothing is forwarded. -- **No destination filtering**: once authenticated, any destination is allowed. - Treat the credentials as a shared secret — anyone with them can browse the - internet via your ACI egress IP. -- **Egress IP**: the public IP of the ACI instance (the script prints it). - Target sites see that IP, not the caller's IP, not Playwright Workspaces. - ---- - -## The internal "private origin" - -For demo / validation purposes only: - -- An `http.createServer` inside `server.mjs` listens on `127.0.0.1:9090`. -- The Dockerfile appends `127.0.0.1 intranet.local` to `/etc/hosts` inside the - container, so `http://intranet.local:9090` resolves to that loopback server - **only when DNS is performed inside the container**. 
-- When a remote browser sends `CONNECT intranet.local:9090` to the proxy, the - proxy resolves the hostname locally and tunnels to itself. The browser gets - the JSON payload. -- Anyone trying to hit `http://intranet.local:9090` *without* going through the - proxy gets nothing — there is no public route. - -This is the proof: if a Playwright test running on remote Playwright -Workspaces browsers can read the JSON, the only path it could have taken is -through this proxy. - ---- - -## Run locally (optional, for development) - -```powershell -npm install -$env:PROXY_USER="usr"; $env:PROXY_PASS="pwd"; $env:PORT="8080" -node server.mjs -``` - -Test: - -```powershell -curl.exe -x "http://usr:pwd@127.0.0.1:8080" https://example.com -I -``` - ---- - -## Logs - -Stream container logs: -```powershell -az container logs -g -n pw-proxy --follow -``` - -You should see one line per request: -``` -[proxy] HTTPS 20.x.x.x -> example.com:443 -[proxy] HTTPS 20.x.x.x -> intranet.local:9090 -[private] GET / from 127.0.0.1 -``` - ---- - -## Tear down - -```powershell -az container delete -g -n pw-proxy --yes -az acr list -g --query "[?starts_with(name,'pwproxyacr')].name" -o tsv | - ForEach-Object { az acr delete -g -n $_ --yes } -``` - ---- - -## Security notes - -- Credentials travel in the `Proxy-Authorization` header. The hop from client - to proxy is **plaintext HTTP** on port 8080 — anyone on the path can sniff - the proxy creds. For production, front this with TLS (e.g. Caddy / Nginx / - Application Gateway terminating HTTPS for the proxy URL itself, or run it - inside a VNet and access via Private Link). -- Rotate `ProxyPass` regularly; it's the only thing protecting your egress IP - from being used by strangers. -- The script stores `PROXY_PASS` as a `--secure-environment-variables` on the - container (not visible in `az container show`). It is still visible to anyone - with `Contributor` on the resource group. 
diff --git a/samples/playwright-proxy-tests/proxy-server/deploy-azure.ps1 b/samples/playwright-proxy-tests/proxy-server/deploy-azure.ps1 deleted file mode 100644 index f18e880..0000000 --- a/samples/playwright-proxy-tests/proxy-server/deploy-azure.ps1 +++ /dev/null @@ -1,111 +0,0 @@ -# deploy-azure.ps1 -# -# Builds the proxy-server image in ACR and deploys it to Azure Container -# Instances with a public IP + DNS label. ACI is used (not Container Apps) -# because the HTTP CONNECT method used for HTTPS needs raw TCP passthrough -# on a public endpoint; ACI gives that on any port. -# -# Prereqs: az login -# -# Run: -# ./deploy-azure.ps1 -ResourceGroup my-rg -ProxyUser usr -ProxyPass '' -# -# Optional overrides: -# -Location default: existing RG location, else eastus -# -AcrName default: pwproxyacr -# -AppName default: pw-proxy -# -Port default: 8080 -# -# View live container logs: -# az container logs -g -n --follow - -param( - [Parameter(Mandatory=$true)][string]$ResourceGroup, - [string]$Location = "", - [string]$AcrName = "pwproxyacr$((Get-Random -Maximum 99999))", - [string]$AppName = "pw-proxy", - [int] $Port = 8080, - [Parameter(Mandatory=$true)][string]$ProxyUser, - [Parameter(Mandatory=$true)][string]$ProxyPass -) - -$ErrorActionPreference = "Stop" - -$rgExists = az group exists -n $ResourceGroup -if ($rgExists -eq "true") { - Write-Host "==> Using existing resource group $ResourceGroup" - if (-not $Location) { - $Location = az group show -n $ResourceGroup --query location -o tsv - } - Write-Host " (location: $Location)" -} else { - if (-not $Location) { $Location = "eastus" } - Write-Host "==> Creating resource group $ResourceGroup in $Location ..." - az group create -n $ResourceGroup -l $Location | Out-Null -} - -Write-Host "==> Creating ACR $AcrName ..." -az acr create -g $ResourceGroup -n $AcrName --sku Basic --admin-enabled true | Out-Null - -Write-Host "==> Building image in ACR ..." -az acr build -r $AcrName -t "pw-proxy:latest" . 
| Out-Null - -$acrServer = az acr show -n $AcrName --query loginServer -o tsv -$acrUser = az acr credential show -n $AcrName --query username -o tsv -$acrPass = az acr credential show -n $AcrName --query "passwords[0].value" -o tsv - -# Tear down any stale instance with the same name from a previous failed run. -# az writes "not found" to stderr which $ErrorActionPreference=Stop would treat -# as terminating; suppress both streams and rely on $LASTEXITCODE instead. -$existing = & { $ErrorActionPreference = 'Continue'; az container show -g $ResourceGroup -n $AppName 2>&1 | Out-Null; $LASTEXITCODE } -if ($existing -eq 0) { - Write-Host "==> Removing previous container instance $AppName ..." - az container delete -g $ResourceGroup -n $AppName --yes | Out-Null -} - -# DNS label must be globally unique within the region. -# Derive it deterministically from subscription + RG + app name so re-running -# this script reuses the same FQDN (no need to update .env after each deploy). -$subId = az account show --query id -o tsv -$hash = [BitConverter]::ToString( - [System.Security.Cryptography.SHA1]::Create().ComputeHash( - [System.Text.Encoding]::UTF8.GetBytes("$subId|$ResourceGroup|$AppName") - ) - ).Replace('-', '').Substring(0, 6).ToLower() -$dnsLabel = "$AppName-$hash".ToLower() - -Write-Host "==> Deploying ACI $AppName ($dnsLabel) ..." 
-az container create ` - -g $ResourceGroup ` - -n $AppName ` - --image "$acrServer/pw-proxy:latest" ` - --registry-login-server $acrServer ` - --registry-username $acrUser ` - --registry-password $acrPass ` - --os-type Linux ` - --cpu 1 --memory 1 ` - --ports $Port ` - --ip-address Public ` - --dns-name-label $dnsLabel ` - --environment-variables "PROXY_USER=$ProxyUser" "PORT=$Port" ` - --secure-environment-variables "PROXY_PASS=$ProxyPass" | Out-Null - -$fqdn = az container show -g $ResourceGroup -n $AppName --query "ipAddress.fqdn" -o tsv -$ip = az container show -g $ResourceGroup -n $AppName --query "ipAddress.ip" -o tsv - -Write-Host "" -Write-Host "==============================================" -Write-Host " Proxy deployed." -Write-Host " FQDN : $fqdn" -Write-Host " IP : $ip" -Write-Host " Port : $Port" -Write-Host " User : $ProxyUser" -Write-Host "==============================================" -Write-Host "" -Write-Host "Smoke test:" -Write-Host " curl.exe -x http://$($ProxyUser):$ProxyPass@$($fqdn):$Port https://example.com -I" -Write-Host "" -Write-Host "Set in your .env:" -Write-Host " PROXY_SERVER=http://$($fqdn):$Port" -Write-Host " PROXY_USERNAME=$ProxyUser" -Write-Host " PROXY_PASSWORD=" diff --git a/samples/playwright-proxy-tests/proxy-server/package.json b/samples/playwright-proxy-tests/proxy-server/package.json deleted file mode 100644 index adba2f9..0000000 --- a/samples/playwright-proxy-tests/proxy-server/package.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "name": "pww-proxy-server", - "version": "1.0.0", - "type": "module", - "main": "server.mjs", - "scripts": { - "start": "node server.mjs" - }, - "engines": { - "node": ">=18" - }, - "dependencies": { - "proxy-chain": "^2.5.7" - } -} diff --git a/samples/playwright-proxy-tests/proxy-server/server.mjs b/samples/playwright-proxy-tests/proxy-server/server.mjs deleted file mode 100644 index 8378e49..0000000 --- a/samples/playwright-proxy-tests/proxy-server/server.mjs +++ /dev/null @@ -1,78 +0,0 @@ 
-/** - * server.mjs - * - * The proxy container's two services: - * 1. Public authenticated HTTP/HTTPS forward proxy on :PORT (default 8080), - * via `proxy-chain` with Basic auth (PROXY_USER / PROXY_PASS). - * 2. Private origin on 127.0.0.1:9090. Bound only to loopback so it is NOT - * reachable from the public internet, PWW, or your laptop — only the - * proxy itself can tunnel CONNECT traffic to it. The Dockerfile entry - * adds `127.0.0.1 intranet.local` to /etc/hosts so the friendly name - * resolves inside the container. - * - * Env: - * PORT listen port for the public proxy (default 8080) - * PROXY_USER Basic-auth username (required) - * PROXY_PASS Basic-auth password (required) - * - * Run locally: - * $env:PROXY_USER="usr"; $env:PROXY_PASS="pwd"; node server.mjs - * - * In production this is launched by Dockerfile + deploy-azure.ps1 on Azure - * Container Instances. Container stdout shows every CONNECT/GET (proxy log) - * and `[private] ...` lines whenever the private origin is hit. - */ - -import http from 'node:http'; -import { Server } from 'proxy-chain'; - -const PORT = parseInt(process.env.PORT || '8080', 10); -const USER = process.env.PROXY_USER; -const PASS = process.env.PROXY_PASS; -const PRIVATE_PORT = 9090; - -if (!USER || !PASS) { - console.error('FATAL: PROXY_USER and PROXY_PASS env vars must be set.'); - process.exit(1); -} - -// --- Private origin: 127.0.0.1:9090 (loopback only) --- -const privateOrigin = http.createServer((req, res) => { - console.log(`[private] ${req.method} ${req.url} from ${req.socket.remoteAddress}`); - res.writeHead(200, { 'Content-Type': 'application/json' }); - res.end(JSON.stringify({ - message: 'Hello from the PRIVATE origin behind the proxy.', - note: 'You are reading this because your CONNECT was tunneled by the proxy on 127.0.0.1:9090. 
The public internet cannot see this server.', - receivedHost: req.headers.host, - timestamp: new Date().toISOString(), - }, null, 2)); -}); -privateOrigin.listen(PRIVATE_PORT, '127.0.0.1', () => { - console.log(`Private origin listening on 127.0.0.1:${PRIVATE_PORT} (loopback only)`); -}); - -// --- Public auth proxy --- -const server = new Server({ - port: PORT, - verbose: false, - prepareRequestFunction: ({ request, username, password, hostname, port, isHttp }) => { - const ok = username === USER && password === PASS; - if (!ok) { - return { - requestAuthentication: true, - failMsg: 'Bad username or password, please try again.', - }; - } - const srcIp = request?.socket?.remoteAddress; - console.log(`[proxy] ${isHttp ? 'HTTP' : 'HTTPS'} ${srcIp} -> ${hostname}:${port}`); - return { upstreamProxyUrl: null }; - }, -}); - -server.listen(() => { - console.log(`Authenticated proxy listening on 0.0.0.0:${PORT}`); -}); - -server.on('requestFailed', ({ request, error }) => { - console.error(`[proxy] request failed: ${request?.url} - ${error.message}`); -}); diff --git a/samples/playwright-proxy-tests/rawCdp.mjs b/samples/playwright-proxy-tests/rawCdp.mjs index 9119de4..4ef056e 100644 --- a/samples/playwright-proxy-tests/rawCdp.mjs +++ b/samples/playwright-proxy-tests/rawCdp.mjs @@ -18,10 +18,11 @@ * Fetch.continueWithAuth { ProvideCredentials, username, password } * * Demo runs two steps through the proxied session: - * 1. PROXIED session -> https://api.ipify.org?format=json + * 1. PROXIED session -> https://api.ipify.org?format=json * (shows the PROXY's egress IP) - * 2. SAME proxied sess -> http://intranet.local:9090/ - * (private origin only reachable through the proxy) + * 2. SAME session -> $PROXY_ONLY_URL + * (fetches a URL of your choice through the proxy — e.g. 
a private + * intranet origin only reachable via your proxy) * * Run: * node rawCdp.mjs @@ -39,10 +40,9 @@ import { getCdpEndpoint } from './pwwSessionClient.mjs'; // Config // // ─────────────────────────────────────────────────────────────────────────── // -const { PROXY_SERVER, PROXY_USERNAME, PROXY_PASSWORD } = process.env; +const { PROXY_SERVER, PROXY_USERNAME, PROXY_PASSWORD, PROXY_ONLY_URL } = process.env; -const IPIFY_URL = 'https://api.ipify.org?format=json'; -const INTRANET_URL = 'http://intranet.local:9090/'; +const IPIFY_URL = 'https://api.ipify.org?format=json'; const DEBUG = process.env.CDP_DEBUG === '1'; const trunc = (s, n = 200) => (s.length > n ? s.slice(0, n) + '…' : s); @@ -177,21 +177,20 @@ listeners.add((m) => { } // ═════════════════════════════════════════════════════════════════════════ // -// STEP 2 — SAME proxied session → private intranet origin // -// intranet.local:9090 is a loopback service INSIDE the proxy container, // -// unreachable from the public internet. Works only because the proxy is // -// tunnelling CONNECT for that hostname. // +// STEP 2 — SAME proxied session → a URL of your choice // +// Customer-supplied via PROXY_ONLY_URL in .env. Use any hostname that is // +// reachable through your proxy. 
// // ═════════════════════════════════════════════════════════════════════════ // { const loaded = waitForEvent(proxiedSession, 'Page.loadEventFired'); - await send('Page.navigate', { url: INTRANET_URL }, proxiedSession); + await send('Page.navigate', { url: PROXY_ONLY_URL }, proxiedSession); await loaded; const { result } = await send('Runtime.evaluate', { expression: 'document.body.innerText' }, proxiedSession); - console.log('--- 2) PROXIED -> intranet ---'); + console.log(`--- 2) PROXIED -> ${PROXY_ONLY_URL} ---`); console.log(result.value); } From fa634e92af09d6079536683a1169086316adf1f5 Mon Sep 17 00:00:00 2001 From: Dhruv Aggarwal Date: Tue, 9 Jun 2026 10:49:03 +0530 Subject: [PATCH 3/3] Add Proxy use case in existing samples --- samples/cdp-tests/.env.example | 7 + samples/cdp-tests/README.md | 25 +- samples/cdp-tests/cdpUseScript.py | 149 ++++++++++ samples/cdp-tests/connectOverCDPScript.js | 27 ++ samples/cdp-tests/puppeteerScript.js | 68 +++++ samples/cdp-tests/requirements.txt | 3 + samples/playwright-lib/ReadMe.md | 19 +- samples/playwright-lib/src/example-proxy.ts | 54 ++++ samples/playwright-proxy-tests/.env.example | 15 - samples/playwright-proxy-tests/.gitignore | 3 - samples/playwright-proxy-tests/README.md | 279 ------------------ .../playwright-proxy-tests/connectOverCdp.mjs | 107 ------- samples/playwright-proxy-tests/package.json | 16 - .../playwrightConnect.mjs | 123 -------- .../pwwSessionClient.mjs | 73 ----- samples/playwright-proxy-tests/rawCdp.mjs | 197 ------------- samples/playwright-tests/Readme.md | 25 ++ .../playwright.service.proxy.config.ts | 39 +++ .../tests-proxy/proxy.spec.ts | 19 ++ 19 files changed, 432 insertions(+), 816 deletions(-) create mode 100644 samples/cdp-tests/cdpUseScript.py create mode 100644 samples/cdp-tests/puppeteerScript.js create mode 100644 samples/playwright-lib/src/example-proxy.ts delete mode 100644 samples/playwright-proxy-tests/.env.example delete mode 100644 samples/playwright-proxy-tests/.gitignore 
delete mode 100644 samples/playwright-proxy-tests/README.md delete mode 100644 samples/playwright-proxy-tests/connectOverCdp.mjs delete mode 100644 samples/playwright-proxy-tests/package.json delete mode 100644 samples/playwright-proxy-tests/playwrightConnect.mjs delete mode 100644 samples/playwright-proxy-tests/pwwSessionClient.mjs delete mode 100644 samples/playwright-proxy-tests/rawCdp.mjs create mode 100644 samples/playwright-tests/playwright.service.proxy.config.ts create mode 100644 samples/playwright-tests/tests-proxy/proxy.spec.ts diff --git a/samples/cdp-tests/.env.example b/samples/cdp-tests/.env.example index 7b26d57..bb3d9ce 100644 --- a/samples/cdp-tests/.env.example +++ b/samples/cdp-tests/.env.example @@ -9,3 +9,10 @@ PLAYWRIGHT_SERVICE_ACCESS_TOKEN= AZURE_OPENAI_API_KEY= AZURE_OPENAI_ENDPOINT= AZURE_OPENAI_API_VERSION= + +# Authenticated forward proxy (Required only if you switch the entry point in +# connectOverCDPScript.js / puppeteerScript.js / cdpUseScript.py to the +# `mainWithProxy()` / `main_with_proxy()` variant) +PROXY_SERVER= +PROXY_USERNAME= +PROXY_PASSWORD= diff --git a/samples/cdp-tests/README.md b/samples/cdp-tests/README.md index a57d6dd..52725be 100644 --- a/samples/cdp-tests/README.md +++ b/samples/cdp-tests/README.md @@ -8,8 +8,10 @@ Samples for connecting to Microsoft Playwright Service via CDP (Chrome DevTools |------|----------|----------|-------------| | `playwright_service_client.py` | Python | Core Module | Shared Python client for all samples | | `playwrightServiceClient.js` | JavaScript | Core Module | Shared JavaScript client | -| `connectOverCDPScript.py` | Python | **Manual** | Simple connect_over_cdp example | -| `connectOverCDPScript.js` | JavaScript | **Manual** | Simple connectOverCDP example | +| `connectOverCDPScript.py` | Python | **Manual** | Playwright `connect_over_cdp` example | +| `connectOverCDPScript.js` | JavaScript | **Manual** | Playwright `connectOverCDP` example | +| `puppeteerScript.js` | JavaScript | 
**Manual** | Puppeteer over CDP (proxy variant in same file) | +| `cdpUseScript.py` | Python | **Manual** | Raw CDP via `cdp-use` (proxy variant in same file) | | `test_runner.py` | Python | **Testing** | Test runner with helpers | | `Browser-Use-Remote.py` | Python | **AI Agent** | Browser-Use + Azure OpenAI | @@ -107,8 +109,27 @@ PLAYWRIGHT_SERVICE_ACCESS_TOKEN=your_access_token AZURE_OPENAI_API_KEY=your_api_key AZURE_OPENAI_ENDPOINT=https://.openai.azure.com/ AZURE_OPENAI_API_VERSION=2023-07-01-preview + +# For the opt-in proxy snippets only +PROXY_SERVER=http://:8080 +PROXY_USERNAME= +PROXY_PASSWORD= ``` +## 🌐 Optional: Authenticated HTTP proxy + +Each manual sample includes a separate proxy entry-point function alongside +the normal `main()`. It isn't run by default — to use it, change the call at +the bottom of the file from `main()` to the proxy variant and set +`PROXY_SERVER` / `PROXY_USERNAME` / `PROXY_PASSWORD` in your env. + +- [connectOverCDPScript.js](./connectOverCDPScript.js) — `mainWithProxy()` uses `newContext({ proxy })` +- [puppeteerScript.js](./puppeteerScript.js) — `mainWithProxy()` uses `createBrowserContext({ proxyServer })` + `page.authenticate()` +- [cdpUseScript.py](./cdpUseScript.py) — `main_with_proxy()` uses `Target.createBrowserContext({ proxyServer })` + manual `Fetch.continueWithAuth` + +Playwright and Puppeteer answer the proxy 407 challenge for you; with +`cdp-use` the function shows the full dance. + ## 📚 Resources - [Microsoft Playwright Service](https://learn.microsoft.com/azure/playwright-testing/) diff --git a/samples/cdp-tests/cdpUseScript.py b/samples/cdp-tests/cdpUseScript.py new file mode 100644 index 0000000..5d91486 --- /dev/null +++ b/samples/cdp-tests/cdpUseScript.py @@ -0,0 +1,149 @@ +""" +cdp-use over PWW + +Drives a remote Chromium on Microsoft Playwright Service using the low-level +`cdp-use` Python CDP client. 
+ +---------------------------------------- +Install +---------------------------------------- +pip install cdp-use python-dotenv aiohttp + +---------------------------------------- +Required env vars +---------------------------------------- +PLAYWRIGHT_SERVICE_URL +PLAYWRIGHT_SERVICE_ACCESS_TOKEN + +---------------------------------------- +Run +---------------------------------------- +python cdpUseScript.py +""" + +import asyncio +import os +from typing import Optional + +from cdp_use.client import CDPClient +from dotenv import load_dotenv + +from playwright_service_client import get_cdp_endpoint + +load_dotenv() + + +async def main(): + cdp_url = await get_cdp_endpoint() + + async with CDPClient(cdp_url) as client: + ctx = await client.send.Target.createBrowserContext() + target = await client.send.Target.createTarget( + params={"url": "about:blank", "browserContextId": ctx["browserContextId"]} + ) + session = await client.send.Target.attachToTarget( + params={"targetId": target["targetId"], "flatten": True} + ) + session_id = session["sessionId"] + + load_event = asyncio.Event() + + def on_load(event, sid: Optional[str]) -> None: + load_event.set() + + client.register.Page.loadEventFired(on_load) + + await client.send.Page.enable(session_id=session_id) + await client.send.Runtime.enable(session_id=session_id) + + await client.send.Page.navigate( + params={"url": "https://example.com"}, session_id=session_id + ) + await load_event.wait() + + result = await client.send.Runtime.evaluate( + params={"expression": "document.title"}, session_id=session_id + ) + print("Page title:", result["result"]["value"]) + + +# Opt-in proxy variant. Not invoked by default — change the entry point at +# the bottom of this file to `main_with_proxy()` to use it. Requires +# PROXY_SERVER / PROXY_USERNAME / PROXY_PASSWORD in your env. +# +# cdp-use does not abstract proxy auth, so we enable Fetch interception +# and answer Fetch.authRequired ourselves with Fetch.continueWithAuth. 
+async def main_with_proxy(): + cdp_url = await get_cdp_endpoint() + + async with CDPClient(cdp_url) as client: + ctx = await client.send.Target.createBrowserContext( + params={"proxyServer": os.environ["PROXY_SERVER"]} + ) + target = await client.send.Target.createTarget( + params={"url": "about:blank", "browserContextId": ctx["browserContextId"]} + ) + session = await client.send.Target.attachToTarget( + params={"targetId": target["targetId"], "flatten": True} + ) + session_id = session["sessionId"] + + load_event = asyncio.Event() + + async def on_auth(event, sid: Optional[str]) -> None: + if event["authChallenge"]["source"] == "Proxy": + await client.send.Fetch.continueWithAuth( + params={ + "requestId": event["requestId"], + "authChallengeResponse": { + "response": "ProvideCredentials", + "username": os.environ["PROXY_USERNAME"], + "password": os.environ["PROXY_PASSWORD"], + }, + }, + session_id=sid, + ) + else: # never leak proxy creds to origin servers + await client.send.Fetch.continueWithAuth( + params={ + "requestId": event["requestId"], + "authChallengeResponse": {"response": "CancelAuth"}, + }, + session_id=sid, + ) + + async def on_paused(event, sid: Optional[str]) -> None: + await client.send.Fetch.continueRequest( + params={"requestId": event["requestId"]}, session_id=sid + ) + + def on_load(event, sid: Optional[str]) -> None: + load_event.set() + + client.register.Fetch.authRequired(on_auth) + client.register.Fetch.requestPaused(on_paused) + client.register.Page.loadEventFired(on_load) + + await client.send.Page.enable(session_id=session_id) + await client.send.Runtime.enable(session_id=session_id) + await client.send.Fetch.enable( + params={ + "handleAuthRequests": True, + "patterns": [{"urlPattern": "*"}], + }, + session_id=session_id, + ) + + await client.send.Page.navigate( + params={"url": "https://example.com"}, session_id=session_id + ) + await load_event.wait() + + result = await client.send.Runtime.evaluate( + params={"expression": 
"document.title"}, session_id=session_id + ) + print("Page title (via proxy):", result["result"]["value"]) + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/samples/cdp-tests/connectOverCDPScript.js b/samples/cdp-tests/connectOverCDPScript.js index 7e7fbf7..1c3a6eb 100644 --- a/samples/cdp-tests/connectOverCDPScript.js +++ b/samples/cdp-tests/connectOverCDPScript.js @@ -64,6 +64,33 @@ async function main() { console.log('✅ Done!'); } +// Opt-in proxy variant. Not invoked by default — change the entry point at +// the bottom of this file to `mainWithProxy()` to use it. Requires +// PROXY_SERVER / PROXY_USERNAME / PROXY_PASSWORD in your env. Playwright +// answers the 407 challenge for you. +async function mainWithProxy() { + const cdpUrl = await getCdpEndpoint(); + const browser = await chromium.connectOverCDP( + cdpUrl, + { headers: { 'User-Agent': 'Chrome-DevTools-Protocol/1.3' } } + ); + + const context = await browser.newContext({ + proxy: { + server: process.env.PROXY_SERVER, + username: process.env.PROXY_USERNAME, + password: process.env.PROXY_PASSWORD, + }, + }); + const page = await context.newPage(); + + await page.goto('https://example.com'); + console.log(`📌 Page title (via proxy): ${await page.title()}`); + + await context.close(); + await browser.close(); +} + main().catch(error => { console.error('❌ Error:', error.message); process.exit(1); diff --git a/samples/cdp-tests/puppeteerScript.js b/samples/cdp-tests/puppeteerScript.js new file mode 100644 index 0000000..b605de3 --- /dev/null +++ b/samples/cdp-tests/puppeteerScript.js @@ -0,0 +1,68 @@ +/** + * Puppeteer over CDP - Microsoft Playwright Service + * + * Connects puppeteer-core to a remote Chromium on PWW over CDP. 
+ * + * Install: + * npm install puppeteer-core + * + * Environment Variables: + * PLAYWRIGHT_SERVICE_URL=wss://<region>.api.playwright.microsoft.com/playwrightworkspaces/<workspace-id>/browsers + * PLAYWRIGHT_SERVICE_ACCESS_TOKEN=your_access_token + * + * Usage: + * node puppeteerScript.js + */ + +import puppeteer from 'puppeteer-core'; +import { getCdpEndpoint } from './playwrightServiceClient.js'; + +async function main() { + const cdpUrl = await getCdpEndpoint(); + + const browser = await puppeteer.connect({ + browserWSEndpoint: cdpUrl, + defaultViewport: null, + }); + + const context = await browser.createBrowserContext(); + const page = await context.newPage(); + + await page.goto('https://example.com', { waitUntil: 'domcontentloaded' }); + console.log('Page title:', await page.title()); + + await context.close(); + await browser.disconnect(); +} + +// Opt-in proxy variant. Not invoked by default — change the entry point at +// the bottom of this file to `mainWithProxy()` to use it. Requires +// PROXY_SERVER / PROXY_USERNAME / PROXY_PASSWORD in your env.
+async function mainWithProxy() { + const cdpUrl = await getCdpEndpoint(); + + const browser = await puppeteer.connect({ + browserWSEndpoint: cdpUrl, + defaultViewport: null, + }); + + const context = await browser.createBrowserContext({ + proxyServer: process.env.PROXY_SERVER, + }); + const page = await context.newPage(); + await page.authenticate({ + username: process.env.PROXY_USERNAME, + password: process.env.PROXY_PASSWORD, + }); + + await page.goto('https://example.com', { waitUntil: 'domcontentloaded' }); + console.log('Page title (via proxy):', await page.title()); + + await context.close(); + await browser.disconnect(); +} + +main().catch((err) => { + console.error('Error:', err); + process.exit(1); +}); diff --git a/samples/cdp-tests/requirements.txt b/samples/cdp-tests/requirements.txt index cad986f..41c54a2 100644 --- a/samples/cdp-tests/requirements.txt +++ b/samples/cdp-tests/requirements.txt @@ -12,3 +12,6 @@ pytest-asyncio>=0.21.0 # For browser_use_remote.py (AI agent scenario) pydantic>=2.0.0 browser-use>=0.1.0 + +# For cdpUseProxyScript.py (opt-in proxy sample) +cdp-use>=0.3.0 diff --git a/samples/playwright-lib/ReadMe.md b/samples/playwright-lib/ReadMe.md index c0931ef..1ddb84d 100644 --- a/samples/playwright-lib/ReadMe.md +++ b/samples/playwright-lib/ReadMe.md @@ -22,4 +22,21 @@ $env:PLAYWRIGHT_RUN_ID="your_guid" npx ts-node src/example.ts ``` -- Test Runs get updated at 5 min interval, so check current test run details after 5 min of running script. \ No newline at end of file +- Test Runs get updated at 5 min interval, so check current test run details after 5 min of running script. + +## Optional: route the run through an authenticated HTTP proxy + +The default [`src/example.ts`](./src/example.ts) talks to PWW directly. If you +need every BrowserContext to go through an authenticated forward proxy, use +the opt-in [`src/example-proxy.ts`](./src/example-proxy.ts) instead. 
It adds a +`proxy` option to `browser.newContext()`; Playwright handles the 407 +challenge for you. + +```powershell +$env:PROXY_SERVER = "http://<proxy-host>:8080" +$env:PROXY_USERNAME = "<proxy-username>" +$env:PROXY_PASSWORD = "<proxy-password>" +$env:PROXY_ONLY_URL = "http://intranet.example/healthcheck" # URL the sample navigates to through the proxy + +npx ts-node src/example-proxy.ts +``` \ No newline at end of file diff --git a/samples/playwright-lib/src/example-proxy.ts b/samples/playwright-lib/src/example-proxy.ts new file mode 100644 index 0000000..cbf9b08 --- /dev/null +++ b/samples/playwright-lib/src/example-proxy.ts @@ -0,0 +1,54 @@ +import { chromium, devices } from 'playwright'; +import { randomUUID } from 'crypto'; + +/** + * Opt-in proxy variant of example.ts. + * + * Same `chromium.connect()` flow against PWW, but every BrowserContext is + * created with a `proxy:` option so all traffic from this run is routed + * through your authenticated HTTP forward proxy. Playwright transparently + * answers the 407 challenge with the supplied credentials. + * + * Required env vars (in addition to PLAYWRIGHT_SERVICE_URL + + * PLAYWRIGHT_SERVICE_ACCESS_TOKEN): + * PROXY_SERVER e.g.
http://<proxy-host>:8080 + * PROXY_USERNAME + * PROXY_PASSWORD + * PROXY_ONLY_URL the URL to fetch through the proxy + * + * Run: + * npx ts-node src/example-proxy.ts + */ +const runId = process.env['PLAYWRIGHT_RUN_ID'] || randomUUID(); +const os = 'linux'; +const apiVersion = '2025-09-01'; + +const wsEndpoint = + `${process.env['PLAYWRIGHT_SERVICE_URL']}` + + `?runId=${encodeURIComponent(runId)}&os=${os}&api-version=${apiVersion}`; + +const connectOptions = { + headers: { Authorization: `Bearer ${process.env['PLAYWRIGHT_SERVICE_ACCESS_TOKEN'] || ''}` }, + timeout: 3 * 60 * 1000, + exposeNetwork: '', +}; + +const proxy = { + server: process.env['PROXY_SERVER']!, + username: process.env['PROXY_USERNAME'], + password: process.env['PROXY_PASSWORD'], +}; + +(async () => { + const browser = await chromium.connect(wsEndpoint, connectOptions); + const context = await browser.newContext({ ...devices['Desktop Chrome'], proxy }); + const page = await context.newPage(); + + const target = process.env['PROXY_ONLY_URL']!; + const response = await page.goto(target); + console.log(`status: ${response?.status()}`); + console.log('title :', await page.title()); + + await context.close(); + await browser.close(); +})(); diff --git a/samples/playwright-proxy-tests/.env.example b/samples/playwright-proxy-tests/.env.example deleted file mode 100644 index 261396c..0000000 --- a/samples/playwright-proxy-tests/.env.example +++ /dev/null @@ -1,15 +0,0 @@ -# Microsoft Playwright Workspaces (PWW) — region + workspace ID -# Get these from the Azure portal under your Playwright Workspaces resource. -PLAYWRIGHT_SERVICE_URL="wss://.api.playwright.microsoft.com/playwrightworkspaces//browsers" -PLAYWRIGHT_SERVICE_ACCESS_TOKEN="" - -# Authenticated forward proxy (point at your own). -PROXY_SERVER="http://:8080" -PROXY_USERNAME="" -PROXY_PASSWORD="" - -# A URL reachable through your proxy (e.g. a private intranet origin, a -# service whitelisted by your proxy's egress IP, etc.).
Each sample's final -# step fetches it through the proxied context to prove the proxy is tunnelling -# traffic end to end. Replace with one of your own. -PROXY_ONLY_URL="http://intranet.example/healthcheck" diff --git a/samples/playwright-proxy-tests/.gitignore b/samples/playwright-proxy-tests/.gitignore deleted file mode 100644 index 2e8157a..0000000 --- a/samples/playwright-proxy-tests/.gitignore +++ /dev/null @@ -1,3 +0,0 @@ -node_modules/ -.env -*.log diff --git a/samples/playwright-proxy-tests/README.md b/samples/playwright-proxy-tests/README.md deleted file mode 100644 index 1937136..0000000 --- a/samples/playwright-proxy-tests/README.md +++ /dev/null @@ -1,279 +0,0 @@ -# Playwright Workspaces + Authenticated HTTP Proxy — Samples - -Three runnable Node.js samples showing how to route a remote Chromium on -**Microsoft Playwright Workspaces (PWW)** through an **authenticated outbound -HTTP proxy** that you provide. - -The samples are deliberately small (one file each, no test framework, no -abstraction layer) so you can read the entire request flow end to end and -copy the parts you need into your own code. - -> You bring your own authenticated forward proxy. These samples only show -> how to drive PWW through one — they do not deploy a proxy for you. - ---- - -## Folder layout - -``` -playwright-proxy-tests/ -├── README.md ← you are here -├── .env.example ← copy to .env and fill in -├── package.json ← installs playwright + dotenv -├── connectOverCdp.mjs ← Sample 1: Playwright over CDP (recommended) -├── rawCdp.mjs ← Sample 2: hand-rolled CDP JSON-RPC -├── playwrightConnect.mjs ← Sample 3: Playwright native wire protocol -└── pwwSessionClient.mjs ← helper: gets a CDP wss:// URL from PWW -``` - ---- - -## The three samples at a glance - -The three samples are **not** identical demos. They differ in (a) what wire -protocol talks to the remote browser and (b) how much of the proxy auth dance -*you* have to write. 
They also differ in which steps they run: - -| File | Wire protocol to PWW | Proxy auth handled by | Direct step? | Proxied step? | Proxy-only URL step? | -| --- | --- | --- | :---: | :---: | :---: | -| [connectOverCdp.mjs](connectOverCdp.mjs) | CDP (`chromium.connectOverCDP`) | Playwright (internal `Fetch.*`) | yes | yes | yes | -| [rawCdp.mjs](rawCdp.mjs) | CDP (raw WebSocket JSON-RPC) | **You — `Fetch.enable` + `Fetch.continueWithAuth`** | no | yes | yes | -| [playwrightConnect.mjs](playwrightConnect.mjs) | Playwright native wire protocol (`chromium.connect`) | PWW server-side (you never see the 407) | yes | yes | yes | - -The "proxy-only URL" step navigates to `PROXY_ONLY_URL` from `.env` (see -[Setup](#setup-one-time) below) — plug in any URL you want fetched through -the proxy. - ---- - -## What each sample actually demonstrates - -### 1. `connectOverCdp.mjs` — recommended - -Connects with `chromium.connectOverCDP()` against a one-shot PWW CDP endpoint, -then runs: - -| Step | Context | URL | Expected output | -| ---: | --- | --- | --- | -| 1 | `browser.newContext()` (no proxy) | `https://api.ipify.org` | the PWW container's egress IP | -| 2 | `browser.newContext({ proxy })` | `https://api.ipify.org` | the **proxy's** egress IP | -| 3 | same proxied context | `$PROXY_ONLY_URL` | whatever your URL returns | - -The proxy `407` is handled inside Playwright — your code is just -`newContext({ proxy: { server, username, password } })`. - -### 2. `rawCdp.mjs` — see the protocol explicitly - -No Playwright. Opens a raw WebSocket to the PWW CDP endpoint and writes every -JSON-RPC frame by hand. This is the path to use if you need to **drive PWW -from a non-Node client** (any language with a WebSocket library) or if you're -debugging exactly what Playwright is sending. 
- -Setup frames (in order): - -``` -Target.createBrowserContext { proxyServer } → browserContextId -Target.createTarget { browserContextId, url } → targetId -Target.attachToTarget { targetId, flatten:true } → sessionId (all subsequent frames carry this) -Page.enable (so we can await Page.loadEventFired) -Runtime.enable (so we can Runtime.evaluate) -Fetch.enable { handleAuthRequests:true, patterns:[*] } (you now own the auth) -``` - -Then it runs through that proxied session: - -| Step | URL | Expected output | -| ---: | --- | --- | -| 1 | `https://api.ipify.org` | the proxy's egress IP | -| 2 | `$PROXY_ONLY_URL` | whatever your URL returns | - -Run with `$env:CDP_DEBUG=1` to print every frame the script sends (`>>`) and -receives (`<<`). - -### 3. `playwrightConnect.mjs` — PWW does it all - -Connects with `chromium.connect()` against the **PWW service URL** (not a CDP -URL). The connection uses Playwright's native wire protocol over WebSocket, -authenticated with `Authorization: Bearer `. No CDP frames cross your -laptop's network — PWW relays everything on the server side. - -Runs the same steps as Sample 1, including the `$PROXY_ONLY_URL` step. The -observable behaviour is identical; the difference is purely the on-the-wire -protocol and where the auth dance happens (PWW relays it for you). - ---- - -## You have to authenticate twice - -Every sample performs **two** independent authentications. They are unrelated -and easy to confuse: - -1. **To the remote browser host (PWW).** A Bearer access token in the - `Authorization` header on the initial WebSocket upgrade. - - Sample 1 (`connectOverCdp.mjs`) — the token is in the wss URL Playwright - gets from the PWW REST API via [`pwwSessionClient.mjs`](pwwSessionClient.mjs). - - Sample 2 (`rawCdp.mjs`) — same wss URL, opened directly with `new WebSocket(...)`. - - Sample 3 (`playwrightConnect.mjs`) — passed via - `chromium.connect(url, { headers: { Authorization: 'Bearer ...' } })`. 
- - Lives in `PLAYWRIGHT_SERVICE_ACCESS_TOKEN` in `.env`. If this fails, - you get `401 Authentication failed`. - -2. **To the outbound HTTP proxy.** Standard HTTP Basic auth via the - `Proxy-Authorization: Basic ` header on every CONNECT / - request that traverses the proxy. If it fails, the proxy returns - `407 Proxy Authentication Required`. - - Lives in `PROXY_USERNAME` / `PROXY_PASSWORD` in `.env`. - - How this gets onto the wire is what differs across the three samples - (next section). - ---- - -## How the `407` is answered - -When a proxied request hits the proxy for the first time it gets -`407 Proxy Authentication Required` with `Proxy-Authenticate: Basic`. Someone -has to retry it with `Proxy-Authorization: Basic `. Each -sample arranges that differently. - -### Sample 1 & 3: Playwright handles it - -```js -const ctx = await browser.newContext({ - proxy: { server, username, password }, -}); -``` - -Playwright registers a `Fetch.enable { handleAuthRequests: true }` handler -internally and replies to every `Fetch.authRequired` event with your -credentials when the challenge is from a proxy, or cancels otherwise. Your -code has no callbacks, no event listeners, no protocol concerns. - -### Sample 2: you handle it (this is what makes raw CDP "raw") - -In raw CDP there is no abstraction — `Target.createBrowserContext { proxyServer }` -**only configures which proxy to talk to**, it does NOT configure credentials. -On the very first request, Chromium gets a 407 and stops. To get past it you -have to: - -1. Subscribe to the proxied session's `Fetch.*` events. -2. Enable interception with - `Fetch.enable { handleAuthRequests: true, patterns:[{urlPattern:'*'}] }`. -3. On each `Fetch.authRequired` event, decide based on `authChallenge.source`: - - `'Proxy'` → reply - `Fetch.continueWithAuth { response:'ProvideCredentials', username, password }`. 
- - `'Server'` → reply `Fetch.continueWithAuth { response:'CancelAuth' }` — - **do not send proxy creds to origin sites**; that would leak them to - any 401 site you visit. -4. On every non-auth `Fetch.requestPaused` event, reply - `Fetch.continueRequest { requestId }` so the request actually goes out. - -If you forget step 4, every request hangs because `Fetch.enable` pauses *all* -requests, not just auth-challenged ones. If you forget the `'Server'` branch -in step 3, you ship a credential-leakage bug. The full handler is in -[rawCdp.mjs](rawCdp.mjs). - ---- - -## Setup (one time) - -You need: - -- **Node.js 18+** (for built-in `WebSocket` and `fetch`). -- **A PWW workspace** + an access token (Azure portal → your Playwright Workspaces resource). -- **An authenticated HTTP proxy you control.** Point the samples at it via - `PROXY_SERVER` / `PROXY_USERNAME` / `PROXY_PASSWORD`. To prove the proxy is - actually in the request path, the samples compare the egress IP reported by - `api.ipify.org` with and without the proxy — they should differ. -- **A URL to fetch through the proxy.** Set `PROXY_ONLY_URL` in `.env` to - any hostname your proxy can reach — a private intranet origin, an - IP-allowlisted service, or even a public URL. Each sample's final step - navigates to it through the proxied context. - -Then: - -```powershell -# 1) Install dependencies -cd playwright-proxy-tests -npm install - -# 2) Configure credentials -Copy-Item .env.example .env -# Edit .env and fill in: -# PLAYWRIGHT_SERVICE_URL, PLAYWRIGHT_SERVICE_ACCESS_TOKEN, -# PROXY_SERVER, PROXY_USERNAME, PROXY_PASSWORD, PROXY_ONLY_URL -``` - -> `.env` is gitignored. Never commit real credentials. - -`.env` is shared by all three samples — each `.mjs` loads -`new URL('./.env', import.meta.url)` so it works from any cwd. - ---- - -## Running the samples - -```powershell -# Sample 1 — recommended high-level path -npm run sample:connect-over-cdp - -# Sample 2 — raw CDP. 
Add CDP_DEBUG=1 to print every frame: -npm run sample:raw-cdp -$env:CDP_DEBUG=1; node rawCdp.mjs; Remove-Item env:CDP_DEBUG - -# Sample 3 — Playwright native wire protocol. -# Add DEBUG=pw:* to see Playwright's outbound protocol frames: -npm run sample:playwright-connect -$env:DEBUG="pw:*"; node playwrightConnect.mjs 2>pw.log; Remove-Item env:DEBUG -``` - -Expected: in Samples 1 and 3, the direct step prints one IP and the proxied -step prints a different IP (the proxy's egress IP). In Sample 2, the single -proxied IP step prints the same IP as Samples 1 and 3's proxied step. The -final step in each sample prints the body of `PROXY_ONLY_URL` fetched -through the proxied context. - ---- - -## Key facts worth knowing - -- **PWW exposes one CDP WebSocket per remote browser.** Multi-session work - (`Target.attachToTarget`) requires `flatten: true` so messages multiplex via - `sessionId` on that single socket. See - [crbug/40639208](https://issues.chromium.org/issues/40639208). -- **Per-context proxy is supported.** A Chromium context is bound to its - `proxyServer` at create time. This is why all three samples can mix a - direct context and a proxied context in the same browser (and Samples 1/3 - do exactly that). -- **Auth source matters.** On `Fetch.authRequired`, always check - `authChallenge.source`. Provide credentials only for `'Proxy'`; cancel - otherwise. Sending proxy credentials to an origin server is a leak. -- **`chromium.connect` vs `chromium.connectOverCDP`.** `connect` uses - Playwright's native protocol — PWW relays everything, your laptop never - speaks CDP. `connectOverCDP` opens a real CDP socket to the remote browser - — your machine speaks CDP directly. Either way Chromium-level features - (per-context proxy, etc.) behave the same. -- **Egress IPs.** - - PWW direct → an IP from the Microsoft-owned PWW egress range (varies by region). - - Via the proxy → the egress (SNAT) IP of your proxy. 
- ---- - -## Troubleshooting - -| Symptom | Likely cause | -| --- | --- | -| `Authentication failed. Check your access token.` | Expired or wrong `PLAYWRIGHT_SERVICE_ACCESS_TOKEN`. Regenerate in the portal. | -| `Invalid PLAYWRIGHT_SERVICE_URL format` | Must be `wss://.api.playwright.microsoft.com/playwrightworkspaces//browsers`. | -| Script hangs on the first proxied navigation. | Most common: `PROXY_USERNAME` / `PROXY_PASSWORD` don't match what the proxy was deployed with — the proxy keeps returning 407. | -| `rawCdp.mjs` hangs even though direct CDP works. | You probably forgot to forward non-auth `Fetch.requestPaused` events with `Fetch.continueRequest`. `Fetch.enable` pauses **every** request. | -| Proxy creds appear on an origin site. | You're answering `Fetch.authRequired` with `ProvideCredentials` regardless of `authChallenge.source`. Gate on `=== 'Proxy'`. | -| `PROXY_ONLY_URL` step hangs or 502s. | The URL isn't reachable through your proxy (DNS, ACL, or proxy isn't tunnelling CONNECT for that host). Try the URL from a client behind the proxy first. | -| Sample 1/3 hangs on `newContext`. | Network can't reach PWW. Check corporate firewall lets `*.api.playwright.microsoft.com:443` through. | -| `connectOverCdp.mjs` works, `rawCdp.mjs` doesn't connect at all. | You called `Target.attachToTarget` without `flatten: true`. PWW's single-socket model requires flattened sessions. | - ---- - -## What to read next - -- The top-of-file docstring in each `.mjs` — recaps the demo steps and gives - copy-pasteable debug commands specific to that sample. 
diff --git a/samples/playwright-proxy-tests/connectOverCdp.mjs b/samples/playwright-proxy-tests/connectOverCdp.mjs deleted file mode 100644 index 78241b0..0000000 --- a/samples/playwright-proxy-tests/connectOverCdp.mjs +++ /dev/null @@ -1,107 +0,0 @@ -/** - * connectOverCdp.mjs - * ============================================================================ - * Sample 1 of 3 — Playwright over CDP (recommended high-level path) - * ============================================================================ - * Drives a remote Chromium on Microsoft Playwright Workspaces (PWW) using - * Playwright's high-level `chromium.connectOverCDP()`. Playwright internally - * emits the same `Target.*` / `Fetch.*` CDP frames you can see in - * `rawCdp.mjs`, but you write a single `browser.newContext({ proxy })` call - * instead of handcrafting the protocol. - * - * This is the path most customers should use. - * - * Demo runs three steps: - * 1. DIRECT context -> https://api.ipify.org?format=json - * (shows the PWW container's public egress IP) - * 2. PROXIED context -> https://api.ipify.org?format=json - * (shows the PROXY's egress IP — request went through the proxy) - * 3. SAME proxied ctx -> $PROXY_ONLY_URL - * (fetches a URL of your choice through the proxy — e.g. 
a private - * intranet origin only reachable via your proxy) - * - * Run: - * node connectOverCdp.mjs - * - * See the CDP frames Playwright is sending under the hood: - * $env:DEBUG = "pw:protocol"; node connectOverCdp.mjs *> cdp.log; Remove-Item env:DEBUG - * Select-String cdp.log -Pattern "createBrowserContext|Fetch.authRequired|continueWithAuth" -Context 0,1 - */ - -import { config } from 'dotenv'; -config({ path: new URL('./.env', import.meta.url) }); - -import { chromium } from 'playwright'; -import { getCdpEndpoint } from './pwwSessionClient.mjs'; - -// ─────────────────────────────────────────────────────────────────────────── // -// Config // -// ─────────────────────────────────────────────────────────────────────────── // - -const { PROXY_SERVER, PROXY_USERNAME, PROXY_PASSWORD, PROXY_ONLY_URL } = process.env; - -const IPIFY_URL = 'https://api.ipify.org?format=json'; - -const PROXY = { - server: PROXY_SERVER, // e.g. http://:8080 - username: PROXY_USERNAME, - password: PROXY_PASSWORD, -}; - -// ─────────────────────────────────────────────────────────────────────────── // -// Connect to PWW over CDP // -// getCdpEndpoint() asks the PWW REST API for a one-shot wss:// URL. // -// ─────────────────────────────────────────────────────────────────────────── // - -const cdpEndpoint = await getCdpEndpoint(); -const browser = await chromium.connectOverCDP(cdpEndpoint); - -// ═════════════════════════════════════════════════════════════════════════ // -// STEP 1 — DIRECT context → ipify // -// No `proxy` option => Chromium reaches the internet directly from the // -// PWW container. ipify returns the container's public egress IP. 
// -// ═════════════════════════════════════════════════════════════════════════ // - -const directContext = await browser.newContext(); -const directPage = await directContext.newPage(); - -await directPage.goto(IPIFY_URL); -const directBody = await directPage.locator('body').innerText(); - -console.log('--- 1) DIRECT -> ipify ---'); -console.log(directBody); - -await directContext.close(); - -// ═════════════════════════════════════════════════════════════════════════ // -// STEP 2 — PROXIED context → ipify // -// Passing `proxy` to newContext makes Chromium route every request from // -// this context through the authenticated HTTP proxy. ipify now reports // -// the PROXY's egress IP, not the container's. // -// ═════════════════════════════════════════════════════════════════════════ // - -const proxiedContext = await browser.newContext({ proxy: PROXY }); -const proxiedPage = await proxiedContext.newPage(); - -await proxiedPage.goto(IPIFY_URL); -const proxiedBody = await proxiedPage.locator('body').innerText(); - -console.log('--- 2) PROXIED -> ipify ---'); -console.log(proxiedBody); - -// ═════════════════════════════════════════════════════════════════════════ // -// STEP 3 — SAME proxied context → a URL of your choice // -// Customer-supplied via PROXY_ONLY_URL in .env. Use any hostname that is // -// reachable through your proxy (private intranet, IP-allowlisted service, // -// etc.). 
// -// ═════════════════════════════════════════════════════════════════════════ // - -const proxyOnlyPage = await proxiedContext.newPage(); -await proxyOnlyPage.goto(PROXY_ONLY_URL); -const proxyOnlyBody = await proxyOnlyPage.locator('body').innerText(); - -console.log(`--- 3) PROXIED -> ${PROXY_ONLY_URL} ---`); -console.log(proxyOnlyBody); - -await proxiedContext.close(); -await browser.close(); diff --git a/samples/playwright-proxy-tests/package.json b/samples/playwright-proxy-tests/package.json deleted file mode 100644 index b663ffc..0000000 --- a/samples/playwright-proxy-tests/package.json +++ /dev/null @@ -1,16 +0,0 @@ -{ - "name": "pww-proxy-samples", - "version": "1.0.0", - "type": "module", - "private": true, - "description": "Three working samples for using an authenticated HTTP proxy with Microsoft Playwright Workspaces remote Chromium.", - "scripts": { - "sample:connect-over-cdp": "node connectOverCdp.mjs", - "sample:raw-cdp": "node rawCdp.mjs", - "sample:playwright-connect": "node playwrightConnect.mjs" - }, - "dependencies": { - "dotenv": "^16.4.5", - "playwright": "^1.48.0" - } -} diff --git a/samples/playwright-proxy-tests/playwrightConnect.mjs b/samples/playwright-proxy-tests/playwrightConnect.mjs deleted file mode 100644 index bb9bc3f..0000000 --- a/samples/playwright-proxy-tests/playwrightConnect.mjs +++ /dev/null @@ -1,123 +0,0 @@ -/** - * playwrightConnect.mjs - * ============================================================================ - * Sample 3 of 3 — Playwright's native wire protocol (no CDP on the wire) - * ============================================================================ - * Drives a remote Chromium on Microsoft Playwright Workspaces (PWW) via - * Playwright's NATIVE wire protocol (not CDP). `chromium.connect()` is - * pointed at the PWW service URL with a Bearer-token Authorization header. 
- * PWW relays `browser.newContext({ proxy })` to the remote Chromium and the - * proxy-auth handshake (Fetch.* frames) is performed entirely server-side. - * From this laptop we never see a CDP frame. - * - * Demo runs three steps: - * 1. DIRECT context -> https://api.ipify.org?format=json - * (shows the PWW container's public egress IP) - * 2. PROXIED context -> https://api.ipify.org?format=json - * (shows the PROXY's egress IP — request went through the proxy) - * 3. SAME proxied ctx -> $PROXY_ONLY_URL - * (fetches a URL of your choice through the proxy — e.g. a private - * intranet origin only reachable via your proxy) - * - * Run: - * node playwrightConnect.mjs - * - * See the Playwright wire-protocol frames being sent: - * $env:DEBUG="pw:*"; node playwrightConnect.mjs 2>pw.log; Remove-Item env:DEBUG - * Select-String pw.log -Pattern "newContext|setNetworkProxy|proxyServer" -Context 0,1 - */ - -import { config } from 'dotenv'; -config({ path: new URL('./.env', import.meta.url) }); - -import { randomUUID } from 'node:crypto'; -import { chromium } from 'playwright'; - -// ─────────────────────────────────────────────────────────────────────────── // -// Config // -// ─────────────────────────────────────────────────────────────────────────── // - -const { - PLAYWRIGHT_SERVICE_URL, // wss://.api.playwright.microsoft.com/... - PLAYWRIGHT_SERVICE_ACCESS_TOKEN, // PAT issued from the PWW portal - PROXY_SERVER, - PROXY_USERNAME, - PROXY_PASSWORD, - PROXY_ONLY_URL, -} = process.env; - -const IPIFY_URL = 'https://api.ipify.org?format=json'; - -const PROXY = { - server: PROXY_SERVER, - username: PROXY_USERNAME, - password: PROXY_PASSWORD, -}; - -// PWW wire-protocol query string. `runId` is a per-session UUID, `os` picks -// the remote container image, `api-version` pins the contract. 
-const API_VERSION = '2025-09-01'; -const OS_NAME = 'linux'; -const wsEndpoint = - `${PLAYWRIGHT_SERVICE_URL}` + - `?runId=${encodeURIComponent(randomUUID())}` + - `&os=${OS_NAME}` + - `&api-version=${API_VERSION}`; - -// ─────────────────────────────────────────────────────────────────────────── // -// Connect to PWW over the Playwright wire protocol // -// Auth is a Bearer token in the Authorization header — no CDP socket open. // -// ─────────────────────────────────────────────────────────────────────────── // - -const browser = await chromium.connect(wsEndpoint, { - headers: { Authorization: `Bearer ${PLAYWRIGHT_SERVICE_ACCESS_TOKEN}` }, - timeout: 3 * 60 * 1000, -}); - -// ═════════════════════════════════════════════════════════════════════════ // -// STEP 1 — DIRECT context → ipify // -// No `proxy` option => Chromium reaches the internet directly from the // -// PWW container. ipify returns the container's public egress IP. // -// ═════════════════════════════════════════════════════════════════════════ // - -const directContext = await browser.newContext(); -const directPage = await directContext.newPage(); - -await directPage.goto(IPIFY_URL); -const directBody = await directPage.locator('body').innerText(); - -console.log('--- 1) DIRECT -> ipify ---'); -console.log(directBody); - -await directContext.close(); - -// ═════════════════════════════════════════════════════════════════════════ // -// STEP 2 — PROXIED context → ipify // -// PWW relays the `proxy` setting to the remote browser; proxy 407 auth is // -// handled server-side. ipify now reports the PROXY's egress IP. 
// -// ═════════════════════════════════════════════════════════════════════════ // - -const proxiedContext = await browser.newContext({ proxy: PROXY }); -const proxiedPage = await proxiedContext.newPage(); - -await proxiedPage.goto(IPIFY_URL); -const proxiedBody = await proxiedPage.locator('body').innerText(); - -console.log('--- 2) PROXIED -> ipify ---'); -console.log(proxiedBody); - -// ═════════════════════════════════════════════════════════════════════════ // -// STEP 3 — SAME proxied context → a URL of your choice // -// Customer-supplied via PROXY_ONLY_URL in .env. Use any hostname that is // -// reachable through your proxy. // -// ═════════════════════════════════════════════════════════════════════════ // - -const proxyOnlyPage = await proxiedContext.newPage(); -await proxyOnlyPage.goto(PROXY_ONLY_URL); -const proxyOnlyBody = await proxyOnlyPage.locator('body').innerText(); - -console.log(`--- 3) PROXIED -> ${PROXY_ONLY_URL} ---`); -console.log(proxyOnlyBody); - -await proxiedContext.close(); -await browser.close(); diff --git a/samples/playwright-proxy-tests/pwwSessionClient.mjs b/samples/playwright-proxy-tests/pwwSessionClient.mjs deleted file mode 100644 index 1e64c1a..0000000 --- a/samples/playwright-proxy-tests/pwwSessionClient.mjs +++ /dev/null @@ -1,73 +0,0 @@ -/** - * pwwSessionClient.mjs - * - * Tiny helper: asks Microsoft Playwright Workspaces (PWW) for a one-shot - * wss:// CDP endpoint, given PLAYWRIGHT_SERVICE_URL + PLAYWRIGHT_SERVICE_ACCESS_TOKEN. - * - * Imported by `rawCdp.mjs` and `connectOverCdp.mjs`. Not meant to be run directly. 
- * - * Usage: - * import { getCdpEndpoint } from './pwwSessionClient.mjs'; - * const cdpUrl = await getCdpEndpoint(); - * const browser = await chromium.connectOverCDP(cdpUrl); - */ - -export class PlaywrightServiceError extends Error { - constructor(message) { - super(message); - this.name = 'PlaywrightServiceError'; - } -} - -function parseServiceUrl(url) { - const match = url.match(/wss:\/\/(\w+)\.api\.playwright\.microsoft\.com\/playwrightworkspaces\/([^/]+)\/browsers/); - if (!match) { - throw new PlaywrightServiceError( - `Invalid PLAYWRIGHT_SERVICE_URL format: ${url}\n` + - 'Expected: wss://.api.playwright.microsoft.com/playwrightworkspaces//browsers' - ); - } - return { region: match[1], workspaceId: match[2] }; -} - -export async function getCdpEndpoint(serviceUrl = null, accessToken = null, osName = 'Linux') { - serviceUrl = serviceUrl || process.env.PLAYWRIGHT_SERVICE_URL; - accessToken = accessToken || process.env.PLAYWRIGHT_SERVICE_ACCESS_TOKEN; - - if (!serviceUrl) { - throw new PlaywrightServiceError( - 'PLAYWRIGHT_SERVICE_URL environment variable is not set.\n' + - 'Expected: wss://.api.playwright.microsoft.com/playwrightworkspaces//browsers' - ); - } - if (!accessToken) { - throw new PlaywrightServiceError('PLAYWRIGHT_SERVICE_ACCESS_TOKEN environment variable is not set.'); - } - - const { region, workspaceId } = parseServiceUrl(serviceUrl); - - const apiUrl = `https://${region}.api.playwright.microsoft.com/playwrightworkspaces/${workspaceId}/browsers?os=${osName}&browser=chromium&playwrightVersion=cdp&shouldRedirect=false`; - - const headers = { - 'Authorization': `Bearer ${accessToken}`, - 'Accept': 'application/json', - }; - - const response = await fetch(apiUrl, { headers }); - - if (response.status === 401) { - throw new PlaywrightServiceError('Authentication failed. Check your access token.'); - } - if (response.status === 403) { - throw new PlaywrightServiceError('Access forbidden. 
Check your permissions.'); - } - if (!response.ok) { - const text = await response.text(); - throw new PlaywrightServiceError(`Failed to get browser endpoint: HTTP ${response.status}\n${text}`); - } - - const data = await response.json(); - const correlationId = new URL(data.sessionUrl).searchParams.get('correlationId'); - console.log('PWW session (correlationId):', correlationId); - return data.sessionUrl; -} diff --git a/samples/playwright-proxy-tests/rawCdp.mjs b/samples/playwright-proxy-tests/rawCdp.mjs deleted file mode 100644 index 4ef056e..0000000 --- a/samples/playwright-proxy-tests/rawCdp.mjs +++ /dev/null @@ -1,197 +0,0 @@ -/** - * rawCdp.mjs - * ============================================================================ - * Sample 2 of 3 — Raw CDP (no Playwright, hand-rolled JSON-RPC) - * ============================================================================ - * Drives a remote Chromium on Microsoft Playwright Workspaces (PWW) over a - * CDP WebSocket by writing every `Target.*` / `Page.*` / `Fetch.*` frame by - * hand. This is what `connectOverCdp.mjs` emits under the hood — shown - * explicitly so the wire protocol is fully visible. - * - * The proxy-auth dance, expanded: - * Target.createBrowserContext { proxyServer } (context bound to proxy) - * Target.createTarget { browserContextId } (open a tab there) - * Target.attachToTarget { targetId, flatten } (get a sessionId) - * Page.enable / Runtime.enable (navigate + evaluate) - * Fetch.enable { handleAuthRequests: true } (intercept 407) - * Fetch.authRequired ← proxy responded 407 Proxy-Authenticate - * Fetch.continueWithAuth { ProvideCredentials, username, password } - * - * Demo runs two steps through the proxied session: - * 1. PROXIED session -> https://api.ipify.org?format=json - * (shows the PROXY's egress IP) - * 2. SAME session -> $PROXY_ONLY_URL - * (fetches a URL of your choice through the proxy — e.g. 
a private - * intranet origin only reachable via your proxy) - * - * Run: - * node rawCdp.mjs - * - * Print every CDP frame sent (>>) and received (<<): - * $env:CDP_DEBUG=1; node rawCdp.mjs; Remove-Item env:CDP_DEBUG - */ - -import { config } from 'dotenv'; -config({ path: new URL('./.env', import.meta.url) }); - -import { getCdpEndpoint } from './pwwSessionClient.mjs'; - -// ─────────────────────────────────────────────────────────────────────────── // -// Config // -// ─────────────────────────────────────────────────────────────────────────── // - -const { PROXY_SERVER, PROXY_USERNAME, PROXY_PASSWORD, PROXY_ONLY_URL } = process.env; - -const IPIFY_URL = 'https://api.ipify.org?format=json'; - -const DEBUG = process.env.CDP_DEBUG === '1'; -const trunc = (s, n = 200) => (s.length > n ? s.slice(0, n) + '…' : s); - -// ─────────────────────────────────────────────────────────────────────────── // -// Open the CDP WebSocket to PWW // -// getCdpEndpoint() asks the PWW REST API for a one-shot wss:// URL pointing // -// at a remote Chromium's CDP socket. // -// ─────────────────────────────────────────────────────────────────────────── // - -const ws = new WebSocket(await getCdpEndpoint()); -await new Promise((resolve, reject) => { ws.onopen = resolve; ws.onerror = reject; }); - -// ─────────────────────────────────────────────────────────────────────────── // -// Tiny JSON-RPC client over the WebSocket // -// CDP frames are JSON. Requests carry numeric `id`; responses echo it. // -// Anything without `id` is an event we broadcast to listeners. // -// ─────────────────────────────────────────────────────────────────────────── // - -let nextId = 0; -const pending = new Map(); // id -> resolver fn -const listeners = new Set(); // event handlers - -ws.onmessage = async (ev) => { - const text = typeof ev.data === 'string' ? 
ev.data : await ev.data.text(); - if (DEBUG) console.log('<<', trunc(text)); - const msg = JSON.parse(text); - if (msg.id != null) { - pending.get(msg.id)?.(msg); - pending.delete(msg.id); - } else { - for (const fn of listeners) fn(msg); - } -}; - -const send = (method, params = {}, sessionId) => - new Promise((resolve, reject) => { - const id = ++nextId; - pending.set(id, (m) => m.error - ? reject(new Error(`${method}: ${m.error.message}`)) - : resolve(m.result)); - const frame = JSON.stringify({ id, method, params, sessionId }); - if (DEBUG) console.log('>>', trunc(frame)); - ws.send(frame); - }); - -// Convenience: wait for a single event matching (sessionId, method). -const waitForEvent = (sessionId, method) => - new Promise((resolve) => { - const fn = (m) => { - if (m.sessionId === sessionId && m.method === method) { - listeners.delete(fn); - resolve(m); - } - }; - listeners.add(fn); - }); - -// ═════════════════════════════════════════════════════════════════════════ // -// ONE-TIME SETUP // -// ═════════════════════════════════════════════════════════════════════════ // - -// (a) Create the proxied browser context + a page inside it. -const { browserContextId } = await send('Target.createBrowserContext', { - proxyServer: PROXY_SERVER, -}); -const { targetId: proxiedTargetId } = await send('Target.createTarget', { - url: 'about:blank', - browserContextId, -}); - -// (b) Attach to the proxied target to get a sessionId for driving it. -// `flatten: true` is required so messages multiplex via sessionId on the -// single CDP socket PWW exposes per browser. -const { sessionId: proxiedSession } = await send('Target.attachToTarget', { targetId: proxiedTargetId, flatten: true }); - -// (c) Enable Page/Runtime so we can navigate + evaluate. -await send('Page.enable', {}, proxiedSession); -await send('Runtime.enable', {}, proxiedSession); - -// (d) Wire up proxy-auth interception. 
Fetch.enable pauses every request; -// our listener provides credentials on a Proxy 407 and forwards the rest. -await send('Fetch.enable', { - handleAuthRequests: true, - patterns: [{ urlPattern: '*' }], -}, proxiedSession); - -listeners.add((m) => { - if (m.sessionId !== proxiedSession) return; - - if (m.method === 'Fetch.authRequired') { - if (m.params.authChallenge.source === 'Proxy') { - // Proxy returned 407 Proxy-Authenticate. Provide creds; Chromium - // will retry the request with Proxy-Authorization: Basic ... - send('Fetch.continueWithAuth', { - requestId: m.params.requestId, - authChallengeResponse: { - response: 'ProvideCredentials', - username: PROXY_USERNAME, - password: PROXY_PASSWORD, - }, - }, proxiedSession); - } else { - // Origin server 401 — NEVER leak proxy creds to the target site. - send('Fetch.continueWithAuth', { - requestId: m.params.requestId, - authChallengeResponse: { response: 'CancelAuth' }, - }, proxiedSession); - } - } else if (m.method === 'Fetch.requestPaused') { - // Not an auth challenge: forward the request unchanged. - send('Fetch.continueRequest', { requestId: m.params.requestId }, proxiedSession); - } -}); - -// ═════════════════════════════════════════════════════════════════════════ // -// STEP 1 — PROXIED session → ipify // -// Triggers the Fetch.authRequired / continueWithAuth dance our listener // -// handles. ipify reports the PROXY's egress IP. 
// -// ═════════════════════════════════════════════════════════════════════════ // - -{ - const loaded = waitForEvent(proxiedSession, 'Page.loadEventFired'); - await send('Page.navigate', { url: IPIFY_URL }, proxiedSession); - await loaded; - - const { result } = await send('Runtime.evaluate', - { expression: 'document.body.innerText' }, proxiedSession); - - console.log('--- 1) PROXIED -> ipify ---'); - console.log(result.value); -} - -// ═════════════════════════════════════════════════════════════════════════ // -// STEP 2 — SAME proxied session → a URL of your choice // -// Customer-supplied via PROXY_ONLY_URL in .env. Use any hostname that is // -// reachable through your proxy. // -// ═════════════════════════════════════════════════════════════════════════ // - -{ - const loaded = waitForEvent(proxiedSession, 'Page.loadEventFired'); - await send('Page.navigate', { url: PROXY_ONLY_URL }, proxiedSession); - await loaded; - - const { result } = await send('Runtime.evaluate', - { expression: 'document.body.innerText' }, proxiedSession); - - console.log(`--- 2) PROXIED -> ${PROXY_ONLY_URL} ---`); - console.log(result.value); -} - -ws.close(); diff --git a/samples/playwright-tests/Readme.md b/samples/playwright-tests/Readme.md index 02443cf..86484cc 100644 --- a/samples/playwright-tests/Readme.md +++ b/samples/playwright-tests/Readme.md @@ -53,6 +53,31 @@ This sample demonstrates how to run Playwright tests using cloud-hosted browsers npx playwright test tests/example.spec.ts --config=playwright.service.config.ts ``` +## Optional: route tests through an authenticated HTTP proxy + +If your tests need to reach a private origin via an authenticated forward +proxy, use the opt-in [`playwright.service.proxy.config.ts`](./playwright.service.proxy.config.ts). +It extends `playwright.service.config.ts` with `use.proxy` and points `testDir` +at [`./tests-proxy`](./tests-proxy), so the default `npx playwright test` +command and the existing `tests/` specs are unaffected. 
+
+1. Set the proxy env vars (in addition to `PLAYWRIGHT_SERVICE_URL`):
+
+   ```powershell
+   $env:PROXY_SERVER = "http://<proxy-host>:8080"
+   $env:PROXY_USERNAME = "<proxy-username>"
+   $env:PROXY_PASSWORD = "<proxy-password>"
+   $env:PROXY_ONLY_URL = "http://intranet.example/healthcheck"
+   ```
+
+2. Run only the proxy specs:
+
+   ```bash
+   npx playwright test --config=playwright.service.proxy.config.ts
+   ```
+
+Playwright handles the proxy 407 challenge with the credentials in `use.proxy`.
+
 ## Need Help?
 
 If you run into issues, open an issue in this repository or refer to the [Playwright Workspaces documentation](https://aka.ms/pww/docs).
diff --git a/samples/playwright-tests/playwright.service.proxy.config.ts b/samples/playwright-tests/playwright.service.proxy.config.ts
new file mode 100644
index 0000000..f041ddc
--- /dev/null
+++ b/samples/playwright-tests/playwright.service.proxy.config.ts
@@ -0,0 +1,39 @@
+import { defineConfig } from '@playwright/test';
+import { createAzurePlaywrightConfig, ServiceOS } from '@azure/playwright';
+import { DefaultAzureCredential } from '@azure/identity';
+import config from './playwright.config';
+
+/**
+ * Opt-in PWW service config that routes every test context through an
+ * authenticated HTTP forward proxy.
+ *
+ * Required env vars (in addition to PLAYWRIGHT_SERVICE_URL):
+ *   PROXY_SERVER     e.g. http://<proxy-host>:8080
+ *   PROXY_USERNAME
+ *   PROXY_PASSWORD
+ *
+ * Run only the proxy specs (this config sets testDir to ./tests-proxy):
+ *   npx playwright test --config=playwright.service.proxy.config.ts
+ *
+ * The default `npx playwright test --config=playwright.service.config.ts`
+ * is unaffected.
+ */
+export default defineConfig(
+  config,
+  createAzurePlaywrightConfig(config, {
+    exposeNetwork: '<loopback>',
+    connectTimeout: 3 * 60 * 1000, // 3 minutes
+    os: ServiceOS.LINUX,
+    credential: new DefaultAzureCredential(),
+  }),
+  {
+    testDir: './tests-proxy', // only the opt-in proxy specs; ./tests is untouched
+    use: {
+      proxy: {
+        server: process.env.PROXY_SERVER!, // required env var (see header comment)
+        username: process.env.PROXY_USERNAME,
+        password: process.env.PROXY_PASSWORD,
+      },
+    },
+  }
+);
diff --git a/samples/playwright-tests/tests-proxy/proxy.spec.ts b/samples/playwright-tests/tests-proxy/proxy.spec.ts
new file mode 100644
index 0000000..0b7e7f9
--- /dev/null
+++ b/samples/playwright-tests/tests-proxy/proxy.spec.ts
@@ -0,0 +1,19 @@
+import { test, expect } from '@playwright/test';
+
+/**
+ * Opt-in spec — only runs under playwright.service.proxy.config.ts.
+ * Set PROXY_ONLY_URL to a host reachable through your proxy
+ * (e.g. a private intranet origin).
+ */
+test('fetches PROXY_ONLY_URL through the proxied PWW context', async ({ page }) => {
+  const target = process.env.PROXY_ONLY_URL;
+  test.skip(!target, 'PROXY_ONLY_URL is not set'); // skip rather than fail when unset
+
+  const response = await page.goto(target!);
+  expect(response?.ok()).toBeTruthy();
+
+  const body = await page.locator('body').innerText();
+  console.log(`--- PROXIED -> ${target} ---`);
+  console.log(body);
+  expect(body.length).toBeGreaterThan(0);
+});