From 941d0590b4fa77392eb449d521da62ca0b7695fd Mon Sep 17 00:00:00 2001 From: Justin Paul Date: Sun, 10 May 2026 21:16:14 -0400 Subject: [PATCH] Sync from Gitea: drop Windows-ZVM recipe + sender script Windows ZVM is largely deprecated, so the older recipe and its curl.exe-based sender are removed. ZVMA recipe is promoted to canonical and its header no longer references the deleted file. Also fixes broken links in README, docs/README, installation.md, and sync-wiki.ps1. --- README.md | 5 +- docs/README.md | 7 +- docs/installation.md | 2 +- docs/recipes/zerto-pre-post-scripts.md | 243 ----------------------- docs/recipes/zerto-zvma-pre-post.md | 16 +- scripts/examples/zerto-post-failover.ps1 | 78 -------- scripts/sync-wiki.ps1 | 4 +- 7 files changed, 15 insertions(+), 340 deletions(-) delete mode 100644 docs/recipes/zerto-pre-post-scripts.md delete mode 100644 scripts/examples/zerto-post-failover.ps1 diff --git a/README.md b/README.md index 3548879..4de81f4 100644 --- a/README.md +++ b/README.md @@ -61,12 +61,11 @@ Everything you need to operate the server: Recipes: -- [Zerto failover post-script → DNS + service checks](docs/recipes/zerto-pre-post-scripts.md) ← **canonical use case** (Windows ZVM) -- [Zerto ZVMA (Kubernetes) pre/post → notify + VM health check](docs/recipes/zerto-zvma-pre-post.md) — same pattern for the in-cluster scripts-service +- [Zerto ZVMA (Kubernetes) pre/post → notify + VM health check](docs/recipes/zerto-zvma-pre-post.md) ← **canonical use case** - [GitHub-style HMAC-signed webhook](docs/recipes/github-style-hmac.md) - [Pop UI on the user's desktop](docs/recipes/ui-on-desktop.md) -Ready-to-drop-in Zerto-side scripts are included at [`scripts/examples/zerto-post-failover.ps1`](scripts/examples/zerto-post-failover.ps1) (Windows ZVM) and [`scripts/examples/zerto-zvma-send.ps1`](scripts/examples/zerto-zvma-send.ps1) (ZVMA / Kubernetes); receiver examples for the ZVMA recipe ship as [`zerto-receiver-notify.ps1`](scripts/examples/zerto-receiver-notify.ps1) and [`zerto-receiver-vm-healthcheck.ps1`](scripts/examples/zerto-receiver-vm-healthcheck.ps1). +The Zerto ZVMA recipe ships ready-to-drop-in scripts: [`scripts/examples/zerto-zvma-send.ps1`](scripts/examples/zerto-zvma-send.ps1) (sender, runs inside the ZVMA `scripts-service` container) plus [`zerto-receiver-notify.ps1`](scripts/examples/zerto-receiver-notify.ps1) and [`zerto-receiver-vm-healthcheck.ps1`](scripts/examples/zerto-receiver-vm-healthcheck.ps1) (receivers, run on the Webhook Server host). ## Requirements diff --git a/docs/README.md b/docs/README.md index b61338f..cbb8700 100644 --- a/docs/README.md +++ b/docs/README.md @@ -6,7 +6,7 @@ Webhook Server is a Windows service that runs a script (PowerShell, cmd, or any 1. [Concepts](concepts.md) — five-minute read on what a webhook is and how this server uses one 2. [Installation](installation.md) — download, install, first endpoint -3. [Recipe: Zerto failover post-script → DNS + service checks](recipes/zerto-pre-post-scripts.md) — the canonical reason this exists +3. [Recipe: Zerto ZVMA pre/post → notify + VM health check](recipes/zerto-zvma-pre-post.md) — the canonical reason this exists ## Topical @@ -19,12 +19,11 @@ Webhook Server is a Windows service that runs a script (PowerShell, cmd, or any ## Recipes (cookbook style) -- [Zerto failover post-script → DNS + service checks](recipes/zerto-pre-post-scripts.md) ← canonical use case (Windows ZVM) -- [Zerto ZVMA (Kubernetes) pre/post → notify + VM health check](recipes/zerto-zvma-pre-post.md) — same pattern for the in-cluster scripts-service +- [Zerto ZVMA (Kubernetes) pre/post → notify + VM health check](recipes/zerto-zvma-pre-post.md) ← canonical use case - [GitHub-style HMAC-signed webhook](recipes/github-style-hmac.md) - [Pop UI on the user's desktop](recipes/ui-on-desktop.md) -The flagship Zerto recipe ships with a ready-to-use Zerto-side post-script at [`scripts/examples/zerto-post-failover.ps1`](../scripts/examples/zerto-post-failover.ps1). The ZVMA recipe ships with [`zerto-zvma-send.ps1`](../scripts/examples/zerto-zvma-send.ps1) (sender) plus [`zerto-receiver-notify.ps1`](../scripts/examples/zerto-receiver-notify.ps1) and [`zerto-receiver-vm-healthcheck.ps1`](../scripts/examples/zerto-receiver-vm-healthcheck.ps1) (receivers). +The Zerto ZVMA recipe ships with [`zerto-zvma-send.ps1`](../scripts/examples/zerto-zvma-send.ps1) (sender, runs inside the ZVMA `scripts-service` container) plus [`zerto-receiver-notify.ps1`](../scripts/examples/zerto-receiver-notify.ps1) and [`zerto-receiver-vm-healthcheck.ps1`](../scripts/examples/zerto-receiver-vm-healthcheck.ps1) (receivers, run on the Webhook Server host). ## Reference diff --git a/docs/installation.md b/docs/installation.md index 89ae2b4..75b162c 100644 --- a/docs/installation.md +++ b/docs/installation.md @@ -76,7 +76,7 @@ The endpoint appears in the grid. Right-click it → **Copy URL**, paste into a { "runId": "...", "exitCode": 0, "durationMs": 134, "stdout": "pong\r\n", ... } ``` -That's it. Real-world recipes start with [Zerto pre/post scripts → AD / DNS update](recipes/zerto-pre-post-scripts.md). +That's it. Real-world recipes start with [Zerto ZVMA pre/post → notify + VM health check](recipes/zerto-zvma-pre-post.md). ## Silent / unattended install diff --git a/docs/recipes/zerto-pre-post-scripts.md b/docs/recipes/zerto-pre-post-scripts.md deleted file mode 100644 index eb16c19..0000000 --- a/docs/recipes/zerto-pre-post-scripts.md +++ /dev/null @@ -1,243 +0,0 @@ -# Recipe: Zerto failover post-script → DNS update + service checks - -This is the canonical reason Webhook Server exists. - -When Zerto fails a VM over from production to DR, the VM boots fine — but **the things around it** often need attention: DNS records still point at the production IP, dependent services need to be checked, on-call needs a heads-up. Zerto pre/post scripts run on the **Zerto Virtual Manager**, not on a domain controller and not necessarily with admin rights to the things that need fixing. So you want a single webhook URL that the post-script hits, and a Windows host on the DR side that does the actual work with the right identity. - -## What we're building - -Zerto's post-recovery script (a one-shot PowerShell file pointing at curl) calls `http://webhook.dr.contoso.local:8080/hook/post-failover` with a JSON body identifying the VPG and operation. The Webhook Server, running on a DR-side Windows host as a gMSA with delegated AD/DNS rights, runs PowerShell that: - -1. Updates DNS A records to point the failed-over hostnames at their DR IPs -2. Waits for the failed-over VM to come up (ping + WinRM probe) -3. Connects to the VM via PowerShell remoting and starts/checks critical services -4. Sends a Teams notification with the result - -The endpoint is **Async** so the Zerto script returns in milliseconds — no risk of timing out Zerto's failover sequence even if the actions take minutes. The script's full output ends up in the webhook log and (optionally) in an outbound callback. - -## Why curl and not Invoke-WebRequest? - -Zerto's PowerShell runner is intentionally minimal — many environments run an older Windows on the ZVM and don't have full PowerShell modules installed. `curl.exe` ships with Windows 10 1803+ and Server 2019+ and works without any modules. Plus, calling an HTTP endpoint with `curl.exe` doesn't depend on the version of `Invoke-WebRequest` shipped with the host's PowerShell. - -## 1. The Zerto post-script (client side) - -A ready-to-use script ships in this repo at [`scripts/examples/zerto-post-failover.ps1`](../../scripts/examples/zerto-post-failover.ps1). Copy it to the ZVM, edit `$WebhookUrl` and the bearer-token path at the top, and wire it into the VPG: - -> **VPG settings → Recovery → Scripts → Post-Recovery Script** -> Path: `C:\Scripts\zerto-post-failover.ps1` -> Parameters: *(leave empty)* - -The script is ~50 lines and only depends on `curl.exe` + a token file readable by the ZVM service account. - -The flow: - -``` -Zerto VPG failover starts - | - +-- VM is brought up at DR site - | - +-- Zerto post-script fires: - | curl POST http://webhook.dr/hook/post-failover (async, returns 202 in ~50ms) - | - +-- Zerto sees success, finishes the failover and reports done - | - (meanwhile, on the webhook server) - | - running PowerShell for several minutes: - - update DNS - - wait for VM ready - - check services on VM - - notify Teams -``` - -## 2. The server-side script (does the actual work) - -Save this on the webhook host as `C:\Scripts\post-failover-handler.ps1`: - -```powershell -[CmdletBinding()] -param() -$ErrorActionPreference = 'Stop' - -$body = $input | ConvertFrom-Json - -# ---------- environment specifics; edit for your site ---------- -$dnsServer = 'dc01.contoso.local' -$forwardZone = 'contoso.local' -$teamsWebhook = 'https://contoso.webhook.office.com/...' -$drIpMap = @{ - 'app01' = '10.42.10.11' - 'app02' = '10.42.10.12' - 'db01' = '10.42.10.21' -} -$serviceMap = @{ - 'app01' = @('W3SVC','MyAppSvc') - 'app02' = @('W3SVC','MyAppSvc') - 'db01' = @('MSSQLSERVER','SQLAgent') -} -# --------------------------------------------------------------- - -# Default the VM list to "all VMs we know about" if the post-script didn't -# tell us, so the same handler works without having to embed the VM list in -# every Zerto post-script. -$vms = if ($body.vms) { $body.vms } else { $drIpMap.Keys } - -$summary = @() - -foreach ($vm in $vms) { - if (-not $drIpMap.ContainsKey($vm)) { - $summary += "skip $vm (no DR IP mapping in handler)" - continue - } - $ip = $drIpMap[$vm] - - # 1. DNS - delete + re-add the A record - try { - $existing = Get-DnsServerResourceRecord -ZoneName $forwardZone -Name $vm ` - -RRType A -ComputerName $dnsServer -ErrorAction SilentlyContinue - if ($existing) { - Remove-DnsServerResourceRecord -ZoneName $forwardZone -Name $vm ` - -RRType A -RecordData $existing.RecordData.IPv4Address ` - -ComputerName $dnsServer -Force - } - Add-DnsServerResourceRecordA -ZoneName $forwardZone -Name $vm ` - -IPv4Address $ip -ComputerName $dnsServer -TimeToLive 00:05:00 - $summary += "dns $vm -> $ip" - } catch { - $summary += "DNS! $vm $($_.Exception.Message)" - continue - } - - # 2. Wait for the VM to be reachable (up to 5 minutes) - $deadline = (Get-Date).AddMinutes(5) - $reachable = $false - while ((Get-Date) -lt $deadline) { - if (Test-Connection -ComputerName $ip -Count 1 -Quiet -ErrorAction SilentlyContinue) { - try { - # Quick WinRM probe; succeeds when the VM has finished booting - Invoke-Command -ComputerName $ip -ScriptBlock { $true } -ErrorAction Stop | Out-Null - $reachable = $true - break - } catch { Start-Sleep -Seconds 10 } - } else { - Start-Sleep -Seconds 10 - } - } - if (-not $reachable) { - $summary += "wait! $vm not reachable after 5 minutes" - continue - } - - # 3. Check + start critical services on the VM - if ($serviceMap.ContainsKey($vm)) { - $svcReport = Invoke-Command -ComputerName $ip -ArgumentList @(,$serviceMap[$vm]) -ScriptBlock { - param($services) - $report = @() - foreach ($s in $services) { - $svc = Get-Service -Name $s -ErrorAction SilentlyContinue - if (-not $svc) { $report += "$s : missing"; continue } - if ($svc.Status -ne 'Running') { - Start-Service $s - Start-Sleep -Seconds 2 - $svc.Refresh() - } - $report += "$s : $($svc.Status)" - } - return $report - } - $summary += "svc $vm : $($svcReport -join ', ')" - } else { - $summary += "svc $vm (no services configured)" - } -} - -# 4. Notify Teams -$teamsBody = @{ - text = "Webhook post-failover for VPG **$($body.vpg)**:`n" + ($summary -join "`n") -} | ConvertTo-Json -try { - Invoke-RestMethod -Uri $teamsWebhook -Method POST -ContentType 'application/json' -Body $teamsBody | Out-Null -} catch { - $summary += "teams! notification failed: $($_.Exception.Message)" -} - -# Return the summary so it shows up in the webhook log + outbound callback -$summary -join "`n" -``` - -Two things to call out: - -- **PowerShell remoting to the VM** uses the gMSA's network identity (or whoever the service runs as). Make sure the gMSA / service account can `Invoke-Command` to the failed-over hosts — usually that means the account is a local admin on the target VMs, or you've configured constrained delegation. -- **WinRM** must be enabled on the failed-over VMs for the remoting calls to work. `Enable-PSRemoting` is the simplest, but most prod environments configure WinRM via Group Policy. - -## 3. Configure the endpoint in the GUI - -**File → New endpoint:** - -| Section | Setting | Value | -|---|---|---| -| Identity | Slug | `post-failover` | -| Identity | Description | "Zerto post-recovery: DNS + service checks" | -| Auth | Mode | **Bearer** | -| Auth | Bearer secret | generate a 32-byte random string; copy it for the Zerto script's token file | -| Allowed clients | (one per line) | `10.0.0.0/8` *(your ZVM's network)* | -| Executor | Type | **Windows PowerShell** | -| Executor | Script path | `C:\Scripts\post-failover-handler.ps1` | -| Data passing | JSON body to stdin | ✓ | -| Run as | Identity | **Service** if the service runs under a gMSA with the right rights, otherwise **SpecificUser** with a delegated account | -| Response | Mode | **Async** ← critical: this is what makes the Zerto script non-blocking | -| Response | Timeout (sec) | `600` *(this is the cap on the long-running handler script, not the Zerto-facing response)* | -| Response | Fail on non-zero exit | unticked *(async hooks have no caller to receive a 502)* | - -Save. Right-click the row → **Copy URL** to grab `http://webhook.dr.contoso.local:8080/hook/post-failover` and paste it into `$WebhookUrl` at the top of the Zerto-side script. - -> **Why Bearer instead of HMAC?** Both work. Bearer is simpler — drop the token in a file on the ZVM that's readable by the ZVM service account and you're done. HMAC requires the Zerto-side script to compute a signature, which is doable but adds a few lines of code. Pick what fits your environment. - -## 4. Wire up the bearer token - -Place the bearer token in a file the ZVM service account can read (and nobody else): - -```powershell -# on the ZVM, from elevated PowerShell -$token = (New-Guid).ToString('N') # or paste the value from the GUI -$tokenPath = 'C:\ProgramData\Zerto\webhook-token.txt' -$token | Out-File -LiteralPath $tokenPath -Encoding utf8 -NoNewline -icacls $tokenPath /inheritance:r /grant 'NT SERVICE\Zerto Online Services:R' 'BUILTIN\Administrators:F' /T -``` - -Adjust the service principal name to whatever Zerto runs as on your version. The script reads from this path automatically; no change needed in the script itself. - -## 5. Test before going live - -In a maintenance window, fire the webhook by hand: - -```powershell -# from any machine that can reach the webhook server -$body = @{ - operation = 'test' - vpg = 'SmokeTest' - timestamp = (Get-Date).ToUniversalTime().ToString('o') -} | ConvertTo-Json -Compress - -curl.exe --silent --show-error --max-time 10 -X POST ` - -H "Authorization: Bearer paste-the-token" ` - -H "Content-Type: application/json" ` - -d $body ` - http://webhook.dr.contoso.local:8080/hook/post-failover -``` - -You'll get back `{"runId":"…","accepted":true}` immediately. Open the Webhook Server GUI and watch the log panel — within 30 seconds or so you'll see lines for the run. Confirm DNS records updated, services on each VM ended in `Running`, and the Teams notification arrived. - -## Variations - -### Different actions for failover vs. failback - -Pass an `operation` field in the body and branch on it. The Zerto-side script already sends `operation = 'failover'`. Add a separate post-failback script (or detect from `$env:ZertoOperationType`) that sends `operation = 'failback'` and have the handler revert DNS to production IPs. - -### Per-VPG endpoints - -If you want fine-grained access control or different actions per VPG, create one endpoint per VPG (`post-failover-app`, `post-failover-db`, …) and give each its own bearer token. The GUI handles dozens of endpoints fine. - -### Audit trail to a SIEM - -Each endpoint can have an outbound **Callback** URL. Configure it with your SIEM's HTTP collector + an HMAC secret, and every run produces a JSON record with runId, exit code, duration, stdout, and stderr — perfect for compliance. diff --git a/docs/recipes/zerto-zvma-pre-post.md b/docs/recipes/zerto-zvma-pre-post.md index 7b8b459..5962749 100644 --- a/docs/recipes/zerto-zvma-pre-post.md +++ b/docs/recipes/zerto-zvma-pre-post.md @@ -1,12 +1,10 @@ -# Recipe: Zerto ZVMA (Kubernetes) pre/post scripts → notify + VM health check - -> Companion to [Zerto failover post-script → DNS + service checks](zerto-pre-post-scripts.md). -> That recipe targets the **Windows ZVM** (the older deployment, where the -> Zerto-side script is a `.ps1` calling `curl.exe`). **This** recipe targets -> the **ZVMA on Kubernetes** — the newer deployment, where pre/post scripts -> run inside the in-cluster `scripts-service` container (Linux + pwsh 7). -> The webhook-server side is the same Windows service in both cases; only -> the Zerto-side runtime differs. +# Recipe: Zerto ZVMA pre/post scripts → notify + VM health check + +> This is the **canonical** Zerto recipe. It targets the **ZVMA on +> Kubernetes** — the supported deployment — where pre/post scripts run +> inside the in-cluster `scripts-service` container (Linux + pwsh 7). The +> webhook-server side is a normal Windows service that does the +> Windows-domain work the ZVMA container can't reach directly. ## What we're building diff --git a/scripts/examples/zerto-post-failover.ps1 b/scripts/examples/zerto-post-failover.ps1 deleted file mode 100644 index ed55cb4..0000000 --- a/scripts/examples/zerto-post-failover.ps1 +++ /dev/null @@ -1,78 +0,0 @@ -<# -.SYNOPSIS - Zerto post-failover script. Fires the on-prem Webhook Server which does - the real work (DNS updates, service health checks, notifications). - -.DESCRIPTION - Designed to be dropped into a Zerto VPG's post-recovery script slot. The - Zerto Virtual Manager's PowerShell runner has a limited module set and - runs scripts synchronously, so this script: - - - uses curl.exe (ships with Windows 10 1803+ / Server 2019+) instead - of any module-dependent HTTP client; - - calls an ASYNC webhook endpoint - the server returns 202 in - milliseconds and runs the actual work in the background; - - returns within seconds regardless of how long the post-failover - actions take, so Zerto's failover sequence is never blocked. - - Wire this into your VPG via the Zerto UI: - VPG settings -> Recovery -> Scripts -> Post-Recovery Script - Path: C:\path\to\zerto-post-failover.ps1 - Parameters: leave empty (we read from $env:ZertoVPGName) - -.NOTES - Configure $WebhookUrl and either: - - paste the bearer token directly into $Bearer (simplest, but the - token then lives in this file), or - - point $BearerFile at a file readable only by the ZVM service - account (better - same threat model as Zerto's own credential - storage). -#> - -$ErrorActionPreference = 'Stop' - -# ----------------------------- CONFIGURE --------------------------------- -$WebhookUrl = 'http://webhook.contoso.local:8080/hook/post-failover' -$Bearer = '' # paste here, or use $BearerFile -$BearerFile = 'C:\ProgramData\Zerto\webhook-token.txt' # one line: the token -# ------------------------------------------------------------------------- - -if (-not $Bearer -and (Test-Path $BearerFile)) { - $Bearer = (Get-Content -LiteralPath $BearerFile -TotalCount 1).Trim() -} -if (-not $Bearer) { - throw "No bearer token. Set `$Bearer in this script or write the token to $BearerFile." -} - -# Compose the payload. Zerto exposes a few env vars; fall back gracefully. -$payload = @{ - operation = 'failover' - vpg = if ($env:ZertoVPGName) { $env:ZertoVPGName } else { 'unknown' } - timestamp = (Get-Date).ToUniversalTime().ToString('o') -} | ConvertTo-Json -Compress - -# curl on Windows handles long / quoted JSON better via @file than via -d "...". -$tempBody = Join-Path $env:TEMP ("zerto-webhook-{0}.json" -f ([guid]::NewGuid())) -$payload | Out-File -FilePath $tempBody -Encoding utf8 -NoNewline - -try { - Write-Host "POST $WebhookUrl (vpg=$($env:ZertoVPGName))" - & curl.exe ` - --silent --show-error --fail-with-body ` - --max-time 10 ` - -X POST ` - -H "Authorization: Bearer $Bearer" ` - -H "Content-Type: application/json" ` - -d "@$tempBody" ` - "$WebhookUrl" - if ($LASTEXITCODE -ne 0) { - # curl prints its own error to stderr; surface a non-zero exit so Zerto's - # script log records the failure but we don't block the failover. - Write-Warning "Webhook call failed with curl exit $LASTEXITCODE; continuing." - } else { - Write-Host "Webhook accepted (run id is in the response above)." - } -} -finally { - Remove-Item $tempBody -ErrorAction SilentlyContinue -} diff --git a/scripts/sync-wiki.ps1 b/scripts/sync-wiki.ps1 index d62268b..5952ea5 100644 --- a/scripts/sync-wiki.ps1 +++ b/scripts/sync-wiki.ps1 @@ -65,7 +65,7 @@ $mapping.Add('runas-modes.md', 'Run-As-Modes') $mapping.Add('service-account-and-ad.md', 'Service-Account-and-AD') $mapping.Add('network-and-security.md', 'Network-and-Security') $mapping.Add('troubleshooting.md', 'Troubleshooting') -$mapping.Add('recipes/zerto-pre-post-scripts.md', 'Recipe-Zerto-Failover') +$mapping.Add('recipes/zerto-zvma-pre-post.md', 'Recipe-Zerto-ZVMA') $mapping.Add('recipes/github-style-hmac.md', 'Recipe-GitHub-HMAC') $mapping.Add('recipes/ui-on-desktop.md', 'Recipe-UI-on-Desktop') @@ -94,7 +94,7 @@ function New-Sidebar() { } $lines += "" $lines += "## Recipes" - foreach ($key in @('recipes/zerto-pre-post-scripts.md','recipes/github-style-hmac.md','recipes/ui-on-desktop.md')) { + foreach ($key in @('recipes/zerto-zvma-pre-post.md','recipes/github-style-hmac.md','recipes/ui-on-desktop.md')) { $slug = $mapping[$key] $lines += "- [$($slug -replace '^Recipe-' -replace '-', ' ')]($slug)" }