From f44a4a92507a84b03575e4ad5ab3774b5400c41e Mon Sep 17 00:00:00 2001 From: Kevin Codex Date: Fri, 5 Jun 2026 08:30:28 +0800 Subject: [PATCH 1/6] refactor: move fly.toml into infra/fly/ Deployment configs now live under infra/, organized per target. Dockerfiles and docker-compose.yml intentionally stay at the repo root (shared by CI, build scripts, and the macOS app). Deploy with: fly deploy -c infra/fly/fly.toml Co-Authored-By: OpenClaude --- docs/RUN-A-NODE.md | 2 +- infra/README.md | 30 ++++++++++++++++++++++++++++++ fly.toml => infra/fly/fly.toml | 2 +- 3 files changed, 32 insertions(+), 2 deletions(-) create mode 100644 infra/README.md rename fly.toml => infra/fly/fly.toml (96%) diff --git a/docs/RUN-A-NODE.md b/docs/RUN-A-NODE.md index 8a40d4c..987919f 100644 --- a/docs/RUN-A-NODE.md +++ b/docs/RUN-A-NODE.md @@ -8,7 +8,7 @@ Step-by-step guide to staking $GITLAWB, registering your node on-chain, and earn - A wallet with at least **10,000 $GITLAWB** (minimum stake) plus a small amount of ETH on Base for gas - Docker or Rust 1.91+ (for running the node process) -- A public HTTP URL (your-host.com) — can be a VPS, Fly.io app, or anything reachable +- A public HTTP URL (your-host.com) — can be a VPS, Fly.io app, or anything reachable. A Fly.io config is provided at `infra/fly/fly.toml` (deploy from the repo root with `fly deploy -c infra/fly/fly.toml`) --- diff --git a/infra/README.md b/infra/README.md new file mode 100644 index 0000000..0c5e6cc --- /dev/null +++ b/infra/README.md @@ -0,0 +1,30 @@ +# infra/ + +Deployment configuration, organized by target — one subdirectory per platform. 
+ +``` +infra/ +└── fly/ + └── fly.toml # Fly.io app config (gitlawb-node-test) +``` + +## Deploying to Fly.io + +Run from the **repo root** so the Docker build context includes `crates/`, +`Cargo.toml`, and `bootstrap-peers.json`: + +```sh +fly deploy -c infra/fly/fly.toml +``` + +The `dockerfile` path inside `fly.toml` is resolved relative to the config +file, so it points to `../../Dockerfile`. + +## What intentionally stays at the repo root + +- `Dockerfile` / `Dockerfile.bins` — shared by the release CI workflow + (`.github/workflows/release.yml`), `scripts/build-bins.sh`, and Fly builds. +- `docker-compose.yml` — local dev stack; bundled into the macOS app by + `scripts/build-macos-app.sh` and used for repo detection by the app. + +Future targets (e.g. `infra/aws/`) should follow the same per-platform layout. diff --git a/fly.toml b/infra/fly/fly.toml similarity index 96% rename from fly.toml rename to infra/fly/fly.toml index 133a4aa..2637fa7 100644 --- a/fly.toml +++ b/infra/fly/fly.toml @@ -2,7 +2,7 @@ app = "gitlawb-node-test" primary_region = "iad" [build] - dockerfile = "Dockerfile" + dockerfile = "../../Dockerfile" [env] GITLAWB_HOST = "0.0.0.0" From 2cae505f85ee8122ae9e74d211c013f8818a2706 Mon Sep 17 00:00:00 2001 From: Kevin Codex Date: Fri, 5 Jun 2026 08:30:28 +0800 Subject: [PATCH 2/6] feat(infra): Terraform for single-EC2 AWS deployment infra/aws/ provisions a production-leaning single-node deployment: - t4g.small (AL2023 arm64) running the published ghcr.io/gitlawb/node image + postgres:16 via docker compose - separate encrypted EBS data volume (prevent_destroy) with daily DLM snapshots; survives instance replacement - postgres password via random_password -> SSM SecureString, fetched at boot by instance profile (never in user-data); optional secrets (operator key, Pinata JWT, S3 secret) follow the same path - SSM Session Manager access (no SSH by default), IMDSv2 required, metrics port closed unless explicitly opened - SSM command document for 
image upgrades (user-data runs once) Verified: terraform fmt/init/validate clean; rendered compose passes docker compose config; rendered user-data passes bash -n. Co-Authored-By: OpenClaude --- infra/README.md | 12 +- infra/aws/.gitignore | 13 ++ infra/aws/.terraform.lock.hcl | 46 ++++ infra/aws/README.md | 120 ++++++++++ infra/aws/compose.yaml.tftpl | 57 +++++ infra/aws/main.tf | 359 +++++++++++++++++++++++++++++ infra/aws/outputs.tf | 34 +++ infra/aws/terraform.tfvars.example | 42 ++++ infra/aws/user-data.sh.tftpl | 97 ++++++++ infra/aws/variables.tf | 223 ++++++++++++++++++ infra/aws/versions.tf | 28 +++ 11 files changed, 1028 insertions(+), 3 deletions(-) create mode 100644 infra/aws/.gitignore create mode 100644 infra/aws/.terraform.lock.hcl create mode 100644 infra/aws/README.md create mode 100644 infra/aws/compose.yaml.tftpl create mode 100644 infra/aws/main.tf create mode 100644 infra/aws/outputs.tf create mode 100644 infra/aws/terraform.tfvars.example create mode 100644 infra/aws/user-data.sh.tftpl create mode 100644 infra/aws/variables.tf create mode 100644 infra/aws/versions.tf diff --git a/infra/README.md b/infra/README.md index 0c5e6cc..aa0ca47 100644 --- a/infra/README.md +++ b/infra/README.md @@ -4,8 +4,9 @@ Deployment configuration, organized by target — one subdirectory per platform. ``` infra/ -└── fly/ - └── fly.toml # Fly.io app config (gitlawb-node-test) +├── fly/ +│ └── fly.toml # Fly.io app config (gitlawb-node-test) +└── aws/ # Terraform: single EC2 + Docker (see aws/README.md) ``` ## Deploying to Fly.io @@ -20,6 +21,11 @@ fly deploy -c infra/fly/fly.toml The `dockerfile` path inside `fly.toml` is resolved relative to the config file, so it points to `../../Dockerfile`. +## Deploying to AWS + +See [`aws/README.md`](aws/README.md) — Terraform for a single EC2 instance +running the published `ghcr.io/gitlawb/node` image with Docker compose. 
+ ## What intentionally stays at the repo root - `Dockerfile` / `Dockerfile.bins` — shared by the release CI workflow @@ -27,4 +33,4 @@ file, so it points to `../../Dockerfile`. - `docker-compose.yml` — local dev stack; bundled into the macOS app by `scripts/build-macos-app.sh` and used for repo detection by the app. -Future targets (e.g. `infra/aws/`) should follow the same per-platform layout. +Future targets should follow the same per-platform layout. diff --git a/infra/aws/.gitignore b/infra/aws/.gitignore new file mode 100644 index 0000000..7d3eeae --- /dev/null +++ b/infra/aws/.gitignore @@ -0,0 +1,13 @@ +# Terraform state and workdir — never commit (state contains secrets) +*.tfstate +*.tfstate.* +.terraform/ +crash.log +crash.*.log + +# Local variable files may contain secrets; keep the example only +*.tfvars +*.tfvars.json +!terraform.tfvars.example + +# Note: .terraform.lock.hcl IS committed (reproducible provider versions) diff --git a/infra/aws/.terraform.lock.hcl b/infra/aws/.terraform.lock.hcl new file mode 100644 index 0000000..e1b1ca6 --- /dev/null +++ b/infra/aws/.terraform.lock.hcl @@ -0,0 +1,46 @@ +# This file is maintained automatically by "terraform init". +# Manual edits may be lost in future updates. 
+ +provider "registry.terraform.io/hashicorp/aws" { + version = "5.100.0" + constraints = "~> 5.0" + hashes = [ + "h1:wOhTPz6apLBuF7/FYZuCoXRK/MLgrNprZ3vXmq83g5k=", + "zh:054b8dd49f0549c9a7cc27d159e45327b7b65cf404da5e5a20da154b90b8a644", + "zh:0b97bf8d5e03d15d83cc40b0530a1f84b459354939ba6f135a0086c20ebbe6b2", + "zh:1589a2266af699cbd5d80737a0fe02e54ec9cf2ca54e7e00ac51c7359056f274", + "zh:6330766f1d85f01ae6ea90d1b214b8b74cc8c1badc4696b165b36ddd4cc15f7b", + "zh:7c8c2e30d8e55291b86fcb64bdf6c25489d538688545eb48fd74ad622e5d3862", + "zh:99b1003bd9bd32ee323544da897148f46a527f622dc3971af63ea3e251596342", + "zh:9b12af85486a96aedd8d7984b0ff811a4b42e3d88dad1a3fb4c0b580d04fa425", + "zh:9f8b909d3ec50ade83c8062290378b1ec553edef6a447c56dadc01a99f4eaa93", + "zh:aaef921ff9aabaf8b1869a86d692ebd24fbd4e12c21205034bb679b9caf883a2", + "zh:ac882313207aba00dd5a76dbd572a0ddc818bb9cbf5c9d61b28fe30efaec951e", + "zh:bb64e8aff37becab373a1a0cc1080990785304141af42ed6aa3dd4913b000421", + "zh:dfe495f6621df5540d9c92ad40b8067376350b005c637ea6efac5dc15028add4", + "zh:f0ddf0eaf052766cfe09dea8200a946519f653c384ab4336e2a4a64fdd6310e9", + "zh:f1b7e684f4c7ae1eed272b6de7d2049bb87a0275cb04dbb7cda6636f600699c9", + "zh:ff461571e3f233699bf690db319dfe46aec75e58726636a0d97dd9ac6e32fb70", + ] +} + +provider "registry.terraform.io/hashicorp/random" { + version = "3.9.0" + constraints = "~> 3.6" + hashes = [ + "h1:lVDv+0AjDjrLfpmaJbWqUmIw/k3/AHXLc3N4m55SNdo=", + "zh:161ad0bd9a75768c82f53fb6e7172a9d8be2d4889b012645a34795031aaf1bf1", + "zh:19dc9a5b17729725ccfc4f45b0500af0ee5bc6b6b160c7adb8f2bf617d2c80ea", + "zh:269eda8fe42daa7974d5a34d166c3ba9defe80cde86c01e4dadcfdf2e1f05e5f", + "zh:373f7c65566f8f2cc7f45d698654feb9d988996957e1266a69ca00c52d6d16d0", + "zh:5599d16804c41c83009ec621b6d6b6f74e102f5827678a4750f8809055546b61", + "zh:583be0440469a22bff70dcfa56593b01566860b29607437264adb51060cf46fc", + "zh:5f211d8ec3f2e1f414870d9584bfe26e6995560ef81c748f8447a48164767398", + 
"zh:78d5eefdd9e494defcb3c68d282b8f96630502cac21d1ea161f53cfe9bb483b3", + "zh:7b547fd16216761ef86efc3ed516ac5ac0c5c42b7c7eb24a08cef2d93f69ed5e", + "zh:7e7c0679daf2a382151d05068c8c3f0dae6b7b7dccf818827b73dd08638df2ef", + "zh:8089dec888a8038b9b4fb23b3df7e1057293dbc5b60b42cc47ff690d69d4b61b", + "zh:c51f15a031edfd6f23ce8ced3446ca7f8d8d647e2499890d7d5d10d5016d7257", + "zh:c94784f005708890dc6895afd53636ec00ec1e430b15d41e5aebfb1d4b39bd04", + ] +} diff --git a/infra/aws/README.md b/infra/aws/README.md new file mode 100644 index 0000000..75a451a --- /dev/null +++ b/infra/aws/README.md @@ -0,0 +1,120 @@ +# gitlawb node on AWS (Terraform) + +Single EC2 instance running the published node image + Postgres via Docker +compose, with a persistent encrypted EBS volume, Elastic IP, SSM access, and +daily snapshots. + +``` +Elastic IP ──► EC2 t4g.small (Amazon Linux 2023, arm64) + 7545/tcp docker compose: + 7546/udp ├─ node (ghcr.io/gitlawb/node, pulled — not built) + └─ postgres:16-alpine + EBS gp3 volume mounted at /mnt/data + ├─ node/ → container /data (repos + identity key) + └─ postgres/ → postgres data dir +``` + +## Prerequisites + +- Terraform ≥ 1.6 +- AWS credentials configured (`aws sts get-caller-identity` works) +- AWS CLI + [Session Manager plugin](https://docs.aws.amazon.com/systems-manager/latest/userguide/session-manager-working-with-install-plugin.html) (for shell access) +- A default VPC in the target region (or pass `subnet_id`) + +## Quick start + +```sh +cd infra/aws +cp terraform.tfvars.example terraform.tfvars # edit: public_url at minimum +terraform init +terraform plan +terraform apply # ⚠ creates billable resources (~$25/mo: EC2 + EBS + snapshots) +``` + +After apply (~3-5 min for first boot to pull images and start): + +```sh +curl "$(terraform output -raw api_url)/health" +``` + +## ⚠ First boot: back up the identity key + +The node generates `/data/keys/identity.pem` on first start — it defines the +node's DID. 
**Losing it permanently changes the node's identity.** Back it up +immediately: + +```sh +$(terraform output -raw ssm_session_command) +# in the session: +sudo cat /mnt/data/node/keys/identity.pem +``` + +Store the key somewhere safe (password manager / offline). The volume's +`prevent_destroy` guard and daily DLM snapshots protect against accidents, but +are not a substitute for an offline backup. + +## Shell access + +SSM Session Manager — no SSH port, no keys to manage: + +```sh +$(terraform output -raw ssm_session_command) +``` + +Bootstrap log: `/var/log/gitlawb-bootstrap.log`. Stack lives in `/opt/gitlawb` +(`docker compose ps`, `docker compose logs node`). + +SSH is off by default; set `ssh_ingress_cidr` + `ssh_key_name` if you need it. + +## Upgrading the node + +User-data only runs at first boot, so upgrades go through SSM: + +```sh +$(terraform output -raw upgrade_command) +``` + +This runs `docker compose pull && docker compose up -d` on the instance. + +- With `image_tag = "latest"` (default) that picks up the newest release. +- With a **pinned tag**, first edit the tag in `/opt/gitlawb/compose.yaml` on + the instance (via SSM session), then run the upgrade command — and keep + `image_tag` in terraform.tfvars in sync so a future instance replacement + boots the same version. + +Replace the instance itself (OS/AMI/instance-type changes) with +`terraform apply -replace=aws_instance.node` — the data volume reattaches and +`/data` (including the identity key) survives. + +## Remote state (optional) + +Local state is the default. To move state to S3: create a versioned bucket, +uncomment the `backend "s3"` block in `versions.tf`, then: + +```sh +terraform init -migrate-state +``` + +## Teardown + +`terraform destroy` will **fail on the data volume by design** +(`prevent_destroy`). To tear everything down: + +1. Back up the identity key (above) and take a final snapshot if you may return. +2. 
Remove the `prevent_destroy` line from `aws_ebs_volume.data` in `main.tf`. +3. `terraform destroy`. + +Note: DLM snapshots created by the policy are not deleted by destroy — clean +them up in the EC2 console if unwanted. The Elastic IP is released on destroy. + +## Security notes + +- Postgres password: generated by Terraform, stored as an SSM SecureString, + fetched at boot via the instance profile — never embedded in user-data. On + the instance it is written only to `/opt/gitlawb/.env` (mode 600). It IS in + Terraform state — treat state as sensitive (another reason for the S3 backend). +- Sensitive optional vars (`operator_private_key`, `pinata_jwt`, + `s3_secret_access_key`) follow the same SSM path. +- IMDSv2 is required; metrics port is closed unless `metrics_ingress_cidr` is set. +- The node serves plain HTTP on 7545. For TLS, put a DNS name + proxy + (ALB/CloudFront/Caddy) in front and set `public_url` accordingly. diff --git a/infra/aws/compose.yaml.tftpl b/infra/aws/compose.yaml.tftpl new file mode 100644 index 0000000..575c36a --- /dev/null +++ b/infra/aws/compose.yaml.tftpl @@ -0,0 +1,57 @@ +# Rendered by Terraform (compose.yaml.tftpl) and written to /opt/gitlawb/compose.yaml +# by user-data. Adapted from the repo-root docker-compose.yml: pulls the published +# image instead of building, and binds data dirs to the dedicated EBS volume. +# `$${VAR}` entries are resolved by docker compose from /opt/gitlawb/.env at runtime. 
+ +services: + postgres: + image: postgres:16-alpine + environment: + POSTGRES_DB: ${pg_db} + POSTGRES_USER: ${pg_user} + POSTGRES_PASSWORD: $${POSTGRES_PASSWORD} + volumes: + - /mnt/data/postgres:/var/lib/postgresql/data + healthcheck: + test: ["CMD-SHELL", "pg_isready -U ${pg_user}"] + interval: 10s + timeout: 5s + retries: 5 + restart: unless-stopped + + node: + image: ${image_repo}:${image_tag} + depends_on: + postgres: + condition: service_healthy + ports: + - "${gitlawb_port}:${gitlawb_port}" # HTTP API + git smart-HTTP + - "${p2p_port}:${p2p_port}/udp" # libp2p QUIC +%{ if expose_metrics ~} + - "${metrics_port}:${metrics_port}" # Prometheus /metrics +%{ endif ~} + volumes: + - /mnt/data/node:/data + environment: + DATABASE_URL: postgresql://${pg_user}:$${POSTGRES_PASSWORD}@postgres:5432/${pg_db} + GITLAWB_HOST: 0.0.0.0 + GITLAWB_PORT: "${gitlawb_port}" + GITLAWB_P2P_PORT: "${p2p_port}" + GITLAWB_REPOS_DIR: /data/repos + GITLAWB_KEY: /data/keys/identity.pem + GITLAWB_PUBLIC_URL: $${GITLAWB_PUBLIC_URL} + GITLAWB_BOOTSTRAP_PEERS: $${GITLAWB_BOOTSTRAP_PEERS} + GITLAWB_AUTO_SYNC: $${GITLAWB_AUTO_SYNC} + GITLAWB_MAX_PACK_BYTES: $${GITLAWB_MAX_PACK_BYTES} + # On-chain PoS (optional — empty unless set in terraform.tfvars) + GITLAWB_CHAIN_RPC_URL: $${GITLAWB_CHAIN_RPC_URL} + GITLAWB_CONTRACT_NODE_STAKING: $${GITLAWB_CONTRACT_NODE_STAKING} + GITLAWB_OPERATOR_PRIVATE_KEY: $${GITLAWB_OPERATOR_PRIVATE_KEY} + # IPFS pinning (optional) + GITLAWB_PINATA_JWT: $${GITLAWB_PINATA_JWT} + # Shared S3-compatible pack storage (optional) + GITLAWB_TIGRIS_BUCKET: $${GITLAWB_TIGRIS_BUCKET} + AWS_ACCESS_KEY_ID: $${S3_ACCESS_KEY_ID} + AWS_SECRET_ACCESS_KEY: $${S3_SECRET_ACCESS_KEY} + AWS_ENDPOINT_URL_S3: $${S3_ENDPOINT_URL} + restart: unless-stopped diff --git a/infra/aws/main.tf b/infra/aws/main.tf new file mode 100644 index 0000000..d2a8389 --- /dev/null +++ b/infra/aws/main.tf @@ -0,0 +1,359 @@ +# Single-EC2 gitlawb node: Docker compose (node + postgres) on Amazon Linux 2023, +# 
persistent EBS data volume, Elastic IP, SSM access, daily snapshots. + +# --------------------------------------------------------------------------- +# Network placement (default VPC unless subnet_id is set) +# --------------------------------------------------------------------------- + +data "aws_vpc" "default" { + default = true +} + +data "aws_subnets" "default" { + filter { + name = "vpc-id" + values = [data.aws_vpc.default.id] + } +} + +locals { + subnet_id = coalesce(var.subnet_id, sort(data.aws_subnets.default.ids)[0]) + expose_metrics = var.metrics_ingress_cidr != null + common_tags = merge({ Project = "gitlawb-node", ManagedBy = "terraform" }, var.tags) +} + +data "aws_subnet" "selected" { + id = local.subnet_id +} + +# Latest Amazon Linux 2023 arm64 AMI (pairs with t4g/Graviton; node image is +# multi-arch). ignore_changes on the instance AMI avoids churn on AL releases. +data "aws_ssm_parameter" "al2023_arm64" { + name = "/aws/service/ami-amazon-linux-latest/al2023-ami-kernel-default-arm64" +} + +# --------------------------------------------------------------------------- +# Secrets (SSM Parameter Store — never baked into user-data) +# --------------------------------------------------------------------------- + +resource "random_password" "postgres" { + length = 32 + special = false +} + +resource "aws_ssm_parameter" "postgres_password" { + name = "/${var.name_prefix}/postgres_password" + type = "SecureString" + value = random_password.postgres.result + tags = local.common_tags +} + +resource "aws_ssm_parameter" "operator_key" { + count = var.operator_private_key != "" ? 1 : 0 + name = "/${var.name_prefix}/operator_private_key" + type = "SecureString" + value = var.operator_private_key + tags = local.common_tags +} + +resource "aws_ssm_parameter" "pinata_jwt" { + count = var.pinata_jwt != "" ? 
1 : 0 + name = "/${var.name_prefix}/pinata_jwt" + type = "SecureString" + value = var.pinata_jwt + tags = local.common_tags +} + +resource "aws_ssm_parameter" "s3_secret" { + count = var.s3_secret_access_key != "" ? 1 : 0 + name = "/${var.name_prefix}/s3_secret_access_key" + type = "SecureString" + value = var.s3_secret_access_key + tags = local.common_tags +} + +locals { + secret_param_arns = concat( + [aws_ssm_parameter.postgres_password.arn], + aws_ssm_parameter.operator_key[*].arn, + aws_ssm_parameter.pinata_jwt[*].arn, + aws_ssm_parameter.s3_secret[*].arn, + ) +} + +# --------------------------------------------------------------------------- +# IAM: SSM Session Manager access + least-privilege read of our parameters. +# (AWS-managed aws/ssm KMS key needs no explicit kms:Decrypt grant.) +# --------------------------------------------------------------------------- + +resource "aws_iam_role" "node" { + name = "${var.name_prefix}-instance" + tags = local.common_tags + + assume_role_policy = jsonencode({ + Version = "2012-10-17" + Statement = [{ + Effect = "Allow" + Principal = { Service = "ec2.amazonaws.com" } + Action = "sts:AssumeRole" + }] + }) +} + +resource "aws_iam_role_policy_attachment" "ssm_core" { + role = aws_iam_role.node.name + policy_arn = "arn:aws:iam::aws:policy/AmazonSSMManagedInstanceCore" +} + +resource "aws_iam_role_policy" "ssm_params_read" { + name = "read-gitlawb-secrets" + role = aws_iam_role.node.id + + policy = jsonencode({ + Version = "2012-10-17" + Statement = [{ + Effect = "Allow" + Action = ["ssm:GetParameter"] + Resource = local.secret_param_arns + }] + }) +} + +resource "aws_iam_instance_profile" "node" { + name = "${var.name_prefix}-instance" + role = aws_iam_role.node.name +} + +# --------------------------------------------------------------------------- +# Security group +# --------------------------------------------------------------------------- + +resource "aws_security_group" "node" { + name = "${var.name_prefix}-sg" + 
description = "gitlawb node: HTTP API + libp2p UDP" + vpc_id = data.aws_subnet.selected.vpc_id # VPC of the chosen subnet; subnet_id may be outside the default VPC + tags = local.common_tags + + ingress { + description = "HTTP API + git smart-HTTP" + from_port = var.gitlawb_port + to_port = var.gitlawb_port + protocol = "tcp" + cidr_blocks = [var.api_ingress_cidr] + } + + ingress { + description = "libp2p QUIC" + from_port = var.gitlawb_p2p_port + to_port = var.gitlawb_p2p_port + protocol = "udp" + cidr_blocks = [var.p2p_ingress_cidr] + } + + dynamic "ingress" { + for_each = local.expose_metrics ? [1] : [] + content { + description = "Prometheus metrics" + from_port = var.metrics_port + to_port = var.metrics_port + protocol = "tcp" + cidr_blocks = [var.metrics_ingress_cidr] + } + } + + dynamic "ingress" { + for_each = var.ssh_ingress_cidr != null ? [1] : [] + content { + description = "SSH (prefer SSM Session Manager)" + from_port = 22 + to_port = 22 + protocol = "tcp" + cidr_blocks = [var.ssh_ingress_cidr] + } + } + + egress { + from_port = 0 + to_port = 0 + protocol = "-1" + cidr_blocks = ["0.0.0.0/0"] + } +} + +# --------------------------------------------------------------------------- +# Persistent data volume — survives instance replacement. prevent_destroy +# guards repos, postgres data, and the node identity key (/data/keys). 
+# --------------------------------------------------------------------------- + +resource "aws_ebs_volume" "data" { + availability_zone = data.aws_subnet.selected.availability_zone + size = var.data_volume_size_gb + type = var.data_volume_type + encrypted = true + tags = merge(local.common_tags, { + Name = "${var.name_prefix}-data" + Snapshot = "true" # targeted by the DLM snapshot policy + }) + + lifecycle { + prevent_destroy = true + } +} + +# --------------------------------------------------------------------------- +# Instance + Elastic IP +# --------------------------------------------------------------------------- + +resource "aws_eip" "node" { + domain = "vpc" + tags = merge(local.common_tags, { Name = var.name_prefix }) +} + +locals { + public_url = var.public_url != "" ? var.public_url : "http://${aws_eip.node.public_ip}:${var.gitlawb_port}" + + compose_yaml = templatefile("${path.module}/compose.yaml.tftpl", { + image_repo = var.image_repo + image_tag = var.image_tag + gitlawb_port = var.gitlawb_port + p2p_port = var.gitlawb_p2p_port + metrics_port = var.metrics_port + expose_metrics = local.expose_metrics + pg_user = var.postgres_user + pg_db = var.postgres_db + }) + + user_data = templatefile("${path.module}/user-data.sh.tftpl", { + region = var.region + pg_password_param = aws_ssm_parameter.postgres_password.name + operator_key_param = try(aws_ssm_parameter.operator_key[0].name, "") + pinata_jwt_param = try(aws_ssm_parameter.pinata_jwt[0].name, "") + s3_secret_param = try(aws_ssm_parameter.s3_secret[0].name, "") + public_url = local.public_url + bootstrap_peers = var.bootstrap_peers + auto_sync = var.auto_sync + max_pack_bytes = var.max_pack_bytes + chain_rpc_url = var.chain_rpc_url + contract_node_staking = var.contract_node_staking + tigris_bucket = var.tigris_bucket + s3_access_key_id = var.s3_access_key_id + s3_endpoint_url = var.s3_endpoint_url + compose_yaml = local.compose_yaml + }) +} + +resource "aws_instance" "node" { + ami = 
nonsensitive(data.aws_ssm_parameter.al2023_arm64.value) + instance_type = var.instance_type + subnet_id = local.subnet_id + vpc_security_group_ids = [aws_security_group.node.id] + iam_instance_profile = aws_iam_instance_profile.node.name + key_name = var.ssh_key_name + user_data = local.user_data + tags = merge(local.common_tags, { Name = var.name_prefix }) + + metadata_options { + http_endpoint = "enabled" + http_tokens = "required" # IMDSv2 only + } + + root_block_device { + volume_size = 8 + volume_type = "gp3" + encrypted = true + } + + lifecycle { + # New AL2023 AMI releases shouldn't force-replace the instance; replace + # deliberately (taint) for OS upgrades. user_data changes also only apply + # at first boot — use the upgrade SSM command for image bumps. + ignore_changes = [ami] + } +} + +resource "aws_volume_attachment" "data" { + device_name = "/dev/sdf" # presented as /dev/nvme1n1 on Nitro; user-data discovers it + volume_id = aws_ebs_volume.data.id + instance_id = aws_instance.node.id +} + +resource "aws_eip_association" "node" { + instance_id = aws_instance.node.id + allocation_id = aws_eip.node.id +} + +# --------------------------------------------------------------------------- +# Daily EBS snapshots of the data volume (DLM) +# --------------------------------------------------------------------------- + +resource "aws_iam_role" "dlm" { + name = "${var.name_prefix}-dlm" + tags = local.common_tags + + assume_role_policy = jsonencode({ + Version = "2012-10-17" + Statement = [{ + Effect = "Allow" + Principal = { Service = "dlm.amazonaws.com" } + Action = "sts:AssumeRole" + }] + }) +} + +resource "aws_iam_role_policy_attachment" "dlm" { + role = aws_iam_role.dlm.name + policy_arn = "arn:aws:iam::aws:policy/service-role/AWSDataLifecycleManagerServiceRole" +} + +resource "aws_dlm_lifecycle_policy" "data" { + description = "Daily snapshots of the gitlawb data volume" + execution_role_arn = aws_iam_role.dlm.arn + state = "ENABLED" + tags = 
local.common_tags + + policy_details { + resource_types = ["VOLUME"] + target_tags = { Snapshot = "true" } + + schedule { + name = "daily" + + create_rule { + interval = 24 + interval_unit = "HOURS" + times = ["05:00"] + } + + retain_rule { + count = var.snapshot_retain_count + } + + copy_tags = true + } + } +} + +# --------------------------------------------------------------------------- +# Upgrade runbook as code: pull + restart the compose stack via SSM +# --------------------------------------------------------------------------- + +resource "aws_ssm_document" "upgrade" { + name = "${var.name_prefix}-upgrade" + document_type = "Command" + document_format = "JSON" + tags = local.common_tags + + content = jsonencode({ + schemaVersion = "2.2" + description = "Pull the latest gitlawb node image and restart the compose stack" + mainSteps = [{ + action = "aws:runShellScript" + name = "upgrade" + inputs = { + runCommand = [ + "cd /opt/gitlawb && docker compose pull && docker compose up -d --remove-orphans && docker image prune -f" + ] + } + }] + }) +} diff --git a/infra/aws/outputs.tf b/infra/aws/outputs.tf new file mode 100644 index 0000000..0d25530 --- /dev/null +++ b/infra/aws/outputs.tf @@ -0,0 +1,34 @@ +output "elastic_ip" { + description = "Public IP of the node" + value = aws_eip.node.public_ip +} + +output "api_url" { + description = "HTTP API endpoint" + value = "http://${aws_eip.node.public_ip}:${var.gitlawb_port}" +} + +output "instance_id" { + description = "EC2 instance ID" + value = aws_instance.node.id +} + +output "data_volume_id" { + description = "Persistent EBS data volume (protected by prevent_destroy)" + value = aws_ebs_volume.data.id +} + +output "postgres_password_ssm_param" { + description = "SSM parameter holding the postgres password (value not shown)" + value = aws_ssm_parameter.postgres_password.name +} + +output "ssm_session_command" { + description = "Open a shell on the instance" + value = "aws ssm start-session --target 
${aws_instance.node.id} --region ${var.region}" +} + +output "upgrade_command" { + description = "Pull the latest node image and restart the stack" + value = "aws ssm send-command --document-name ${aws_ssm_document.upgrade.name} --targets Key=InstanceIds,Values=${aws_instance.node.id} --region ${var.region}" +} diff --git a/infra/aws/terraform.tfvars.example b/infra/aws/terraform.tfvars.example new file mode 100644 index 0000000..0733d99 --- /dev/null +++ b/infra/aws/terraform.tfvars.example @@ -0,0 +1,42 @@ +# Copy to terraform.tfvars and adjust. terraform.tfvars is gitignored. + +region = "us-east-1" +name_prefix = "gitlawb-node" +instance_type = "t4g.small" # ARM/Graviton; node image is multi-arch + +# --- Image ----------------------------------------------------------------- +# image_repo = "ghcr.io/gitlawb/node" +# image_tag = "latest" # pin a version for production, e.g. "0.9.2" + +# --- Storage --------------------------------------------------------------- +# data_volume_size_gb = 20 +# snapshot_retain_count = 7 + +# --- Node configuration (defaults mirror infra/fly/fly.toml) --------------- +# Set to your real DNS name for production; defaults to http://ELASTIC_IP:7545 +# public_url = "https://node.example.com" + +# bootstrap_peers = "https://node.gitlawb.com,https://node2.gitlawb.com,https://node3.gitlawb.com" +# auto_sync = "true" +# max_pack_bytes = "524288000" + +# --- Ingress --------------------------------------------------------------- +# api_ingress_cidr = "0.0.0.0/0" # public node +# p2p_ingress_cidr = "0.0.0.0/0" +# metrics_ingress_cidr = "203.0.113.10/32" # your Prometheus scraper; null = closed +# ssh_ingress_cidr = null # keep null — use SSM Session Manager +# ssh_key_name = null + +# --- Optional: on-chain PoS operator ---------------------------------------- +# chain_rpc_url = "https://mainnet.base.org" +# contract_node_staking = "0x..." +# operator_private_key = "0x..." 
# stored in SSM SecureString, not user-data + +# --- Optional: IPFS / shared storage ---------------------------------------- +# pinata_jwt = "..." +# tigris_bucket = "my-bucket" +# s3_access_key_id = "..." +# s3_secret_access_key = "..." # stored in SSM SecureString +# s3_endpoint_url = "https://fly.storage.tigris.dev" + +# tags = { Environment = "production" } diff --git a/infra/aws/user-data.sh.tftpl b/infra/aws/user-data.sh.tftpl new file mode 100644 index 0000000..e19aab4 --- /dev/null +++ b/infra/aws/user-data.sh.tftpl @@ -0,0 +1,97 @@ +#!/bin/bash +# Rendered by Terraform (user-data.sh.tftpl). Runs once at first boot via cloud-init. +set -euxo pipefail +exec > >(tee -a /var/log/gitlawb-bootstrap.log) 2>&1 + +COMPOSE_VERSION="v2.32.4" + +# --------------------------------------------------------------------------- +# Docker + compose plugin (Amazon Linux 2023) +# --------------------------------------------------------------------------- +dnf install -y docker +systemctl enable --now docker +mkdir -p /usr/local/lib/docker/cli-plugins +curl -fsSL "https://github.com/docker/compose/releases/download/$COMPOSE_VERSION/docker-compose-linux-$(uname -m)" \ + -o /usr/local/lib/docker/cli-plugins/docker-compose +chmod +x /usr/local/lib/docker/cli-plugins/docker-compose + +# --------------------------------------------------------------------------- +# Data volume: find the attached EBS volume (Nitro presents /dev/sdf as NVMe; +# attachment can lag first boot, so retry). The data disk is the one that is +# not the root disk (no mounted partitions). +# --------------------------------------------------------------------------- +DATA_DEV="" +for i in $(seq 1 60); do + for dev in $(lsblk -dpno NAME,TYPE | awk '$2 == "disk" { print $1 }'); do + if ! 
lsblk -no MOUNTPOINTS "$dev" | grep -q .; then + DATA_DEV="$dev" + break + fi + done + [ -n "$DATA_DEV" ] && break + sleep 5 +done +if [ -z "$DATA_DEV" ]; then + echo "FATAL: data EBS volume never attached" >&2 + exit 1 +fi + +# Format ONLY on first boot (no existing filesystem). Never reformat on +# reattach — this is what preserves repos + the node identity key. +if ! blkid "$DATA_DEV"; then + mkfs.ext4 -L gitlawb-data "$DATA_DEV" +fi +mkdir -p /mnt/data +grep -q 'LABEL=gitlawb-data' /etc/fstab || \ + echo 'LABEL=gitlawb-data /mnt/data ext4 defaults,nofail 0 2' >> /etc/fstab +mount -a + +# Node container runs as uid/gid 1000 (gitlawb). Postgres manages its own dir. +mkdir -p /mnt/data/node/repos /mnt/data/node/keys /mnt/data/postgres +chown -R 1000:1000 /mnt/data/node + +# --------------------------------------------------------------------------- +# Secrets from SSM Parameter Store (instance profile grants read on these only) +# --------------------------------------------------------------------------- +fetch_param() { + if [ -z "$1" ]; then echo ""; return; fi + aws ssm get-parameter --name "$1" --with-decryption \ + --region '${region}' --query Parameter.Value --output text +} +set +x # xtrace off from here on: it would copy secret values into the bootstrap log +PGPASS="$(fetch_param '${pg_password_param}')" +OPERATOR_KEY="$(fetch_param '${operator_key_param}')" +PINATA_JWT="$(fetch_param '${pinata_jwt_param}')" +S3_SECRET="$(fetch_param '${s3_secret_param}')" + +# --------------------------------------------------------------------------- +# Compose stack +# --------------------------------------------------------------------------- +mkdir -p /opt/gitlawb + +cat > /opt/gitlawb/.env << ENV_EOF +POSTGRES_PASSWORD=$PGPASS +GITLAWB_PUBLIC_URL=${public_url} +GITLAWB_BOOTSTRAP_PEERS=${bootstrap_peers} +GITLAWB_AUTO_SYNC=${auto_sync} +GITLAWB_MAX_PACK_BYTES=${max_pack_bytes} +GITLAWB_CHAIN_RPC_URL=${chain_rpc_url} +GITLAWB_CONTRACT_NODE_STAKING=${contract_node_staking} +GITLAWB_OPERATOR_PRIVATE_KEY=$OPERATOR_KEY +GITLAWB_PINATA_JWT=$PINATA_JWT +GITLAWB_TIGRIS_BUCKET=${tigris_bucket} +S3_ACCESS_KEY_ID=${s3_access_key_id} +S3_SECRET_ACCESS_KEY=$S3_SECRET +S3_ENDPOINT_URL=${s3_endpoint_url} +ENV_EOF +chmod 600 /opt/gitlawb/.env + +cat > /opt/gitlawb/compose.yaml <<'COMPOSE_EOF' +${compose_yaml} +COMPOSE_EOF + +cd /opt/gitlawb +docker compose pull +docker compose up -d + +echo "gitlawb-node bootstrap complete" diff --git a/infra/aws/variables.tf b/infra/aws/variables.tf new file mode 100644 index 0000000..7e9ecd4 --- /dev/null +++ b/infra/aws/variables.tf @@ -0,0 +1,223 @@ +# 
--------------------------------------------------------------------------- +# General +# --------------------------------------------------------------------------- + +variable "region" { + description = "AWS region to deploy into" + type = string + default = "us-east-1" +} + +variable "name_prefix" { + description = "Prefix for resource names and tags" + type = string + default = "gitlawb-node" +} + +variable "tags" { + description = "Extra tags applied to all resources" + type = map(string) + default = {} +} + +variable "subnet_id" { + description = "Subnet to launch into. Defaults to the first subnet of the default VPC." + type = string + default = null +} + +# --------------------------------------------------------------------------- +# Compute & storage +# --------------------------------------------------------------------------- + +variable "instance_type" { + description = "EC2 instance type (ARM/Graviton — the node image is multi-arch)" + type = string + default = "t4g.small" +} + +variable "data_volume_size_gb" { + description = "Size of the persistent /data EBS volume in GB (repos, identity key, postgres)" + type = number + default = 20 +} + +variable "data_volume_type" { + description = "EBS volume type for the data volume" + type = string + default = "gp3" +} + +variable "snapshot_retain_count" { + description = "How many daily EBS snapshots of the data volume to retain" + type = number + default = 7 +} + +# --------------------------------------------------------------------------- +# Node image +# --------------------------------------------------------------------------- + +variable "image_repo" { + description = "Container image repository for the node (public, multi-arch)" + type = string + default = "ghcr.io/gitlawb/node" +} + +variable "image_tag" { + description = "Image tag to run. After changing, run the upgrade SSM command (see outputs) — user-data only runs at first boot." 
+ type = string + default = "latest" +} + +# --------------------------------------------------------------------------- +# Networking / ingress +# --------------------------------------------------------------------------- + +variable "gitlawb_port" { + description = "HTTP API port" + type = number + default = 7545 +} + +variable "gitlawb_p2p_port" { + description = "libp2p UDP port" + type = number + default = 7546 +} + +variable "metrics_port" { + description = "Prometheus metrics port" + type = number + default = 9091 +} + +variable "api_ingress_cidr" { + description = "CIDR allowed to reach the HTTP API (public node by default)" + type = string + default = "0.0.0.0/0" +} + +variable "p2p_ingress_cidr" { + description = "CIDR allowed to reach the p2p UDP port" + type = string + default = "0.0.0.0/0" +} + +variable "metrics_ingress_cidr" { + description = "CIDR allowed to scrape /metrics. null = metrics port not exposed." + type = string + default = null +} + +variable "ssh_ingress_cidr" { + description = "CIDR allowed to SSH. null = no SSH (use SSM Session Manager)." + type = string + default = null +} + +variable "ssh_key_name" { + description = "Existing EC2 key pair name for SSH. Only used if ssh_ingress_cidr is set." + type = string + default = null +} + +# --------------------------------------------------------------------------- +# Node configuration (defaults mirror infra/fly/fly.toml) +# --------------------------------------------------------------------------- + +variable "public_url" { + description = "Public URL of this node (GITLAWB_PUBLIC_URL). Leave empty to default to http://<elastic-ip>:<gitlawb_port> after apply — set a real DNS name for production." 
+ type = string + default = "" +} + +variable "bootstrap_peers" { + description = "Comma-separated bootstrap peer URLs (GITLAWB_BOOTSTRAP_PEERS)" + type = string + default = "https://node.gitlawb.com,https://node2.gitlawb.com,https://node3.gitlawb.com" +} + +variable "auto_sync" { + description = "GITLAWB_AUTO_SYNC" + type = string + default = "true" +} + +variable "max_pack_bytes" { + description = "GITLAWB_MAX_PACK_BYTES (500MB, matching the Fly deployment)" + type = string + default = "524288000" +} + +# --------------------------------------------------------------------------- +# Postgres +# --------------------------------------------------------------------------- + +variable "postgres_user" { + description = "Postgres user for the node database" + type = string + default = "gitlawb" +} + +variable "postgres_db" { + description = "Postgres database name" + type = string + default = "gitlawb" +} + +# --------------------------------------------------------------------------- +# Optional integrations (all off by default; see .env.example at repo root) +# --------------------------------------------------------------------------- + +variable "chain_rpc_url" { + description = "Base L2 RPC URL (optional, PoS operator)" + type = string + default = "" +} + +variable "contract_node_staking" { + description = "Node staking contract address (optional)" + type = string + default = "" +} + +variable "operator_private_key" { + description = "PoS operator private key (optional). Stored in SSM, not in user-data." + type = string + default = "" + sensitive = true +} + +variable "pinata_jwt" { + description = "Pinata JWT for IPFS pinning (optional). Stored in SSM, not in user-data." 
+ type = string + default = "" + sensitive = true +} + +variable "tigris_bucket" { + description = "S3-compatible bucket for pack storage (optional, GITLAWB_TIGRIS_BUCKET)" + type = string + default = "" +} + +variable "s3_access_key_id" { + description = "Access key for the S3-compatible bucket (optional)" + type = string + default = "" + sensitive = true +} + +variable "s3_secret_access_key" { + description = "Secret key for the S3-compatible bucket (optional). Stored in SSM, not in user-data." + type = string + default = "" + sensitive = true +} + +variable "s3_endpoint_url" { + description = "Custom S3 endpoint URL (optional, AWS_ENDPOINT_URL_S3)" + type = string + default = "" +} diff --git a/infra/aws/versions.tf b/infra/aws/versions.tf new file mode 100644 index 0000000..158d872 --- /dev/null +++ b/infra/aws/versions.tf @@ -0,0 +1,28 @@ +terraform { + required_version = ">= 1.6" + + required_providers { + aws = { + source = "hashicorp/aws" + version = "~> 5.0" + } + random = { + source = "hashicorp/random" + version = "~> 3.6" + } + } + + # Remote state (optional). Create the bucket first, then uncomment and run + # `terraform init -migrate-state`. See README.md "Remote state". 
+ # + # backend "s3" { + # bucket = "gitlawb-terraform-state" + # key = "infra/aws/terraform.tfstate" + # region = "us-east-1" + # use_lockfile = true # requires Terraform >= 1.10 + # } +} + +provider "aws" { + region = var.region +} From b7c602d62fea3bc1d8339a7866122d881e8ca149 Mon Sep 17 00:00:00 2001 From: Kevin Codex Date: Fri, 5 Jun 2026 18:31:03 +0800 Subject: [PATCH 3/6] fix(infra): address CodeRabbit review feedback - api_url output now honors the public_url override (local.public_url) - disable xtrace around SSM secret fetch + .env write so secret values never reach the bootstrap log - add retry/timeout flags to the docker compose download - validate snapshot_retain_count (1-1000) at plan time - add language tags to fenced tree blocks (MD040) Co-Authored-By: OpenClaude --- infra/README.md | 2 +- infra/aws/README.md | 2 +- infra/aws/outputs.tf | 4 ++-- infra/aws/user-data.sh.tftpl | 7 ++++++- infra/aws/variables.tf | 5 +++++ 5 files changed, 15 insertions(+), 5 deletions(-) diff --git a/infra/README.md b/infra/README.md index aa0ca47..876a1f4 100644 --- a/infra/README.md +++ b/infra/README.md @@ -2,7 +2,7 @@ Deployment configuration, organized by target — one subdirectory per platform. -``` +```text infra/ ├── fly/ │ └── fly.toml # Fly.io app config (gitlawb-node-test) diff --git a/infra/aws/README.md b/infra/aws/README.md index 75a451a..7afe162 100644 --- a/infra/aws/README.md +++ b/infra/aws/README.md @@ -4,7 +4,7 @@ Single EC2 instance running the published node image + Postgres via Docker compose, with a persistent encrypted EBS volume, Elastic IP, SSM access, and daily snapshots. 
-``` +```text Elastic IP ──► EC2 t4g.small (Amazon Linux 2023, arm64) 7545/tcp docker compose: 7546/udp ├─ node (ghcr.io/gitlawb/node, pulled — not built) diff --git a/infra/aws/outputs.tf b/infra/aws/outputs.tf index 0d25530..f8b3c5d 100644 --- a/infra/aws/outputs.tf +++ b/infra/aws/outputs.tf @@ -4,8 +4,8 @@ output "elastic_ip" { } output "api_url" { - description = "HTTP API endpoint" - value = "http://${aws_eip.node.public_ip}:${var.gitlawb_port}" + description = "HTTP API endpoint (honors the public_url override)" + value = local.public_url } output "instance_id" { diff --git a/infra/aws/user-data.sh.tftpl b/infra/aws/user-data.sh.tftpl index e19aab4..9e49ef0 100644 --- a/infra/aws/user-data.sh.tftpl +++ b/infra/aws/user-data.sh.tftpl @@ -11,7 +11,9 @@ COMPOSE_VERSION="v2.32.4" dnf install -y docker systemctl enable --now docker mkdir -p /usr/local/lib/docker/cli-plugins -curl -fsSL "https://github.com/docker/compose/releases/download/$COMPOSE_VERSION/docker-compose-linux-$(uname -m)" \ +curl -fsSL --retry 5 --retry-delay 3 --retry-all-errors \ + --connect-timeout 10 --max-time 300 \ + "https://github.com/docker/compose/releases/download/$COMPOSE_VERSION/docker-compose-linux-$(uname -m)" \ -o /usr/local/lib/docker/cli-plugins/docker-compose chmod +x /usr/local/lib/docker/cli-plugins/docker-compose @@ -52,7 +54,9 @@ chown -R 1000:1000 /mnt/data/node # --------------------------------------------------------------------------- # Secrets from SSM Parameter Store (instance profile grants read on these only) +# xtrace is disabled through the .env write so secret values never hit the log. 
# --------------------------------------------------------------------------- +set +x fetch_param() { if [ -z "$1" ]; then echo ""; return; fi aws ssm get-parameter --name "$1" --with-decryption \ @@ -84,6 +88,7 @@ S3_SECRET_ACCESS_KEY=$S3_SECRET S3_ENDPOINT_URL=${s3_endpoint_url} ENV_EOF chmod 600 /opt/gitlawb/.env +set -x # Quoted heredoc: $${VAR} placeholders inside stay literal for docker compose. cat > /opt/gitlawb/compose.yaml <<'COMPOSE_EOF' diff --git a/infra/aws/variables.tf b/infra/aws/variables.tf index 7e9ecd4..c7e9471 100644 --- a/infra/aws/variables.tf +++ b/infra/aws/variables.tf @@ -52,6 +52,11 @@ variable "snapshot_retain_count" { description = "How many daily EBS snapshots of the data volume to retain" type = number default = 7 + + validation { + condition = var.snapshot_retain_count >= 1 && var.snapshot_retain_count <= 1000 + error_message = "snapshot_retain_count must be between 1 and 1000 (DLM retain rule limits)." + } } # --------------------------------------------------------------------------- From 81dbc68d4e5efdfc10fa716c33a0e1c95df30f74 Mon Sep 17 00:00:00 2001 From: Kevin Codex Date: Fri, 5 Jun 2026 19:56:33 +0800 Subject: [PATCH 4/6] fix(infra): address PR #27 review feedback - security group now derives its VPC from the selected subnet, so subnet_id overrides into non-default VPCs work - s3_access_key_id moves to SSM SecureString (same path as the secret key) instead of being embedded in user-data and state - ignore user_data drift on the instance: it only runs at first boot, so re-rendering caused pointless stop/starts; README documents the -replace workflow for config changes - DLM snapshot targeting is stack-specific (Snapshot + Name tags), not any Snapshot=true volume in the account - comment in fly.toml that it targets the shared test instance - user-data compose up gains --remove-orphans (consistent with the upgrade SSM document) - node service gets an HTTP /health healthcheck in compose Co-Authored-By: OpenClaude --- 
infra/aws/README.md | 15 ++++++++++++++ infra/aws/compose.yaml.tftpl | 6 ++++++ infra/aws/main.tf | 32 +++++++++++++++++++++++------- infra/aws/terraform.tfvars.example | 4 ++-- infra/aws/user-data.sh.tftpl | 5 +++-- infra/fly/fly.toml | 2 ++ 6 files changed, 53 insertions(+), 11 deletions(-) diff --git a/infra/aws/README.md b/infra/aws/README.md index 7afe162..754f559 100644 --- a/infra/aws/README.md +++ b/infra/aws/README.md @@ -86,6 +86,21 @@ Replace the instance itself (OS/AMI/instance-type changes) with `terraform apply -replace=aws_instance.node` — the data volume reattaches and `/data` (including the identity key) survives. +## Changing configuration + +User-data only runs at first boot, and the instance ignores `user_data` drift +(`ignore_changes`), so editing terraform.tfvars values that feed the bootstrap +(`bootstrap_peers`, `public_url`, integrations, `image_tag`) does **not** +affect a running instance on `terraform apply`. To roll out such changes, +either edit `/opt/gitlawb/.env` on the instance (SSM session, then +`docker compose up -d`), or replace the instance: + +```sh +terraform apply -replace=aws_instance.node +``` + +The data volume reattaches; repos, postgres data, and the identity key survive. + ## Remote state (optional) Local state is the default. 
To move state to S3: create a versioned bucket, diff --git a/infra/aws/compose.yaml.tftpl b/infra/aws/compose.yaml.tftpl index 575c36a..8894074 100644 --- a/infra/aws/compose.yaml.tftpl +++ b/infra/aws/compose.yaml.tftpl @@ -54,4 +54,10 @@ services: AWS_ACCESS_KEY_ID: $${S3_ACCESS_KEY_ID} AWS_SECRET_ACCESS_KEY: $${S3_SECRET_ACCESS_KEY} AWS_ENDPOINT_URL_S3: $${S3_ENDPOINT_URL} + healthcheck: + test: ["CMD-SHELL", "curl -sf http://localhost:${gitlawb_port}/health || exit 1"] + interval: 30s + timeout: 5s + retries: 3 + start_period: 30s restart: unless-stopped diff --git a/infra/aws/main.tf b/infra/aws/main.tf index d2a8389..a1e65b1 100644 --- a/infra/aws/main.tf +++ b/infra/aws/main.tf @@ -72,12 +72,21 @@ resource "aws_ssm_parameter" "s3_secret" { tags = local.common_tags } +resource "aws_ssm_parameter" "s3_access_key" { + count = var.s3_access_key_id != "" ? 1 : 0 + name = "/${var.name_prefix}/s3_access_key_id" + type = "SecureString" + value = var.s3_access_key_id + tags = local.common_tags +} + locals { secret_param_arns = concat( [aws_ssm_parameter.postgres_password.arn], aws_ssm_parameter.operator_key[*].arn, aws_ssm_parameter.pinata_jwt[*].arn, aws_ssm_parameter.s3_secret[*].arn, + aws_ssm_parameter.s3_access_key[*].arn, ) } @@ -131,7 +140,7 @@ resource "aws_iam_instance_profile" "node" { resource "aws_security_group" "node" { name = "${var.name_prefix}-sg" description = "gitlawb node: HTTP API + libp2p UDP" - vpc_id = data.aws_vpc.default.id + vpc_id = data.aws_subnet.selected.vpc_id # follows subnet_id overrides into non-default VPCs tags = local.common_tags ingress { @@ -229,6 +238,7 @@ locals { operator_key_param = try(aws_ssm_parameter.operator_key[0].name, "") pinata_jwt_param = try(aws_ssm_parameter.pinata_jwt[0].name, "") s3_secret_param = try(aws_ssm_parameter.s3_secret[0].name, "") + s3_access_key_param = try(aws_ssm_parameter.s3_access_key[0].name, "") public_url = local.public_url bootstrap_peers = var.bootstrap_peers auto_sync = var.auto_sync @@ 
-236,7 +246,6 @@ locals { chain_rpc_url = var.chain_rpc_url contract_node_staking = var.contract_node_staking tigris_bucket = var.tigris_bucket - s3_access_key_id = var.s3_access_key_id s3_endpoint_url = var.s3_endpoint_url compose_yaml = local.compose_yaml }) @@ -264,10 +273,14 @@ resource "aws_instance" "node" { } lifecycle { - # New AL2023 AMI releases shouldn't force-replace the instance; replace - # deliberately (taint) for OS upgrades. user_data changes also only apply - # at first boot — use the upgrade SSM command for image bumps. - ignore_changes = [ami] + # ami: new AL2023 releases shouldn't churn the instance; replace + # deliberately for OS upgrades. + # user_data: only runs at first boot, so re-rendering it on a live + # instance is a pointless stop/start. Config changes that feed user-data + # (bootstrap peers, integrations, image tag) require a deliberate + # `terraform apply -replace=aws_instance.node` — see README "Changing + # configuration". + ignore_changes = [ami, user_data] } } @@ -313,7 +326,12 @@ resource "aws_dlm_lifecycle_policy" "data" { policy_details { resource_types = ["VOLUME"] - target_tags = { Snapshot = "true" } + # Name makes the target stack-specific — a bare Snapshot=true would also + # match unrelated tagged volumes in a shared account. + target_tags = { + Snapshot = "true" + Name = "${var.name_prefix}-data" + } schedule { name = "daily" diff --git a/infra/aws/terraform.tfvars.example b/infra/aws/terraform.tfvars.example index 0733d99..82e5f89 100644 --- a/infra/aws/terraform.tfvars.example +++ b/infra/aws/terraform.tfvars.example @@ -35,8 +35,8 @@ instance_type = "t4g.small" # ARM/Graviton; node image is multi-arch # --- Optional: IPFS / shared storage ---------------------------------------- # pinata_jwt = "..." # tigris_bucket = "my-bucket" -# s3_access_key_id = "..." -# s3_secret_access_key = "..." # stored in SSM SecureString +# s3_access_key_id = "..." 
# stored in SSM SecureString, not user-data +# s3_secret_access_key = "..." # stored in SSM SecureString, not user-data # s3_endpoint_url = "https://fly.storage.tigris.dev" # tags = { Environment = "production" } diff --git a/infra/aws/user-data.sh.tftpl b/infra/aws/user-data.sh.tftpl index 9e49ef0..ba97800 100644 --- a/infra/aws/user-data.sh.tftpl +++ b/infra/aws/user-data.sh.tftpl @@ -66,6 +66,7 @@ PGPASS="$(fetch_param '${pg_password_param}')" OPERATOR_KEY="$(fetch_param '${operator_key_param}')" PINATA_JWT="$(fetch_param '${pinata_jwt_param}')" S3_SECRET="$(fetch_param '${s3_secret_param}')" +S3_KEY_ID="$(fetch_param '${s3_access_key_param}')" # --------------------------------------------------------------------------- # Compose stack @@ -83,7 +84,7 @@ GITLAWB_CONTRACT_NODE_STAKING=${contract_node_staking} GITLAWB_OPERATOR_PRIVATE_KEY=$OPERATOR_KEY GITLAWB_PINATA_JWT=$PINATA_JWT GITLAWB_TIGRIS_BUCKET=${tigris_bucket} -S3_ACCESS_KEY_ID=${s3_access_key_id} +S3_ACCESS_KEY_ID=$S3_KEY_ID S3_SECRET_ACCESS_KEY=$S3_SECRET S3_ENDPOINT_URL=${s3_endpoint_url} ENV_EOF @@ -97,6 +98,6 @@ COMPOSE_EOF cd /opt/gitlawb docker compose pull -docker compose up -d +docker compose up -d --remove-orphans echo "gitlawb-node bootstrap complete" diff --git a/infra/fly/fly.toml b/infra/fly/fly.toml index 2637fa7..3f6c53a 100644 --- a/infra/fly/fly.toml +++ b/infra/fly/fly.toml @@ -1,3 +1,5 @@ +# NOTE: this config targets the shared TEST instance. To deploy your own +# node, change `app` (and the volume/URL below) or use `fly deploy -a <your-app>`. app = "gitlawb-node-test" primary_region = "iad" From a759c01f501fac942c33512333ee8851a36c2179 Mon Sep 17 00:00:00 2001 From: Kevin Codex Date: Fri, 5 Jun 2026 20:07:07 +0800 Subject: [PATCH 5/6] feat(infra): optional customer-managed KMS key for SSM secrets MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add ssm_kms_key_id (default null = AWS-managed aws/ssm key). 
When set, all five SecureString parameters are encrypted with the CMK and the instance role gains kms:Decrypt scoped to that key — without it the boot-time secret fetch would fail. Co-Authored-By: OpenClaude --- infra/aws/README.md | 5 +- infra/aws/main.tf | 78 ++++++++++++++++++------------ infra/aws/terraform.tfvars.example | 3 ++ infra/aws/variables.tf | 6 +++ 4 files changed, 61 insertions(+), 31 deletions(-) diff --git a/infra/aws/README.md b/infra/aws/README.md index 754f559..41a56d9 100644 --- a/infra/aws/README.md +++ b/infra/aws/README.md @@ -129,7 +129,10 @@ them up in the EC2 console if unwanted. The Elastic IP is released on destroy. files on disk (only in `/opt/gitlawb/.env`, mode 600). It IS in Terraform state — treat state as sensitive (another reason for the S3 backend). - Sensitive optional vars (`operator_private_key`, `pinata_jwt`, - `s3_secret_access_key`) follow the same SSM path. + `s3_access_key_id`, `s3_secret_access_key`) follow the same SSM path. +- SSM secrets use the AWS-managed `aws/ssm` key by default; set + `ssm_kms_key_id` to encrypt with a customer-managed KMS key instead (the + instance role is granted `kms:Decrypt` on that key automatically). - IMDSv2 is required; metrics port is closed unless `metrics_ingress_cidr` is set. - The node serves plain HTTP on 7545. For TLS, put a DNS name + proxy (ALB/CloudFront/Caddy) in front and set `public_url` accordingly. 
diff --git a/infra/aws/main.tf b/infra/aws/main.tf index a1e65b1..51342c9 100644 --- a/infra/aws/main.tf +++ b/infra/aws/main.tf @@ -42,42 +42,47 @@ resource "random_password" "postgres" { } resource "aws_ssm_parameter" "postgres_password" { - name = "/${var.name_prefix}/postgres_password" - type = "SecureString" - value = random_password.postgres.result - tags = local.common_tags + name = "/${var.name_prefix}/postgres_password" + type = "SecureString" + key_id = var.ssm_kms_key_id + value = random_password.postgres.result + tags = local.common_tags } resource "aws_ssm_parameter" "operator_key" { - count = var.operator_private_key != "" ? 1 : 0 - name = "/${var.name_prefix}/operator_private_key" - type = "SecureString" - value = var.operator_private_key - tags = local.common_tags + count = var.operator_private_key != "" ? 1 : 0 + name = "/${var.name_prefix}/operator_private_key" + type = "SecureString" + key_id = var.ssm_kms_key_id + value = var.operator_private_key + tags = local.common_tags } resource "aws_ssm_parameter" "pinata_jwt" { - count = var.pinata_jwt != "" ? 1 : 0 - name = "/${var.name_prefix}/pinata_jwt" - type = "SecureString" - value = var.pinata_jwt - tags = local.common_tags + count = var.pinata_jwt != "" ? 1 : 0 + name = "/${var.name_prefix}/pinata_jwt" + type = "SecureString" + key_id = var.ssm_kms_key_id + value = var.pinata_jwt + tags = local.common_tags } resource "aws_ssm_parameter" "s3_secret" { - count = var.s3_secret_access_key != "" ? 1 : 0 - name = "/${var.name_prefix}/s3_secret_access_key" - type = "SecureString" - value = var.s3_secret_access_key - tags = local.common_tags + count = var.s3_secret_access_key != "" ? 1 : 0 + name = "/${var.name_prefix}/s3_secret_access_key" + type = "SecureString" + key_id = var.ssm_kms_key_id + value = var.s3_secret_access_key + tags = local.common_tags } resource "aws_ssm_parameter" "s3_access_key" { - count = var.s3_access_key_id != "" ? 
1 : 0 - name = "/${var.name_prefix}/s3_access_key_id" - type = "SecureString" - value = var.s3_access_key_id - tags = local.common_tags + count = var.s3_access_key_id != "" ? 1 : 0 + name = "/${var.name_prefix}/s3_access_key_id" + type = "SecureString" + key_id = var.ssm_kms_key_id + value = var.s3_access_key_id + tags = local.common_tags } locals { @@ -92,9 +97,15 @@ locals { # --------------------------------------------------------------------------- # IAM: SSM Session Manager access + least-privilege read of our parameters. -# (AWS-managed aws/ssm KMS key needs no explicit kms:Decrypt grant.) +# The AWS-managed aws/ssm KMS key needs no explicit kms:Decrypt grant; a +# customer-managed key (ssm_kms_key_id) gets one scoped to that key. # --------------------------------------------------------------------------- +data "aws_kms_key" "ssm" { + count = var.ssm_kms_key_id != null ? 1 : 0 + key_id = var.ssm_kms_key_id +} + resource "aws_iam_role" "node" { name = "${var.name_prefix}-instance" tags = local.common_tags @@ -120,11 +131,18 @@ resource "aws_iam_role_policy" "ssm_params_read" { policy = jsonencode({ Version = "2012-10-17" - Statement = [{ - Effect = "Allow" - Action = ["ssm:GetParameter"] - Resource = local.secret_param_arns - }] + Statement = concat( + [{ + Effect = "Allow" + Action = ["ssm:GetParameter"] + Resource = local.secret_param_arns + }], + var.ssm_kms_key_id != null ? [{ + Effect = "Allow" + Action = ["kms:Decrypt"] + Resource = [data.aws_kms_key.ssm[0].arn] + }] : [] + ) }) } diff --git a/infra/aws/terraform.tfvars.example b/infra/aws/terraform.tfvars.example index 82e5f89..55f92da 100644 --- a/infra/aws/terraform.tfvars.example +++ b/infra/aws/terraform.tfvars.example @@ -39,4 +39,7 @@ instance_type = "t4g.small" # ARM/Graviton; node image is multi-arch # s3_secret_access_key = "..." 
# stored in SSM SecureString, not user-data # s3_endpoint_url = "https://fly.storage.tigris.dev" +# Encrypt SSM secrets with a customer-managed KMS key instead of aws/ssm +# ssm_kms_key_id = "alias/my-key" + # tags = { Environment = "production" } diff --git a/infra/aws/variables.tf b/infra/aws/variables.tf index c7e9471..7fc4c4b 100644 --- a/infra/aws/variables.tf +++ b/infra/aws/variables.tf @@ -155,6 +155,12 @@ variable "max_pack_bytes" { default = "524288000" } +variable "ssm_kms_key_id" { + description = "Customer-managed KMS key (ID, alias, or ARN) for encrypting the SSM SecureString secrets. null = AWS-managed aws/ssm key." + type = string + default = null +} + # --------------------------------------------------------------------------- # Postgres # --------------------------------------------------------------------------- From 5f06251e55c2e5b1eb2614b6efcf663a909c0940 Mon Sep 17 00:00:00 2001 From: Kevin Codex Date: Fri, 5 Jun 2026 22:04:47 +0800 Subject: [PATCH 6/6] fix(infra): scope CMK kms:Decrypt to this stack's SSM parameters The grant on the customer-managed key was unconditioned, allowing the instance role to decrypt any ciphertext under the same CMK. Restrict it with kms:ViaService = ssm.<region>.amazonaws.com and kms:EncryptionContext:PARAMETER_ARN limited to the stack's parameter ARNs. Co-Authored-By: OpenClaude --- infra/aws/main.tf | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/infra/aws/main.tf b/infra/aws/main.tf index 51342c9..42fecf0 100644 --- a/infra/aws/main.tf +++ b/infra/aws/main.tf @@ -141,6 +141,14 @@ resource "aws_iam_role_policy" "ssm_params_read" { Effect = "Allow" Action = ["kms:Decrypt"] Resource = [data.aws_kms_key.ssm[0].arn] + # Only via Parameter Store, and only for this stack's parameters — + # not arbitrary ciphertext encrypted under the same CMK. + Condition = { + StringEquals = { + "kms:ViaService" = "ssm.${var.region}.amazonaws.com" + "kms:EncryptionContext:PARAMETER_ARN" = local.secret_param_arns + } + } }] : [] + ) })