From 87bfda1280ceea8fcc58f0969971fe9b9def57e9 Mon Sep 17 00:00:00 2001 From: Jeonghwan Lee Date: Tue, 2 Jun 2026 16:02:40 +0900 Subject: [PATCH 1/2] ci: pin linux build runners to ubuntu-22.04 Pin the linux build/release matrix runners from ubuntu-latest (24.04) and ubuntu-24.04-arm to ubuntu-22.04 / ubuntu-22.04-arm across the llama-server and binary build workflows. Building on 22.04 links against an older glibc, so the shipped llama-server and runed binaries stay compatible with older distros than building on 24.04 would allow. The publish job is left on ubuntu-latest since it only assembles artifacts and creates the release (no compiled output). Co-Authored-By: Claude Opus 4.8 (1M context) --- .github/workflows/build-llama.yml | 4 ++-- .github/workflows/build.yml | 4 ++-- .github/workflows/release.yaml | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/build-llama.yml b/.github/workflows/build-llama.yml index 4821a6c..fe4b527 100644 --- a/.github/workflows/build-llama.yml +++ b/.github/workflows/build-llama.yml @@ -62,8 +62,8 @@ jobs: matrix: include: - { runner: macos-14, os: darwin, arch: arm64, kind: unix } - - { runner: ubuntu-latest, os: linux, arch: amd64, kind: unix } - - { runner: ubuntu-24.04-arm, os: linux, arch: arm64, kind: unix } + - { runner: ubuntu-22.04, os: linux, arch: amd64, kind: unix } + - { runner: ubuntu-22.04-arm, os: linux, arch: arm64, kind: unix } - { runner: windows-latest, os: windows, arch: amd64, kind: windows } steps: diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index c1760b1..05c1c79 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -23,8 +23,8 @@ jobs: matrix: include: - { runner: macos-14, goos: darwin, goarch: arm64, smoke: true } - - { runner: ubuntu-latest, goos: linux, goarch: amd64, smoke: true } - - { runner: ubuntu-24.04-arm, goos: linux, goarch: arm64, smoke: true } + - { runner: ubuntu-22.04, goos: linux, goarch: amd64, smoke: 
true } + - { runner: ubuntu-22.04-arm, goos: linux, goarch: arm64, smoke: true } runs-on: ${{ matrix.runner }} steps: - uses: actions/checkout@v4 diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index 5e693b2..49aedde 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -38,11 +38,11 @@ jobs: goos: darwin goarch: arm64 kind: tarball - - runner: ubuntu-latest + - runner: ubuntu-22.04 goos: linux goarch: amd64 kind: tarball - - runner: ubuntu-24.04-arm + - runner: ubuntu-22.04-arm goos: linux goarch: arm64 kind: tarball From 815d086459a2d38e9c62de3ccb99ec3c52a7171e Mon Sep 17 00:00:00 2001 From: Jeonghwan Lee Date: Tue, 2 Jun 2026 16:09:07 +0900 Subject: [PATCH 2/2] ci: name llama-server/runed artifacts by build OS; drop Windows Two related cleanups on top of the ubuntu-22.04 runner pin: 1. Encode the build OS in artifact names so the glibc/SDK baseline is visible and cacheable, read from the running image (RUNNER_OS + /etc/os-release / sw_vers) rather than the runner label (which can be a rolling alias like ubuntu-latest): - llama-server GHCR OCI tag: {ref}-{os}-{arch} -> {ref}-{os_type}-{os_version}-{arch} e.g. b9352-ubuntu-2204-amd64 / b9352-mac-14-arm64 An OS bump now busts the cache and forces a fresh build instead of reusing a binary linked against an incompatible glibc. - release tarball (Makefile, via OS_LABEL passed by release.yaml): runed-{version}-{goos}-{goarch}.tar.gz -> runed-{version}-ubuntu-2204-amd64.tar.gz / runed-{version}-mac-14-arm64.tar.gz OS_LABEL defaults to GOOS so local `make release-tarball` is unchanged. 2. Drop Windows from CI for now: remove the windows-latest matrix entries and all windows-only steps from build-llama.yml and release.yaml (the llama-server.exe build/pull/push and the release zip), and drop the now-empty `*.zip` from the publish upload glob. Stale Windows comments in the Makefile are removed. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- .github/workflows/build-llama.yml | 90 +++++++++++++++---------------- .github/workflows/release.yaml | 67 ++++++++++------------- Makefile | 19 ++++--- 3 files changed, 81 insertions(+), 95 deletions(-) diff --git a/.github/workflows/build-llama.yml b/.github/workflows/build-llama.yml index fe4b527..9de9935 100644 --- a/.github/workflows/build-llama.yml +++ b/.github/workflows/build-llama.yml @@ -1,6 +1,7 @@ name: build-llama -# Vendor prebuild: builds llama-server for every supported platform, +# Vendor prebuild: builds llama-server for every supported platform +# (linux amd64/arm64 + darwin arm64; Windows support is dropped for now), # publishes the binary to GHCR (when permissions allow), and emits an # actions/upload-artifact so callers via workflow_call can consume the # binary without going through GHCR. build.yml and release.yaml call @@ -17,7 +18,11 @@ name: build-llama # - push to main on the same paths: build AND push to GHCR. # # Artifacts: -# GHCR: ghcr.io//llama-server:-- +# GHCR: ghcr.io//llama-server:--- +# e.g. ...-ubuntu-2204-amd64 / ...-mac-14-arm64. +# OS type+version are read from the running image (not the runner +# label, which can be a rolling alias), so an OS/glibc bump busts the +# cache and triggers a fresh build. # Workflow artifact: llama-server-- containing the binary. 
on: @@ -61,10 +66,9 @@ jobs: fail-fast: false matrix: include: - - { runner: macos-14, os: darwin, arch: arm64, kind: unix } - - { runner: ubuntu-22.04, os: linux, arch: amd64, kind: unix } - - { runner: ubuntu-22.04-arm, os: linux, arch: arm64, kind: unix } - - { runner: windows-latest, os: windows, arch: amd64, kind: windows } + - { runner: macos-14, os: darwin, arch: arm64 } + - { runner: ubuntu-22.04, os: linux, arch: amd64 } + - { runner: ubuntu-22.04-arm, os: linux, arch: arm64 } steps: - uses: actions/checkout@v4 @@ -83,8 +87,35 @@ jobs: id: tag shell: bash run: | + set -euo pipefail ref_repo=$(echo "${{ github.repository }}" | tr '[:upper:]' '[:lower:]') - echo "tag=ghcr.io/${ref_repo}/llama-server:${{ steps.ref.outputs.ref }}-${{ matrix.os }}-${{ matrix.arch }}" >> "$GITHUB_OUTPUT" + # Tag layout: {ref}-{os_type}-{os_version}-{arch}, e.g. + # b9352-ubuntu-2204-amd64 / b9352-mac-14-arm64 + # OS type+version are read from the running image (not the runner + # label, which can be a rolling alias like ubuntu-latest) so the + # build baseline — most importantly the linux glibc version — is part + # of the cache key. An OS bump then busts the cache and forces a + # fresh build instead of reusing a binary built against another glibc. + case "${RUNNER_OS}" in + Linux) + . /etc/os-release + os_type="${ID}" # ubuntu + os_version="${VERSION_ID//./}" # 22.04 -> 2204 + ;; + macOS) + os_type="mac" + os_version="$(sw_vers -productVersion | cut -d. 
-f1)" # 14 + ;; + *) + echo "::error::unhandled RUNNER_OS=${RUNNER_OS}" >&2 + exit 1 + ;; + esac + if [ -z "${os_type:-}" ] || [ -z "${os_version:-}" ]; then + echo "::error::could not resolve OS type/version for the tag (RUNNER_OS=${RUNNER_OS})" >&2 + exit 1 + fi + echo "tag=ghcr.io/${ref_repo}/llama-server:${{ steps.ref.outputs.ref }}-${os_type}-${os_version}-${{ matrix.arch }}" >> "$GITHUB_OUTPUT" # ─── GHCR login + existence check ───────────────────────────────── - name: Install oras @@ -128,7 +159,7 @@ jobs: # ─── Path 1: Pull existing binary from GHCR ───────────────────────── - name: Pull llama-server from GHCR - if: steps.plan.outputs.skip_build == 'true' && matrix.kind == 'unix' + if: steps.plan.outputs.skip_build == 'true' shell: bash run: | set -euo pipefail @@ -137,44 +168,18 @@ jobs: chmod +x bin/llama-server bin/llama-server --version - - name: Pull llama-server.exe from GHCR - if: steps.plan.outputs.skip_build == 'true' && matrix.kind == 'windows' - shell: pwsh - run: | - New-Item -ItemType Directory -Force -Path bin | Out-Null - oras pull --output bin "${{ steps.tag.outputs.tag }}" - # ─── Path 2: Build llama-server fresh ───────────────────────────── - - name: Clone llama.cpp (windows) - if: steps.plan.outputs.skip_build != 'true' && matrix.kind == 'windows' - shell: bash - run: | - mkdir -p third_party - git clone --depth 1 --branch ${{ steps.ref.outputs.ref }} \ - https://github.com/ggml-org/llama.cpp third_party/llama.cpp - - - name: Build llama-server (unix) - if: steps.plan.outputs.skip_build != 'true' && matrix.kind == 'unix' + - name: Build llama-server + if: steps.plan.outputs.skip_build != 'true' run: make llama-server - - name: Build llama-server.exe (windows) - if: steps.plan.outputs.skip_build != 'true' && matrix.kind == 'windows' - shell: pwsh - run: | - cmake -B third_party/llama.cpp/build -S third_party/llama.cpp -C ci/llama-cpp-cache.cmake - # -j 2 caps parallel compile jobs to avoid OOM on 16GB runners. 
- cmake --build third_party/llama.cpp/build --target llama-server -j 2 --config Release - New-Item -ItemType Directory -Force -Path bin | Out-Null - Copy-Item third_party/llama.cpp/build/bin/Release/llama-server.exe bin/ - - name: Sanity check binary - if: matrix.kind == 'unix' shell: bash run: bin/llama-server --version # ─── GHCR push (only when we built fresh AND we have write perms) ──── - - name: Push llama-server to GHCR (unix) - if: steps.plan.outputs.skip_build != 'true' && github.event_name != 'pull_request' && matrix.kind == 'unix' + - name: Push llama-server to GHCR + if: steps.plan.outputs.skip_build != 'true' && github.event_name != 'pull_request' shell: bash working-directory: bin run: | @@ -182,15 +187,6 @@ jobs: --artifact-type "application/vnd.runed.llama-server" \ llama-server - - name: Push llama-server.exe to GHCR (windows) - if: steps.plan.outputs.skip_build != 'true' && github.event_name != 'pull_request' && matrix.kind == 'windows' - shell: pwsh - working-directory: bin - run: | - oras push "${{ steps.tag.outputs.tag }}" ` - --artifact-type "application/vnd.runed.llama-server" ` - llama-server.exe - # ─── Workflow artifact (always — consumed by build.yml / release.yaml) ─ - name: Upload llama-server artifact uses: actions/upload-artifact@v4 diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index 49aedde..f1cae12 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -34,23 +34,10 @@ jobs: matrix: include: # Full-stack tarballs: runed + rundemo + llama-server. - - runner: macos-14 - goos: darwin - goarch: arm64 - kind: tarball - - runner: ubuntu-22.04 - goos: linux - goarch: amd64 - kind: tarball - - runner: ubuntu-22.04-arm - goos: linux - goarch: arm64 - kind: tarball - # Windows: llama-server.exe only zip (runed Windows support deferred). - - runner: windows-latest - goos: windows - goarch: amd64 - kind: llama-only + # (Windows support is dropped for now — no Windows target here.) 
+ - { runner: macos-14, goos: darwin, goarch: arm64 } + - { runner: ubuntu-22.04, goos: linux, goarch: amd64 } + - { runner: ubuntu-22.04-arm, goos: linux, goarch: arm64 } steps: - uses: actions/checkout@v4 @@ -73,25 +60,40 @@ jobs: name: llama-server-${{ matrix.goos }}-${{ matrix.goarch }} path: bin/ - - name: Ensure llama-server executable (unix) - if: matrix.kind == 'tarball' + - name: Ensure llama-server executable shell: bash run: chmod +x bin/llama-server - # ─── Smoke gate before packaging (tarball matrix only) ────────── + # Build OS label (ubuntu-2204 / mac-14) for the release asset name, read + # from the running image so it reflects the actual build baseline/glibc. + - name: Resolve build OS label + id: osinfo + shell: bash + run: | + set -euo pipefail + case "${RUNNER_OS}" in + Linux) + . /etc/os-release + echo "label=${ID}-${VERSION_ID//./}" >> "$GITHUB_OUTPUT" # ubuntu-2204 + ;; + macOS) + echo "label=mac-$(sw_vers -productVersion | cut -d. -f1)" >> "$GITHUB_OUTPUT" # mac-14 + ;; + *) + echo "::error::unhandled RUNNER_OS=${RUNNER_OS}" >&2; exit 1 ;; + esac + + # ─── Smoke gate before packaging ──────────────────────────────── - uses: actions/setup-go@v5 - if: matrix.kind == 'tarball' with: go-version-file: go.mod check-latest: true - uses: bufbuild/buf-setup-action@v1 - if: matrix.kind == 'tarball' with: github_token: ${{ secrets.GITHUB_TOKEN }} - name: Read embedding model pin - if: matrix.kind == 'tarball' id: model shell: bash run: | @@ -100,14 +102,12 @@ jobs: echo "file=$(awk '/^file:/ {print $2}' .embedding-model.yaml)" >> "$GITHUB_OUTPUT" - name: Cache embedding model - if: matrix.kind == 'tarball' uses: actions/cache@v4 with: path: models key: embedding-model-${{ steps.model.outputs.sha256 }} - name: Fetch embedding model (cache miss only) - if: matrix.kind == 'tarball' shell: bash env: FILE: ${{ steps.model.outputs.file }} @@ -122,7 +122,6 @@ jobs: echo "${SHA} models/${FILE}" | shasum -a 256 -c - - name: Run integration tests (release gate) 
- if: matrix.kind == 'tarball' env: RUNED_TEST_LLAMA_SERVER: ${{ github.workspace }}/bin/llama-server RUNED_TEST_GGUF: ${{ github.workspace }}/models/${{ steps.model.outputs.file }} @@ -130,7 +129,6 @@ jobs: # ─── Build runed + package tarball ────────────────────────────── - name: Build runed binaries - if: matrix.kind == 'tarball' env: GOOS: ${{ matrix.goos }} GOARCH: ${{ matrix.goarch }} @@ -139,24 +137,13 @@ jobs: run: make build - name: Package tarball - if: matrix.kind == 'tarball' env: GOOS: ${{ matrix.goos }} GOARCH: ${{ matrix.goarch }} + OS_LABEL: ${{ steps.osinfo.outputs.label }} VERSION: ${{ steps.meta.outputs.version }} run: make release-tarball - # ─── Windows: llama-server.exe zip ────────────────────────────── - - name: Package llama-server zip (windows) - if: matrix.kind == 'llama-only' - shell: pwsh - run: | - New-Item -ItemType Directory -Force -Path dist | Out-Null - $name = "llama-server-${{ steps.meta.outputs.version }}-${{ matrix.goos }}-${{ matrix.goarch }}.zip" - Compress-Archive -Path bin/llama-server.exe -DestinationPath "dist/$name" - $hash = (Get-FileHash "dist/$name" -Algorithm SHA256).Hash.ToLower() - "$hash $name" | Out-File -Encoding ascii "dist/$name.sha256" - - uses: actions/upload-artifact@v4 with: name: release-${{ matrix.goos }}-${{ matrix.goarch }} @@ -224,4 +211,4 @@ jobs: --title "${{ steps.meta.outputs.version }}" \ --generate-notes \ --prerelease \ - manifest.json *.tar.gz *.zip *.sha256 + manifest.json *.tar.gz *.sha256 diff --git a/Makefile b/Makefile index c6260c4..ffba173 100644 --- a/Makefile +++ b/Makefile @@ -4,6 +4,11 @@ VERSION ?= v0.1.0-alpha GOOS ?= $(shell go env GOOS) GOARCH ?= $(shell go env GOARCH) +# OS_LABEL names the build OS in the release tarball (e.g. ubuntu-2204, mac-14) +# so the artifact records its glibc/SDK baseline. CI passes the running image's +# real identity; local `make release-tarball` falls back to GOOS. 
+OS_LABEL ?= $(GOOS) + LLAMA_CPP_REF := $(shell cat .llama-cpp-version) LLAMA_CPP_DIR := third_party/llama.cpp LLAMA_CPP_CACHE := $(CURDIR)/ci/llama-cpp-cache.cmake @@ -57,9 +62,8 @@ build: proto -o bin/rundemo ./cmd/rundemo # Clone (shallow) and CPU-build llama-server at the pinned ref. -# Unix-only target — Windows CI invokes cmake directly because make/sh aren't -# the natural toolchain there. Reentrant: skips git clone if the directory -# already exists at the right ref, and skips cmake if the binary is fresh. +# Reentrant: skips git clone if the directory already exists at the right ref, +# and skips cmake if the binary is fresh. llama-server: @if [ ! -d "$(LLAMA_CPP_DIR)/.git" ]; then \ mkdir -p $(dir $(LLAMA_CPP_DIR)); \ @@ -67,8 +71,8 @@ llama-server: https://github.com/ggml-org/llama.cpp $(LLAMA_CPP_DIR); \ fi cmake -B $(LLAMA_CPP_DIR)/build -S $(LLAMA_CPP_DIR) -C $(LLAMA_CPP_CACHE) $(LLAMA_CMAKE_EXTRA) - # -j2 caps parallel compile jobs: ubuntu-latest (16GB / 4 vCPU) OOM-killed - # the build at -j auto. ubuntu-24.04-arm and macos-14 survived but the + # -j2 caps parallel compile jobs: ubuntu-22.04 (16GB / 4 vCPU) OOM-killed + # the build at -j auto. ubuntu-22.04-arm and macos-14 survived but the # bound is uniform across matrices for predictability. cmake --build $(LLAMA_CPP_DIR)/build --target llama-server -j 2 --config Release mkdir -p bin @@ -81,11 +85,10 @@ clean: rm -rf bin/ gen/ dist/ # Packages the Go binaries plus llama-server into a single tarball. -# Assumes `make build` and `make llama-server` (or the workflow's Windows -# equivalent) have already populated bin/. +# Assumes `make build` and `make llama-server` have already populated bin/. release-tarball: mkdir -p dist - TARNAME=runed-$(VERSION)-$(GOOS)-$(GOARCH).tar.gz; \ + TARNAME=runed-$(VERSION)-$(OS_LABEL)-$(GOARCH).tar.gz; \ tar -czf dist/$$TARNAME -C bin runed rundemo llama-server; \ cd dist && ( \ (command -v shasum >/dev/null 2>&1 && shasum -a 256 $$TARNAME > $$TARNAME.sha256) \