diff --git a/.github/workflows/smoke-reference.yml b/.github/workflows/smoke-reference.yml
new file mode 100644
index 00000000..8b875b41
--- /dev/null
+++ b/.github/workflows/smoke-reference.yml
@@ -0,0 +1,135 @@
+name: Smoke Reference
+
+# Runs the three reference-model smoke tests tagged `@Tag("smoke-reference")`:
+#   - Qwen3-1.7B Q8_0 GGUF (kllama runner)
+#   - Gemma-4 E2B SafeTensors (kgemma runner)
+#   - BERT + LEAF SafeTensors (llm-test-java)
+#
+# By default each test self-skips when its model artifact is not resolvable
+# through the standard env-var / `~/.lmstudio/models/` / `~/.cache/huggingface/hub/`
+# fallback chain, so this workflow is **green with empty inputs** — it merely
+# proves the wiring compiles and the JUnit filter resolves the smoke tier.
+#
+# To make a run actually exercise the models, trigger it with the matching
+# workflow inputs (URLs). The job downloads each artifact, sets the
+# corresponding env var the test reads (see Qwen3ReferenceSmokeTest.kt,
+# Gemma4ReferenceSmokeTest.kt, BertLeafReferenceSmokeTest.java), and runs
+# the same `./gradlew test -PsmokeReference -PincludeIntegration` invocation
+# that's documented in the repo's CHANGELOG and reference-smoke tests.
+#
+# Trigger pattern is manual (`workflow_dispatch`) — wiring this onto every
+# push would silently consume Actions minutes without doing meaningful work
+# until artifacts are available. A self-hosted runner with the three
+# checkpoints pre-cached on disk is the natural place to flip this to
+# `push: branches: [develop]` later.
+
+on:
+  workflow_dispatch:
+    inputs:
+      qwen3_gguf_url:
+        description: "Direct URL to Qwen3-1.7B-Q8_0.gguf (~1.9 GB). Leave blank to skip the kllama test."
+        required: false
+        default: ""
+      gemma4_safetensors_dir_url:
+        description: "Direct URL to a tar.gz containing the Gemma-4 E2B SafeTensors checkpoint directory. Leave blank to skip the kgemma test."
+        required: false
+        default: ""
+      leaf_safetensors_dir_url:
+        description: "Direct URL to a tar.gz containing the MongoDB mdbr-leaf-ir SafeTensors checkpoint directory. Leave blank to skip the BERT+LEAF test."
+        required: false
+        default: ""
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+# Least privilege: no step in this workflow writes to the repository, so
+# the GITHUB_TOKEN only needs read access for checkout.
+permissions:
+  contents: read
+
+jobs:
+  smoke-reference:
+    runs-on: ubuntu-latest
+    timeout-minutes: 90
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v6
+
+      - name: Copy CI gradle.properties
+        run: mkdir -p ~/.gradle ; cp .github/ci-gradle.properties ~/.gradle/gradle.properties
+
+      - name: Set up JDK 25
+        uses: actions/setup-java@v5
+        with:
+          distribution: 'zulu'
+          java-version: '25'
+
+      - name: Disk space (before downloads)
+        run: df -h
+
+      - name: Stage Qwen3-1.7B Q8_0 GGUF
+        if: inputs.qwen3_gguf_url != ''
+        env:
+          URL: ${{ inputs.qwen3_gguf_url }}
+        run: |
+          set -euo pipefail
+          mkdir -p "$RUNNER_TEMP/models/qwen3"
+          curl -fsSL "$URL" -o "$RUNNER_TEMP/models/qwen3/Qwen3-1.7B-Q8_0.gguf"
+          echo "QWEN3_1B7_MODEL_PATH=$RUNNER_TEMP/models/qwen3/Qwen3-1.7B-Q8_0.gguf" >> "$GITHUB_ENV"
+
+      - name: Stage Gemma-4 E2B SafeTensors
+        if: inputs.gemma4_safetensors_dir_url != ''
+        env:
+          URL: ${{ inputs.gemma4_safetensors_dir_url }}
+        # NOTE(review): assumes the archive unpacks the checkpoint files
+        # directly into the target dir; if it wraps them in a top-level
+        # folder, `--strip-components=1` is needed — confirm against the tarball.
+        run: |
+          set -euo pipefail
+          mkdir -p "$RUNNER_TEMP/models/gemma4"
+          curl -fsSL "$URL" | tar -xz -C "$RUNNER_TEMP/models/gemma4"
+          echo "GEMMA4_E2B_SAFETENSORS_PATH=$RUNNER_TEMP/models/gemma4" >> "$GITHUB_ENV"
+
+      - name: Stage MongoDB LEAF SafeTensors
+        if: inputs.leaf_safetensors_dir_url != ''
+        env:
+          URL: ${{ inputs.leaf_safetensors_dir_url }}
+        # NOTE(review): same archive-layout assumption as the Gemma step
+        # above — confirm against the actual tarball.
+        run: |
+          set -euo pipefail
+          mkdir -p "$RUNNER_TEMP/models/leaf"
+          curl -fsSL "$URL" | tar -xz -C "$RUNNER_TEMP/models/leaf"
+          echo "LEAF_MODEL_DIR=$RUNNER_TEMP/models/leaf" >> "$GITHUB_ENV"
+
+      - name: Run smoke-reference tier
+        env:
+          GRADLE_OPTS: -Dorg.gradle.jvmargs="-Xmx4g -Dfile.encoding=UTF-8"
+        run: |
+          ./gradlew --no-daemon --stacktrace \
+            -Dorg.gradle.caching=true \
+            -Dorg.gradle.configuration-cache=true \
+            -PsmokeReference -PincludeIntegration \
+            test
+
+      - name: Disk space (after run)
+        if: always()
+        run: df -h || true
+
+      - name: Memory info (on failure)
+        if: failure()
+        run: |
+          free -h || true
+          head -n 50 /proc/meminfo || true
+
+      - name: Upload smoke-reference test reports
+        if: always()
+        uses: actions/upload-artifact@v7
+        with:
+          name: smoke-reference-reports
+          path: |
+            **/build/reports/tests/**
+            **/build/test-results/**
+          retention-days: 14
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 14651837..837c79d2 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -93,6 +93,18 @@ window without a tagged 0.24.x release on either side.
   `MongoDB-mdbr-leaf-ir`) so the shell smoke harness and the JVM smoke
   tier point at the same artifacts. The `smoke-test.sh` script does not
   yet consume the flag — follow-up.
+- **`smoke-reference` GitHub Actions workflow.** New
+  `.github/workflows/smoke-reference.yml` triggers the three
+  `@Tag("smoke-reference")` tests via `./gradlew test -PsmokeReference
+  -PincludeIntegration`. `workflow_dispatch`-only (manual) with three
+  optional URL inputs — supply each artifact URL via the dispatch form
+  and the staging steps download it into `RUNNER_TEMP`, set the env var
+  the test reads (`QWEN3_1B7_MODEL_PATH` / `GEMMA4_E2B_SAFETENSORS_PATH`
+  / `LEAF_MODEL_DIR`), and the smoke tier actually exercises the models.
+  Run with empty inputs and every test self-skips via JUnit
+  `Assumptions` — the workflow is green either way, so it's safe to
+  promote to `push: branches: [develop]` later once a self-hosted
+  runner with pre-cached checkpoints is available.
 - **Catalog goes BOM-only.** Every `skainet-*` alias in
   `gradle/libs.versions.toml` is now coordinate-only (no `version.ref`);
   versions are supplied by the `sk.ainet:skainet-bom` platform
@@ -128,9 +140,6 @@ changes land in follow-up PRs.
   `Require(BF16)` for GGUF today (no KEEP_NATIVE GGUF backing yet), so
   this is parked until the engine grows that path. *(SafeTensors BF16
   KEEP_NATIVE shipped in this release — see Added.)*
-- **A `smoke-reference` GitHub Actions job.** The Gradle filter is in
-  place; the CI workflow that triggers it (with self-hosted model cache)
-  lands separately.
 
 ## [0.23.4] — 2026-05-08
 