diff --git a/docs/eager-execution-backends-and-kernels.md b/docs/eager-execution-backends-and-kernels.md index 0544dadf..8f2aa33a 100644 --- a/docs/eager-execution-backends-and-kernels.md +++ b/docs/eager-execution-backends-and-kernels.md @@ -88,5 +88,7 @@ those formats were JVM-only and broke on Native. - ❌ **Other GGML quant formats** (Q5_K, Q2_K, Q3_K, Q8_K, IQ4_NL/XS) — loadable via dequant-to-FP32, but no packed matmul kernel. - ❌ **Non-CPU eager backends** (IREE, Metal, GPU) — the `KernelProvider` SPI anticipates them, but none are implemented for the eager path today. -> Generated as a hand-authored overview. A machine-generated kernel × platform matrix -> (derived from the registered `KernelProvider`s) is a planned follow-up so this stays in sync. +> This mindmap is a hand-authored overview. Its companion +> [kernel × platform support matrix](kernel-support-matrix.md) is generated by +> `KernelSupportMatrixTest`: only the scalar (all-platform) floor is derived from the provider and +> CI-gated against drift; the Panama and native-FFM tiers are declared by hand in that test. diff --git a/docs/kernel-support-matrix.md b/docs/kernel-support-matrix.md new file mode 100644 index 00000000..c1094a63 --- /dev/null +++ b/docs/kernel-support-matrix.md @@ -0,0 +1,20 @@ +# Kernel × platform support matrix + +> Generated by `KernelSupportMatrixTest`. The scalar (all-platform) coverage is +> auto-derived from `KernelProvider.supports(...)`; re-run the test to refresh. +> Cell = best available provider for `FP32 × format` on that platform.
+ +| Weight format | JVM | Android | Native·linux | Native·apple | JS/WASM | +|---|:--:|:--:|:--:|:--:|:--:| +| `Float32` | native-ffm | panama-vector | scalar | scalar | scalar | +| `BFloat16` | native-ffm | panama-vector | scalar | scalar | scalar | +| `Q8_0` | native-ffm | panama-vector | scalar | scalar | scalar | +| `Q4_0` | native-ffm | panama-vector | scalar | scalar | scalar | +| `Q4_K` | native-ffm | panama-vector | scalar | scalar | scalar | +| `Q6_K` | scalar | scalar | scalar | scalar | scalar | +| `Q5_1` | panama-vector | panama-vector | scalar | scalar | scalar | +| `Q5_0` | panama-vector | panama-vector | scalar | scalar | scalar | + +Priority: native-ffm (100) → panama-vector (50) → scalar (0). Formats without any cell (e.g. Q5_K/Q2_K/Q3_K/IQ4) are dequant-to-FP32 only. + +See also the [eager backends & kernels mindmap](eager-execution-backends-and-kernels.md). diff --git a/skainet-backends/skainet-backend-native-cpu/src/jvmTest/kotlin/sk/ainet/exec/kernel/KernelSupportMatrixTest.kt b/skainet-backends/skainet-backend-native-cpu/src/jvmTest/kotlin/sk/ainet/exec/kernel/KernelSupportMatrixTest.kt new file mode 100644 index 00000000..c42ff611 --- /dev/null +++ b/skainet-backends/skainet-backend-native-cpu/src/jvmTest/kotlin/sk/ainet/exec/kernel/KernelSupportMatrixTest.kt @@ -0,0 +1,97 @@ +package sk.ainet.exec.kernel + +import java.io.File +import kotlin.test.Test +import kotlin.test.assertEquals +import sk.ainet.backend.api.kernel.KernelProvider + +/** + * Generates the kernel × platform support matrix (rendered to + * `build/kernel-support-matrix.md`) and **gates drift in the scalar floor**: the + * all-platform baseline coverage is auto-derived from `ScalarKernelProvider.supports(...)`, + * so adding/removing a scalar packed kernel without updating the docs fails this test (it + * runs under `java-tests` in CI). 
+ * + * The SIMD/native tiers (Panama, native-FFM) are env-availability-gated (`isAvailable()` + * probes the JDK incubator module / the loaded `.so`), so their *capability* is declared + * here (the single place to edit when a provider gains a kernel) rather than probed. + */ +class KernelSupportMatrixTest { + + private val formats = listOf("Float32", "BFloat16", "Q8_0", "Q4_0", "Q4_K", "Q6_K", "Q5_1", "Q5_0") + + private data class Tier(val name: String, val priority: Int, val targets: Set, val formats: Set) + + // Source-set -> targets. commonMain reaches all; backend-cpu jvmMain -> {jvm,android}; + // backend-native-cpu jvmMain -> {jvm} (the native module declares only jvm()). + private val allTargets = setOf("jvm", "android", "native-linux", "native-apple", "js-wasm") + + private fun scalarFormats(): Set = + formats.filter { ScalarKernelProvider.supports("matmul", listOf("Float32", it)) }.toSet() + + private fun tiers(): List = listOf( + Tier("scalar", 0, allTargets, scalarFormats()), + Tier("panama-vector", 50, setOf("jvm", "android"), + setOf("Float32", "BFloat16", "Q8_0", "Q4_0", "Q4_K", "Q5_1", "Q5_0")), + Tier("native-ffm", 100, setOf("jvm"), + setOf("Float32", "BFloat16", "Q8_0", "Q4_0", "Q4_K")), + ) + + private fun bestTier(fmt: String, target: String, tiers: List): Tier? = + tiers.filter { target in it.targets && fmt in it.formats }.maxByOrNull { it.priority } + + private fun render(tiers: List): String { + val cols = listOf("jvm" to "JVM", "android" to "Android", "native-linux" to "Native·linux", + "native-apple" to "Native·apple", "js-wasm" to "JS/WASM") + val sb = StringBuilder() + sb.appendLine("# Kernel × platform support matrix") + sb.appendLine() + sb.appendLine("> Generated by `KernelSupportMatrixTest`. 
The scalar (all-platform) coverage is") + sb.appendLine("> auto-derived from `KernelProvider.supports(...)`; re-run the test to refresh.") + sb.appendLine("> Cell = best available provider for `FP32 × format` on that platform.") + sb.appendLine() + sb.append("| Weight format |") + cols.forEach { sb.append(" ${it.second} |") } + sb.appendLine() + sb.append("|---|") + cols.forEach { _ -> sb.append(":--:|") } + sb.appendLine() + for (fmt in formats) { + sb.append("| `$fmt` |") + for ((target, _) in cols) { + val t = bestTier(fmt, target, tiers) + sb.append(" ${t?.name ?: "—"} |") + } + sb.appendLine() + } + sb.appendLine() + sb.appendLine("Priority: native-ffm (100) → panama-vector (50) → scalar (0). " + + "Formats without any cell (e.g. Q5_K/Q2_K/Q3_K/IQ4) are dequant-to-FP32 only.") + sb.appendLine() + sb.appendLine("See also the [eager backends & kernels mindmap](eager-execution-backends-and-kernels.md).") + return sb.toString() + } + + @Test + fun generate_and_gate_support_matrix() { + val tiers = tiers() + + // Drift gate on the scalar floor (the all-platform baseline): the documented set + // below must equal what the scalar provider actually carries. Update both together. + assertEquals( + setOf("Float32", "BFloat16", "Q8_0", "Q4_0", "Q4_K", "Q6_K", "Q5_1", "Q5_0"), + scalarFormats(), + "ScalarKernelProvider coverage changed — update the matrix doc + this expected set", + ) + + // Sanity: every provider singleton is a KernelProvider (compile-time anchor). + val providers: List = listOf(ScalarKernelProvider, PanamaVectorKernelProvider, NativeKernelProvider) + assertEquals(3, providers.size) + + val md = render(tiers) + File("build").mkdirs() + File("build/kernel-support-matrix.md").writeText(md) + // Echo so CI logs carry the current matrix (easy to copy into docs/). + println("KERNEL_SUPPORT_MATRIX_BEGIN\n$md\nKERNEL_SUPPORT_MATRIX_END") + } +}