diff --git a/skainet-backends/skainet-backend-cpu/src/commonMain/kotlin/sk/ainet/exec/tensor/ops/DefaultCpuOps.kt b/skainet-backends/skainet-backend-cpu/src/commonMain/kotlin/sk/ainet/exec/tensor/ops/DefaultCpuOps.kt index d61eb889..0e2c889a 100644 --- a/skainet-backends/skainet-backend-cpu/src/commonMain/kotlin/sk/ainet/exec/tensor/ops/DefaultCpuOps.kt +++ b/skainet-backends/skainet-backend-cpu/src/commonMain/kotlin/sk/ainet/exec/tensor/ops/DefaultCpuOps.kt @@ -24,6 +24,7 @@ import sk.ainet.lang.tensor.data.Q5_1TensorData import sk.ainet.lang.tensor.data.Q5_1BlockTensorData import sk.ainet.lang.tensor.data.Q5_0TensorData import sk.ainet.lang.tensor.data.Q5_0BlockTensorData +import sk.ainet.lang.tensor.data.Q8_0BlockTensorData import sk.ainet.lang.tensor.data.TensorData import sk.ainet.lang.tensor.data.TensorDataFactory import sk.ainet.lang.tensor.ops.UpsampleMode @@ -606,6 +607,12 @@ public open class DefaultCpuOpsBase(protected val dataFactory: TensorDataFactory is Q6_KTensorData -> return newTensor(Q6_KBlockTensorData(Shape(cols, rows), d.packedData) as TensorData, tensor.dtype, tensor) is Q5_1TensorData -> return newTensor(Q5_1BlockTensorData(Shape(cols, rows), d.packedData) as TensorData, tensor.dtype, tensor) is Q5_0TensorData -> return newTensor(Q5_0BlockTensorData(Shape(cols, rows), d.packedData) as TensorData, tensor.dtype, tensor) + // Q8_0 lazy transpose: rewrap the same input-block-major bytes with + // flipped shape (bytes are layout-agnostic to the [out,in] kernel + // convention) so a packed Q8_0 weight (e.g. gemma's tied lm_head) + // survives linearProject's transpose instead of hitting the generic + // FP32 path (Byte→Float ClassCastException). See transformers #178. + is Q8_0TensorData -> return newTensor(Q8_0BlockTensorData(Shape(cols, rows), d.packedData) as TensorData, tensor.dtype, tensor) else -> {} } }