diff --git a/skainet-backends/skainet-backend-api/src/commonMain/kotlin/sk/ainet/backend/api/kernel/KernelProvider.kt b/skainet-backends/skainet-backend-api/src/commonMain/kotlin/sk/ainet/backend/api/kernel/KernelProvider.kt index a5934221..fd22f37f 100644 --- a/skainet-backends/skainet-backend-api/src/commonMain/kotlin/sk/ainet/backend/api/kernel/KernelProvider.kt +++ b/skainet-backends/skainet-backend-api/src/commonMain/kotlin/sk/ainet/backend/api/kernel/KernelProvider.kt @@ -67,6 +67,12 @@ public interface KernelProvider { */ public fun matmulQ8_0(): Q8_0MatmulKernel? = null + /** + * F32 × Q4_0 matmul kernel exposed by this provider, or `null` if + * this provider does not specialize Q4_0. Same fall-through pattern. + */ + public fun matmulQ4_0(): Q4_0MatmulKernel? = null + /** * Capability query: does this provider carry a kernel for * [opName] with the given [dtypeKeys]? @@ -100,6 +106,7 @@ public interface KernelProvider { "BFloat16" -> matmulBf16() != null "Q4_K" -> matmulQ4K() != null "Q8_0" -> matmulQ8_0() != null + "Q4_0" -> matmulQ4_0() != null else -> false } } diff --git a/skainet-backends/skainet-backend-api/src/commonMain/kotlin/sk/ainet/backend/api/kernel/Q4_0MatmulKernel.kt b/skainet-backends/skainet-backend-api/src/commonMain/kotlin/sk/ainet/backend/api/kernel/Q4_0MatmulKernel.kt new file mode 100644 index 00000000..fae0825b --- /dev/null +++ b/skainet-backends/skainet-backend-api/src/commonMain/kotlin/sk/ainet/backend/api/kernel/Q4_0MatmulKernel.kt @@ -0,0 +1,46 @@ +package sk.ainet.backend.api.kernel + +/** + * F32 input × Q4_0-packed weights matrix-vector multiply, in canonical + * ggml block layout. 
+ * + * output[outputOffset + o] = Σ_j input[inputOffset + j] · dequant(weight[o, j]) + * for j ∈ [0, inputDim), o ∈ [0, outputDim) + * + * Block layout (32-element block, 18 bytes/block; see + * [sk.ainet.lang.tensor.data.Q4_0BlockTensorData] kdoc): + * - bytes 0..1 : `d` (block scale, FP16 LE) + * - bytes 2..17 : 16 bytes packing 32 4-bit codes (split layout — low + * nibbles decode elements 0..15, high nibbles decode elements 16..31) + * + * Per element: `dequant = (code - 8) * d` (the `- 8` bias centres the + * unsigned 4-bit code around zero). Q4_0 has no per-block min / offset. + * + * Implementations MUST NOT mutate `input` or `weight`. They MAY assume + * the arrays do not alias each other or `output`. They MUST fully + * write the `outputDim` floats starting at `output[outputOffset]`. + * + * Packed-weight row-major contract: `weight` holds blocks laid out + * `(blockIdx * outputDim + o) * 18` for output row `o` and input block + * index `blockIdx`. This matches `Q4_0BlockTensorData.packedData`. + * + * `inputDim` MUST be a multiple of 32 (the Q4_0 block size). + */ +public interface Q4_0MatmulKernel { + /** + * @param input FP32 input vector (single row). + * @param inputOffset element offset into [input] where the row starts. + * @param weight packed Q4_0 bytes for the full `outputDim × inputDim` weight tensor. + * @param weightByteOffset byte offset into [weight] where block (0, 0) starts. + * @param inputDim contraction dimension (must be a multiple of 32). + * @param outputDim number of output cells. + * @param output FP32 output vector. + * @param outputOffset element offset into [output] where the row starts. 
+ */ + public fun matmul( + input: FloatArray, inputOffset: Int, + weight: ByteArray, weightByteOffset: Int, + inputDim: Int, outputDim: Int, + output: FloatArray, outputOffset: Int, + ) +} diff --git a/skainet-backends/skainet-backend-cpu/api/jvm/skainet-backend-cpu.api b/skainet-backends/skainet-backend-cpu/api/jvm/skainet-backend-cpu.api index af97636b..39da5c0a 100644 --- a/skainet-backends/skainet-backend-cpu/api/jvm/skainet-backend-cpu.api +++ b/skainet-backends/skainet-backend-cpu/api/jvm/skainet-backend-cpu.api @@ -17,13 +17,20 @@ public final class sk/ainet/context/DirectCpuExecutionContext : sk/ainet/context public fun getHooks ()Lsk/ainet/lang/nn/hooks/ForwardHooks; public fun getInTraining ()Z public fun getMemoryInfo ()Lsk/ainet/context/MemoryInfo; + public fun getMemoryPlanner ()Lsk/ainet/lang/tensor/storage/MemoryPlanner; + public fun getMemoryTracker ()Lsk/ainet/lang/tensor/storage/MemoryTracker; public fun getObservers ()Lsk/ainet/context/ExecutionObserverRegistry; public fun getOps ()Lsk/ainet/lang/tensor/ops/TensorOps; public fun getPhase ()Lsk/ainet/context/Phase; + public fun getScratch ()Lsk/ainet/lang/tensor/scratch/ScratchPool; public fun getTensorDataFactory ()Lsk/ainet/lang/tensor/data/TensorDataFactory; public fun ones (Lsk/ainet/lang/tensor/Shape;Lkotlin/reflect/KClass;)Lsk/ainet/lang/tensor/Tensor; + public fun placeholder (Lsk/ainet/lang/tensor/Shape;Lkotlin/reflect/KClass;)Lsk/ainet/lang/tensor/Tensor; public fun registerObserver (Lsk/ainet/context/ExecutionObserver;)V public fun unregisterObserver (Lsk/ainet/context/ExecutionObserver;)V + public fun wrapByteArray (Lsk/ainet/lang/tensor/Shape;Lkotlin/reflect/KClass;[B)Lsk/ainet/lang/tensor/Tensor; + public fun wrapFloatArray (Lsk/ainet/lang/tensor/Shape;Lkotlin/reflect/KClass;[F)Lsk/ainet/lang/tensor/Tensor; + public fun wrapIntArray (Lsk/ainet/lang/tensor/Shape;Lkotlin/reflect/KClass;[I)Lsk/ainet/lang/tensor/Tensor; public fun zeros 
(Lsk/ainet/lang/tensor/Shape;Lkotlin/reflect/KClass;)Lsk/ainet/lang/tensor/Tensor; } @@ -33,6 +40,103 @@ public final class sk/ainet/context/DirectCpuExecutionContext$Companion { public static synthetic fun create$default (Lsk/ainet/context/DirectCpuExecutionContext$Companion;Lsk/ainet/context/Phase;ILjava/lang/Object;)Lsk/ainet/context/DirectCpuExecutionContext; } +public final class sk/ainet/exec/kernel/PanamaVectorBf16MatmulKernel : sk/ainet/backend/api/kernel/Bf16MatmulKernel { + public static final field INSTANCE Lsk/ainet/exec/kernel/PanamaVectorBf16MatmulKernel; + public fun matmul ([FII[BII[FIIIII)V +} + +public final class sk/ainet/exec/kernel/PanamaVectorKernelProvider : sk/ainet/backend/api/kernel/KernelProvider { + public static final field INSTANCE Lsk/ainet/exec/kernel/PanamaVectorKernelProvider; + public fun getName ()Ljava/lang/String; + public fun getPriority ()I + public fun isAvailable ()Z + public fun matmulBf16 ()Lsk/ainet/backend/api/kernel/Bf16MatmulKernel; + public fun matmulFp32 ()Lsk/ainet/backend/api/kernel/Fp32MatmulKernel; + public fun matmulQ4K ()Lsk/ainet/backend/api/kernel/Q4KMatmulKernel; + public fun matmulQ4_0 ()Lsk/ainet/backend/api/kernel/Q4_0MatmulKernel; + public fun matmulQ8_0 ()Lsk/ainet/backend/api/kernel/Q8_0MatmulKernel; + public fun supports (Ljava/lang/String;Ljava/util/List;)Z +} + +public final class sk/ainet/exec/kernel/PanamaVectorKernelProviderFactory : sk/ainet/backend/api/kernel/KernelProvider { + public fun ()V + public fun getName ()Ljava/lang/String; + public fun getPriority ()I + public fun isAvailable ()Z + public fun matmulBf16 ()Lsk/ainet/backend/api/kernel/Bf16MatmulKernel; + public fun matmulFp32 ()Lsk/ainet/backend/api/kernel/Fp32MatmulKernel; + public fun matmulQ4K ()Lsk/ainet/backend/api/kernel/Q4KMatmulKernel; + public fun matmulQ4_0 ()Lsk/ainet/backend/api/kernel/Q4_0MatmulKernel; + public fun matmulQ8_0 ()Lsk/ainet/backend/api/kernel/Q8_0MatmulKernel; + public fun supports 
(Ljava/lang/String;Ljava/util/List;)Z +} + +public final class sk/ainet/exec/kernel/PanamaVectorMatmulKernel : sk/ainet/backend/api/kernel/Fp32MatmulKernel { + public static final field INSTANCE Lsk/ainet/exec/kernel/PanamaVectorMatmulKernel; + public fun matmul ([FII[FII[FIIIII)V +} + +public final class sk/ainet/exec/kernel/PanamaVectorQ4KMatmulKernel : sk/ainet/backend/api/kernel/Q4KMatmulKernel { + public static final field INSTANCE Lsk/ainet/exec/kernel/PanamaVectorQ4KMatmulKernel; + public fun matmul ([FI[BIII[FI)V +} + +public final class sk/ainet/exec/kernel/PanamaVectorQ4_0MatmulKernel : sk/ainet/backend/api/kernel/Q4_0MatmulKernel { + public static final field INSTANCE Lsk/ainet/exec/kernel/PanamaVectorQ4_0MatmulKernel; + public fun matmul ([FI[BIII[FI)V +} + +public final class sk/ainet/exec/kernel/PanamaVectorQ8_0MatmulKernel : sk/ainet/backend/api/kernel/Q8_0MatmulKernel { + public static final field INSTANCE Lsk/ainet/exec/kernel/PanamaVectorQ8_0MatmulKernel; + public fun matmul ([FI[BIII[FI)V +} + +public final class sk/ainet/exec/kernel/ScalarBf16MatmulKernel : sk/ainet/backend/api/kernel/Bf16MatmulKernel { + public static final field INSTANCE Lsk/ainet/exec/kernel/ScalarBf16MatmulKernel; + public fun matmul ([FII[BII[FIIIII)V +} + +public final class sk/ainet/exec/kernel/ScalarKernelProvider : sk/ainet/backend/api/kernel/KernelProvider { + public static final field INSTANCE Lsk/ainet/exec/kernel/ScalarKernelProvider; + public fun getName ()Ljava/lang/String; + public fun getPriority ()I + public fun isAvailable ()Z + public fun matmulBf16 ()Lsk/ainet/backend/api/kernel/Bf16MatmulKernel; + public fun matmulFp32 ()Lsk/ainet/backend/api/kernel/Fp32MatmulKernel; + public fun matmulQ4K ()Lsk/ainet/backend/api/kernel/Q4KMatmulKernel; + public fun matmulQ4_0 ()Lsk/ainet/backend/api/kernel/Q4_0MatmulKernel; + public fun matmulQ8_0 ()Lsk/ainet/backend/api/kernel/Q8_0MatmulKernel; + public fun supports (Ljava/lang/String;Ljava/util/List;)Z +} + +public final 
class sk/ainet/exec/kernel/ScalarKernelProviderFactory : sk/ainet/backend/api/kernel/KernelProvider { + public fun ()V + public fun getName ()Ljava/lang/String; + public fun getPriority ()I + public fun isAvailable ()Z + public fun matmulBf16 ()Lsk/ainet/backend/api/kernel/Bf16MatmulKernel; + public fun matmulFp32 ()Lsk/ainet/backend/api/kernel/Fp32MatmulKernel; + public fun matmulQ4K ()Lsk/ainet/backend/api/kernel/Q4KMatmulKernel; + public fun matmulQ4_0 ()Lsk/ainet/backend/api/kernel/Q4_0MatmulKernel; + public fun matmulQ8_0 ()Lsk/ainet/backend/api/kernel/Q8_0MatmulKernel; + public fun supports (Ljava/lang/String;Ljava/util/List;)Z +} + +public final class sk/ainet/exec/kernel/ScalarMatmulKernel : sk/ainet/backend/api/kernel/Fp32MatmulKernel { + public static final field INSTANCE Lsk/ainet/exec/kernel/ScalarMatmulKernel; + public fun matmul ([FII[FII[FIIIII)V +} + +public final class sk/ainet/exec/kernel/ScalarQ4_0MatmulKernel : sk/ainet/backend/api/kernel/Q4_0MatmulKernel { + public static final field INSTANCE Lsk/ainet/exec/kernel/ScalarQ4_0MatmulKernel; + public fun matmul ([FI[BIII[FI)V +} + +public final class sk/ainet/exec/kernel/ScalarQ8_0MatmulKernel : sk/ainet/backend/api/kernel/Q8_0MatmulKernel { + public static final field INSTANCE Lsk/ainet/exec/kernel/ScalarQ8_0MatmulKernel; + public fun matmul ([FI[BIII[FI)V +} + public final class sk/ainet/exec/tensor/ops/DefaultCpuOps : sk/ainet/exec/tensor/ops/DefaultCpuOpsBase { public fun (Lsk/ainet/lang/tensor/data/TensorDataFactory;)V } @@ -49,7 +153,9 @@ public class sk/ainet/exec/tensor/ops/DefaultCpuOpsBase : sk/ainet/lang/tensor/o public fun conv1d (Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;IIII)Lsk/ainet/lang/tensor/Tensor; public fun conv2d (Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;Lkotlin/Pair;Lkotlin/Pair;Lkotlin/Pair;I)Lsk/ainet/lang/tensor/Tensor; public fun conv3d 
(Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;Lkotlin/Triple;Lkotlin/Triple;Lkotlin/Triple;I)Lsk/ainet/lang/tensor/Tensor; + public fun convTranspose1d (Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;IIIII)Lsk/ainet/lang/tensor/Tensor; public fun convert (Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/types/DType;)Lsk/ainet/lang/tensor/Tensor; + public fun cos (Lsk/ainet/lang/tensor/Tensor;)Lsk/ainet/lang/tensor/Tensor; public fun divScalar (Lsk/ainet/lang/tensor/Tensor;Ljava/lang/Number;)Lsk/ainet/lang/tensor/Tensor; public fun divide (Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;)Lsk/ainet/lang/tensor/Tensor; protected final fun elementwise (Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;Lkotlin/jvm/functions/Function3;)Lsk/ainet/lang/tensor/Tensor; @@ -64,6 +170,9 @@ public class sk/ainet/exec/tensor/ops/DefaultCpuOpsBase : sk/ainet/lang/tensor/o protected final fun gradStateFrom ([Lsk/ainet/lang/tensor/Tensor;)Lsk/ainet/lang/tensor/GradState; public fun indexSelect (Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;I)Lsk/ainet/lang/tensor/Tensor; public fun leakyRelu (Lsk/ainet/lang/tensor/Tensor;F)Lsk/ainet/lang/tensor/Tensor; + public fun log (Lsk/ainet/lang/tensor/Tensor;)Lsk/ainet/lang/tensor/Tensor; + public fun log10 (Lsk/ainet/lang/tensor/Tensor;)Lsk/ainet/lang/tensor/Tensor; + public fun log2 (Lsk/ainet/lang/tensor/Tensor;)Lsk/ainet/lang/tensor/Tensor; public fun logSoftmax (Lsk/ainet/lang/tensor/Tensor;I)Lsk/ainet/lang/tensor/Tensor; public fun lt (Lsk/ainet/lang/tensor/Tensor;F)Lsk/ainet/lang/tensor/Tensor; protected final fun mapIndex ([ILsk/ainet/lang/tensor/Shape;)[I @@ -75,6 +184,9 @@ public class sk/ainet/exec/tensor/ops/DefaultCpuOpsBase : sk/ainet/lang/tensor/o public fun narrow (Lsk/ainet/lang/tensor/Tensor;III)Lsk/ainet/lang/tensor/Tensor; protected final fun newTensor 
(Lsk/ainet/lang/tensor/data/TensorData;Lkotlin/reflect/KClass;[Lsk/ainet/lang/tensor/Tensor;)Lsk/ainet/lang/tensor/Tensor; public fun pad2d (Lsk/ainet/lang/tensor/Tensor;IIII)Lsk/ainet/lang/tensor/Tensor; + public fun permute (Lsk/ainet/lang/tensor/Tensor;[I)Lsk/ainet/lang/tensor/Tensor; + public fun pow (Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;)Lsk/ainet/lang/tensor/Tensor; + public fun powScalar (Lsk/ainet/lang/tensor/Tensor;Ljava/lang/Number;)Lsk/ainet/lang/tensor/Tensor; public fun rdivScalar (Ljava/lang/Number;Lsk/ainet/lang/tensor/Tensor;)Lsk/ainet/lang/tensor/Tensor; public fun relu (Lsk/ainet/lang/tensor/Tensor;)Lsk/ainet/lang/tensor/Tensor; protected final fun requireSameDType (Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;)V @@ -84,6 +196,7 @@ public class sk/ainet/exec/tensor/ops/DefaultCpuOpsBase : sk/ainet/lang/tensor/o public fun sigmoid (Lsk/ainet/lang/tensor/Tensor;)Lsk/ainet/lang/tensor/Tensor; public fun sign (Lsk/ainet/lang/tensor/Tensor;)Lsk/ainet/lang/tensor/Tensor; public fun silu (Lsk/ainet/lang/tensor/Tensor;)Lsk/ainet/lang/tensor/Tensor; + public fun sin (Lsk/ainet/lang/tensor/Tensor;)Lsk/ainet/lang/tensor/Tensor; public fun softmax (Lsk/ainet/lang/tensor/Tensor;I)Lsk/ainet/lang/tensor/Tensor; public fun split (Lsk/ainet/lang/tensor/Tensor;II)Ljava/util/List; public fun sqrt (Lsk/ainet/lang/tensor/Tensor;)Lsk/ainet/lang/tensor/Tensor; @@ -91,6 +204,7 @@ public class sk/ainet/exec/tensor/ops/DefaultCpuOpsBase : sk/ainet/lang/tensor/o public fun subScalar (Lsk/ainet/lang/tensor/Tensor;Ljava/lang/Number;)Lsk/ainet/lang/tensor/Tensor; public fun subtract (Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;)Lsk/ainet/lang/tensor/Tensor; public fun sum (Lsk/ainet/lang/tensor/Tensor;Ljava/lang/Integer;)Lsk/ainet/lang/tensor/Tensor; + public fun tanh (Lsk/ainet/lang/tensor/Tensor;)Lsk/ainet/lang/tensor/Tensor; public fun transpose (Lsk/ainet/lang/tensor/Tensor;)Lsk/ainet/lang/tensor/Tensor; public fun tril 
(Lsk/ainet/lang/tensor/Tensor;I)Lsk/ainet/lang/tensor/Tensor; public fun unfold (Lsk/ainet/lang/tensor/Tensor;III)Lsk/ainet/lang/tensor/Tensor; @@ -115,6 +229,15 @@ protected final class sk/ainet/exec/tensor/ops/DefaultCpuOpsBase$CpuTensor : sk/ public fun zeroGrad ()V } +public final class sk/ainet/exec/tensor/ops/JvmTurboQuantKernels { + public static final field INSTANCE Lsk/ainet/exec/tensor/ops/JvmTurboQuantKernels; + public final fun absMax ([FII)F + public final fun dequantize ([B[F[FI)V + public static synthetic fun dequantize$default (Lsk/ainet/exec/tensor/ops/JvmTurboQuantKernels;[B[F[FIILjava/lang/Object;)V + public final fun quantize ([FI)Lsk/ainet/lang/tensor/ops/turboquant/QuantizedVector; + public final fun walshHadamardButterfly ([FII)V +} + public final class sk/ainet/java/SKaiNET { public static final field INSTANCE Lsk/ainet/java/SKaiNET; public static final fun context ()Lsk/ainet/context/ExecutionContext; diff --git a/skainet-backends/skainet-backend-cpu/src/commonMain/kotlin/sk/ainet/exec/kernel/ScalarKernelProvider.kt b/skainet-backends/skainet-backend-cpu/src/commonMain/kotlin/sk/ainet/exec/kernel/ScalarKernelProvider.kt index 080377a7..a7c13ccd 100644 --- a/skainet-backends/skainet-backend-cpu/src/commonMain/kotlin/sk/ainet/exec/kernel/ScalarKernelProvider.kt +++ b/skainet-backends/skainet-backend-cpu/src/commonMain/kotlin/sk/ainet/exec/kernel/ScalarKernelProvider.kt @@ -3,6 +3,7 @@ package sk.ainet.exec.kernel import sk.ainet.backend.api.kernel.Bf16MatmulKernel import sk.ainet.backend.api.kernel.Fp32MatmulKernel import sk.ainet.backend.api.kernel.KernelProvider +import sk.ainet.backend.api.kernel.Q4_0MatmulKernel import sk.ainet.backend.api.kernel.Q8_0MatmulKernel /** @@ -25,4 +26,5 @@ public object ScalarKernelProvider : KernelProvider { override fun matmulFp32(): Fp32MatmulKernel = ScalarMatmulKernel override fun matmulBf16(): Bf16MatmulKernel = ScalarBf16MatmulKernel override fun matmulQ8_0(): Q8_0MatmulKernel = ScalarQ8_0MatmulKernel + 
override fun matmulQ4_0(): Q4_0MatmulKernel = ScalarQ4_0MatmulKernel } diff --git a/skainet-backends/skainet-backend-cpu/src/commonMain/kotlin/sk/ainet/exec/kernel/ScalarQ4_0MatmulKernel.kt b/skainet-backends/skainet-backend-cpu/src/commonMain/kotlin/sk/ainet/exec/kernel/ScalarQ4_0MatmulKernel.kt new file mode 100644 index 00000000..6a844e86 --- /dev/null +++ b/skainet-backends/skainet-backend-cpu/src/commonMain/kotlin/sk/ainet/exec/kernel/ScalarQ4_0MatmulKernel.kt @@ -0,0 +1,96 @@ +package sk.ainet.exec.kernel + +import sk.ainet.backend.api.kernel.Q4_0MatmulKernel + +/** + * Scalar reference implementation of [Q4_0MatmulKernel] — straight + * per-block dequant + per-element FMA, no SIMD. Always available on + * every KMP target. Used as: + * + * - The correctness reference that accelerated kernels (Panama Vector, + * native FFM) must match within FP order tolerance. + * - A guaranteed fallback when no accelerated provider is registered. + * + * Block layout (32-element block, 18 bytes): + * - bytes 0..1 : FP16 little-endian scale (`d`) + * - bytes 2..17: 16 bytes packing 32 4-bit codes (split layout) + * + * Dequant per element: `(code - 8) * d`. No min / offset. + * + * Performance is intentionally modest; production paths should pick the + * Panama Vector or native variant via the kernel registry. 
+ */ +public object ScalarQ4_0MatmulKernel : Q4_0MatmulKernel { + + private const val BLOCK_SIZE = 32 + private const val BYTES_PER_BLOCK = 18 + + override fun matmul( + input: FloatArray, inputOffset: Int, + weight: ByteArray, weightByteOffset: Int, + inputDim: Int, outputDim: Int, + output: FloatArray, outputOffset: Int, + ) { + require(inputDim % BLOCK_SIZE == 0) { + "ScalarQ4_0MatmulKernel: inputDim must be a multiple of $BLOCK_SIZE; got $inputDim" + } + if (outputDim == 0 || inputDim == 0) { + if (outputDim > 0) { + for (o in 0 until outputDim) output[outputOffset + o] = 0f + } + return + } + val blocksPerInputDim = inputDim / BLOCK_SIZE + + for (o in 0 until outputDim) { + var acc = 0f + for (blockIdx in 0 until blocksPerInputDim) { + val blockBase = weightByteOffset + (blockIdx * outputDim + o) * BYTES_PER_BLOCK + // FP16 scale: two LE bytes. + val dBits = (weight[blockBase].toInt() and 0xFF) or + ((weight[blockBase + 1].toInt() and 0xFF) shl 8) + val d = halfToFloat(dBits) + // 32 codes, blockIdx-th window of the input vector. Split + // layout: low nibbles → elements 0..15, high → 16..31. + val inputBase = inputOffset + blockIdx * BLOCK_SIZE + val codesBase = blockBase + 2 + for (j in 0 until 16) { + val b = weight[codesBase + j].toInt() and 0xFF + val lo = (b and 0x0F) - 8 + val hi = (b ushr 4) - 8 + acc += input[inputBase + j] * lo * d + acc += input[inputBase + 16 + j] * hi * d + } + } + output[outputOffset + o] = acc + } + } + + /** + * Convert a 16-bit IEEE-754 half-precision value (low 16 bits of + * [hbits]) to FP32. Mirrors [ScalarQ8_0MatmulKernel]'s inlined helper + * — the skainet-lang-core dequant helper is internal to that module. 
+ */ + private fun halfToFloat(hbits: Int): Float { + val sign = (hbits and 0x8000) shl 16 + val exp = (hbits and 0x7C00) shr 10 + val mant = hbits and 0x03FF + return when (exp) { + 0 -> { + if (mant == 0) Float.fromBits(sign) + else { + var m = mant + var e = -14 + while ((m and 0x400) == 0) { + m = m shl 1 + e-- + } + m = m and 0x3FF + Float.fromBits(sign or ((e + 127) shl 23) or (m shl 13)) + } + } + 31 -> Float.fromBits(sign or (0xFF shl 23) or (mant shl 13)) + else -> Float.fromBits(sign or ((exp - 15 + 127) shl 23) or (mant shl 13)) + } + } +} diff --git a/skainet-backends/skainet-backend-cpu/src/jvmMain/kotlin/sk/ainet/exec/kernel/PanamaVectorKernelProvider.kt b/skainet-backends/skainet-backend-cpu/src/jvmMain/kotlin/sk/ainet/exec/kernel/PanamaVectorKernelProvider.kt index ba978052..ecc68cf5 100644 --- a/skainet-backends/skainet-backend-cpu/src/jvmMain/kotlin/sk/ainet/exec/kernel/PanamaVectorKernelProvider.kt +++ b/skainet-backends/skainet-backend-cpu/src/jvmMain/kotlin/sk/ainet/exec/kernel/PanamaVectorKernelProvider.kt @@ -4,6 +4,7 @@ import sk.ainet.backend.api.kernel.Bf16MatmulKernel import sk.ainet.backend.api.kernel.Fp32MatmulKernel import sk.ainet.backend.api.kernel.KernelProvider import sk.ainet.backend.api.kernel.Q4KMatmulKernel +import sk.ainet.backend.api.kernel.Q4_0MatmulKernel import sk.ainet.backend.api.kernel.Q8_0MatmulKernel import sk.ainet.exec.tensor.ops.JvmCpuBackendConfig @@ -49,6 +50,9 @@ public object PanamaVectorKernelProvider : KernelProvider { override fun matmulQ8_0(): Q8_0MatmulKernel? = if (isAvailable()) PanamaVectorQ8_0MatmulKernel else null + override fun matmulQ4_0(): Q4_0MatmulKernel? 
= + if (isAvailable()) PanamaVectorQ4_0MatmulKernel else null + private fun isVectorApiClassLoaded(): Boolean = runCatching { Class.forName("jdk.incubator.vector.FloatVector") Class.forName("jdk.incubator.vector.VectorSpecies") diff --git a/skainet-backends/skainet-backend-cpu/src/jvmMain/kotlin/sk/ainet/exec/kernel/PanamaVectorQ4_0MatmulKernel.kt b/skainet-backends/skainet-backend-cpu/src/jvmMain/kotlin/sk/ainet/exec/kernel/PanamaVectorQ4_0MatmulKernel.kt new file mode 100644 index 00000000..d3ca54b9 --- /dev/null +++ b/skainet-backends/skainet-backend-cpu/src/jvmMain/kotlin/sk/ainet/exec/kernel/PanamaVectorQ4_0MatmulKernel.kt @@ -0,0 +1,114 @@ +package sk.ainet.exec.kernel + +import jdk.incubator.vector.FloatVector +import jdk.incubator.vector.VectorOperators +import jdk.incubator.vector.VectorSpecies +import sk.ainet.backend.api.kernel.Q4_0MatmulKernel + +/** + * SIMD-vectorized FP32 × Q4_0 matmul on the JDK Vector API. + * + * Pipeline per 32-element block: + * 1. Decode the 2-byte FP16 scale `d` once. + * 2. Unpack the 16 code bytes into 32 sign-corrected floats (`nibble - 8`) + * in a reusable scratch buffer, using the canonical ggml **split** + * layout (low nibbles → elements 0..15, high nibbles → 16..31). The + * nibble-pair-per-byte packing makes a fully-fused `ByteVector` + * pipeline awkward, so this kernel keeps the scratch-then-FMA shape + * (same approach as the legacy `JvmQuantizedVectorKernels` Q4_0 path). + * 3. SIMD-FMA the scratch against the matching input window into a + * lane-wise block accumulator, reduce across lanes, and fold `* d` + * exactly once per block. + * + * Numerical equivalence with [ScalarQ4_0MatmulKernel] is within FMA + + * reordered-reduction tolerance — the same bar the Q8_0 / Q4_K Panama + * kernels use. 
+ */ +public object PanamaVectorQ4_0MatmulKernel : Q4_0MatmulKernel { + + private const val BLOCK_SIZE = 32 + private const val BYTES_PER_BLOCK = 18 + + private val floatSpecies: VectorSpecies = FloatVector.SPECIES_PREFERRED + + override fun matmul( + input: FloatArray, inputOffset: Int, + weight: ByteArray, weightByteOffset: Int, + inputDim: Int, outputDim: Int, + output: FloatArray, outputOffset: Int, + ) { + require(inputDim % BLOCK_SIZE == 0) { + "PanamaVectorQ4_0MatmulKernel: inputDim must be a multiple of $BLOCK_SIZE; got $inputDim" + } + if (outputDim == 0) return + if (inputDim == 0) { + for (o in 0 until outputDim) output[outputOffset + o] = 0f + return + } + val blocksPerInputDim = inputDim / BLOCK_SIZE + val step = floatSpecies.length() + val loopBound = floatSpecies.loopBound(BLOCK_SIZE) + val codeBuf = FloatArray(BLOCK_SIZE) + + for (o in 0 until outputDim) { + var acc = 0f + for (blockIdx in 0 until blocksPerInputDim) { + val blockBase = weightByteOffset + (blockIdx * outputDim + o) * BYTES_PER_BLOCK + // FP16 scale — two LE bytes. + val dBits = (weight[blockBase].toInt() and 0xFF) or + ((weight[blockBase + 1].toInt() and 0xFF) shl 8) + val d = halfToFloat(dBits) + + // Split-layout unpack: low nibbles → 0..15, high → 16..31. + val codesBase = blockBase + 2 + for (j in 0 until 16) { + val b = weight[codesBase + j].toInt() and 0xFF + codeBuf[j] = ((b and 0x0F) - 8).toFloat() + codeBuf[16 + j] = ((b ushr 4) - 8).toFloat() + } + + val inputBase = inputOffset + blockIdx * BLOCK_SIZE + var blockAccVec = FloatVector.zero(floatSpecies) + var k = 0 + while (k < loopBound) { + val inV = FloatVector.fromArray(floatSpecies, input, inputBase + k) + val cV = FloatVector.fromArray(floatSpecies, codeBuf, k) + blockAccVec = inV.fma(cV, blockAccVec) + k += step + } + var blockAcc = blockAccVec.reduceLanes(VectorOperators.ADD) + // Scalar tail (only if floatSpecies.length() doesn't divide 32 — rare). 
+ while (k < BLOCK_SIZE) { + blockAcc += input[inputBase + k] * codeBuf[k] + k++ + } + acc += blockAcc * d + } + output[outputOffset + o] = acc + } + } + + /** Same FP16 → FP32 conversion as [ScalarQ4_0MatmulKernel]. */ + private fun halfToFloat(hbits: Int): Float { + val sign = (hbits and 0x8000) shl 16 + val exp = (hbits and 0x7C00) shr 10 + val mant = hbits and 0x03FF + return when (exp) { + 0 -> { + if (mant == 0) Float.fromBits(sign) + else { + var m = mant + var e = -14 + while ((m and 0x400) == 0) { + m = m shl 1 + e-- + } + m = m and 0x3FF + Float.fromBits(sign or ((e + 127) shl 23) or (m shl 13)) + } + } + 31 -> Float.fromBits(sign or (0xFF shl 23) or (mant shl 13)) + else -> Float.fromBits(sign or ((exp - 15 + 127) shl 23) or (mant shl 13)) + } + } +} diff --git a/skainet-backends/skainet-backend-cpu/src/jvmMain/kotlin/sk/ainet/exec/tensor/ops/DefaultCpuOpsJvm.kt b/skainet-backends/skainet-backend-cpu/src/jvmMain/kotlin/sk/ainet/exec/tensor/ops/DefaultCpuOpsJvm.kt index 703beebf..b70abfd9 100644 --- a/skainet-backends/skainet-backend-cpu/src/jvmMain/kotlin/sk/ainet/exec/tensor/ops/DefaultCpuOpsJvm.kt +++ b/skainet-backends/skainet-backend-cpu/src/jvmMain/kotlin/sk/ainet/exec/tensor/ops/DefaultCpuOpsJvm.kt @@ -9,9 +9,11 @@ import sk.ainet.backend.api.kernel.KernelRegistry import sk.ainet.backend.api.kernel.KernelServiceLoader import sk.ainet.backend.api.kernel.KernelStrictness import sk.ainet.backend.api.kernel.Q4KMatmulKernel +import sk.ainet.backend.api.kernel.Q4_0MatmulKernel import sk.ainet.backend.api.kernel.Q8_0MatmulKernel import sk.ainet.exec.kernel.ScalarBf16MatmulKernel import sk.ainet.exec.kernel.ScalarMatmulKernel +import sk.ainet.exec.kernel.ScalarQ4_0MatmulKernel import sk.ainet.lang.tensor.Shape import sk.ainet.lang.tensor.Tensor import sk.ainet.lang.tensor.data.DenseFloatArrayTensorData @@ -21,6 +23,7 @@ import sk.ainet.lang.tensor.data.MemorySegmentTensorData import sk.ainet.lang.tensor.data.Q4MemorySegmentMarker import 
sk.ainet.lang.tensor.data.Q4MemorySegmentTensorData import sk.ainet.lang.tensor.data.Bf16TensorData +import sk.ainet.lang.tensor.data.Q4_0TensorData import sk.ainet.lang.tensor.data.Q8_0TensorData import sk.ainet.lang.tensor.data.Q8MemorySegmentMarker import sk.ainet.lang.tensor.data.Q8MemorySegmentTensorData @@ -113,6 +116,24 @@ internal class DefaultCpuOpsJvm( ?: ScalarBf16MatmulKernel } + /** + * Q4_0 matmul kernel resolved via [KernelRegistry]. Mirrors + * [bf16MatmulKernel]: non-null, takes the first available provider + * whose `matmulQ4_0()` is non-null (presumably priority-ordered: + * native FFM at 100, Panama Vector at 50 — TODO confirm), else + * [ScalarQ4_0MatmulKernel]. Providers may return `null` for Q4_0, but + * the scalar floor always exists, so no pre-SPI legacy fallback. + */ + private val q4_0MatmulKernel: Q4_0MatmulKernel by lazy { + if (KernelRegistry.providers().isEmpty()) { + KernelServiceLoader.installAll() + } + KernelRegistry.providers() + .firstOrNull { it.isAvailable() && it.matmulQ4_0() != null } + ?.matmulQ4_0() + ?: ScalarQ4_0MatmulKernel + } + override fun add(a: Tensor, b: Tensor): Tensor { vectorFloatBinary(a, b, { x, y -> x.add(y) }) { x, y -> x + y }?.let { return it } return super.add(a, b) @@ -521,6 +542,22 @@ internal class DefaultCpuOpsJvm( @Suppress("UNCHECKED_CAST") CpuTensor(outData as TensorData, this, a.dtype) } + is Q4_0TensorData -> { + val outBuffer = FloatArray(batchSize * outputDim) + for (batch in 0 until batchSize) { + val batchInput = if (batchSize == 1) inputBuffer + else inputBuffer.copyOfRange(batch * inputDim, (batch + 1) * inputDim) + q4_0MatmulKernel.matmul( + batchInput, 0, + bData.packedData, 0, + inputDim, outputDim, + outBuffer, batch * outputDim, + ) + } + val outData = DenseFloatArrayTensorData(Shape(batchSize, outputDim), outBuffer) + @Suppress("UNCHECKED_CAST") + CpuTensor(outData as TensorData, this, a.dtype) + } is Q4_KTensorData -> { val outBuffer = FloatArray(batchSize * outputDim) val spiKernel =
q4kMatmulKernel diff --git a/skainet-backends/skainet-backend-cpu/src/jvmMain/kotlin/sk/ainet/exec/tensor/ops/JvmQuantizedVectorKernels.kt b/skainet-backends/skainet-backend-cpu/src/jvmMain/kotlin/sk/ainet/exec/tensor/ops/JvmQuantizedVectorKernels.kt index 94cb5202..8f726ef6 100644 --- a/skainet-backends/skainet-backend-cpu/src/jvmMain/kotlin/sk/ainet/exec/tensor/ops/JvmQuantizedVectorKernels.kt +++ b/skainet-backends/skainet-backend-cpu/src/jvmMain/kotlin/sk/ainet/exec/tensor/ops/JvmQuantizedVectorKernels.kt @@ -549,13 +549,14 @@ internal object JvmQuantizedVectorKernels { // Read f16 scale val scale = halfToFloat(read2BytesLE(weightSeg, blockByteOffset)) - // Unpack 16 packed bytes → 32 sign-corrected nibbles. Two - // nibbles per byte load means half the byte traffic of the - // straight scalar dot product. + // Unpack 16 packed bytes → 32 sign-corrected nibbles in the + // canonical ggml *split* layout: low nibbles decode elements + // 0..15, high nibbles decode elements 16..31. (Matches + // DequantOps.dequantQ4_0FromBytes and Q4_0BlockTensorData.) for (k in 0 until 16) { val b = weightSeg.get(JAVA_BYTE_LE, codesOffset + k.toLong()).toInt() and 0xFF - codeBuf[2 * k] = (b and 0x0F).toFloat() - 8f - codeBuf[2 * k + 1] = (b ushr 4).toFloat() - 8f + codeBuf[k] = (b and 0x0F).toFloat() - 8f + codeBuf[16 + k] = (b ushr 4).toFloat() - 8f } // SIMD FMA dot product. 
diff --git a/skainet-backends/skainet-backend-cpu/src/jvmTest/kotlin/sk/ainet/exec/kernel/KernelProviderSupportsTest.kt b/skainet-backends/skainet-backend-cpu/src/jvmTest/kotlin/sk/ainet/exec/kernel/KernelProviderSupportsTest.kt index cc68683b..07a83cc3 100644 --- a/skainet-backends/skainet-backend-cpu/src/jvmTest/kotlin/sk/ainet/exec/kernel/KernelProviderSupportsTest.kt +++ b/skainet-backends/skainet-backend-cpu/src/jvmTest/kotlin/sk/ainet/exec/kernel/KernelProviderSupportsTest.kt @@ -47,6 +47,11 @@ class KernelProviderSupportsTest { p.supports("matmul", listOf("Float32", "Q8_0")), "Q8_0 matmul support must mirror matmulQ8_0() != null", ) + assertEquals( + p.matmulQ4_0() != null, + p.supports("matmul", listOf("Float32", "Q4_0")), + "Q4_0 matmul support must mirror matmulQ4_0() != null", + ) } @Test @@ -62,6 +67,9 @@ class KernelProviderSupportsTest { p.matmulQ4K() != null, p.supports("matmul", listOf("Float32", "Q4_K")), ) + // Scalar carries the Q4_0 floor kernel, so the capability query + // must report it as supported. + assertTrue(p.supports("matmul", listOf("Float32", "Q4_0"))) } @Test diff --git a/skainet-backends/skainet-backend-cpu/src/jvmTest/kotlin/sk/ainet/exec/kernel/PanamaVectorQ4_0MatmulKernelParityTest.kt b/skainet-backends/skainet-backend-cpu/src/jvmTest/kotlin/sk/ainet/exec/kernel/PanamaVectorQ4_0MatmulKernelParityTest.kt new file mode 100644 index 00000000..d45e6c99 --- /dev/null +++ b/skainet-backends/skainet-backend-cpu/src/jvmTest/kotlin/sk/ainet/exec/kernel/PanamaVectorQ4_0MatmulKernelParityTest.kt @@ -0,0 +1,113 @@ +package sk.ainet.exec.kernel + +import kotlin.math.abs +import kotlin.random.Random +import kotlin.test.Test +import kotlin.test.assertEquals +import kotlin.test.assertFailsWith +import kotlin.test.assertTrue + +/** + * Numerical parity tests for [PanamaVectorQ4_0MatmulKernel] against + * [ScalarQ4_0MatmulKernel]. 
Both kernels apply the same FP16-scale + * decode + `(nibble - 8)` dequant in the canonical ggml split layout; + * differences come from FMA + reordered-reduction order only. + * + * Tolerance scales with the number of Q4_0 blocks processed: `1e-2 * + * blocksPerInputDim`, clamped to a `1e-2` floor — mirrors the Q8_0 + * parity test convention. + */ +class PanamaVectorQ4_0MatmulKernelParityTest { + + private val blockSize = 32 + private val bytesPerBlock = 18 + + /** Random Q4_0 packed bytes; scales clamped to a small positive FP16. */ + private fun randomQ4_0Bytes(blocksPerInputDim: Int, outputDim: Int, seed: Int): ByteArray { + val rng = Random(seed) + val numBlocks = blocksPerInputDim * outputDim + val bytes = ByteArray(numBlocks * bytesPerBlock) + rng.nextBytes(bytes) + for (block in 0 until numBlocks) { + val base = block * bytesPerBlock + bytes[base + 0] = 0x00.toByte() + bytes[base + 1] = 0x22.toByte() // FP16 0x2200 ≈ 7.6e-3 + } + return bytes + } + + private fun assertParity( + inputDim: Int, + outputDim: Int, + seed: Int, + tolPerBlock: Float = 1e-2f, + ) { + val blocksPerInputDim = inputDim / blockSize + val rng = Random(seed) + val input = FloatArray(inputDim) { rng.nextFloat() - 0.5f } + val weight = randomQ4_0Bytes(blocksPerInputDim, outputDim, seed) + val outScalar = FloatArray(outputDim) + val outPanama = FloatArray(outputDim) + + ScalarQ4_0MatmulKernel.matmul(input, 0, weight, 0, inputDim, outputDim, outScalar, 0) + PanamaVectorQ4_0MatmulKernel.matmul(input, 0, weight, 0, inputDim, outputDim, outPanama, 0) + + val tol = (tolPerBlock * blocksPerInputDim.coerceAtLeast(1)).coerceAtLeast(tolPerBlock) + for (i in outScalar.indices) { + val diff = abs(outScalar[i] - outPanama[i]) + assertTrue( + diff <= tol, + "mismatch at $i: scalar=${outScalar[i]} panama=${outPanama[i]} diff=$diff tol=$tol", + ) + } + } + + @Test fun single_block_single_output_matches_scalar() = + assertParity(inputDim = 32, outputDim = 1, seed = 1) + + @Test fun 
single_block_multiple_outputs_matches_scalar() = + assertParity(inputDim = 32, outputDim = 7, seed = 2) + + @Test fun multiple_blocks_single_output_matches_scalar() = + assertParity(inputDim = 256, outputDim = 1, seed = 3) + + @Test fun llm_typical_attention_proj_matches_scalar() = + assertParity(inputDim = 512, outputDim = 512, seed = 4) + + @Test fun llm_typical_ffn_proj_matches_scalar() = + assertParity(inputDim = 256, outputDim = 1024, seed = 5) + + @Test fun rejects_non_block_aligned_input_dim() { + assertFailsWith { + PanamaVectorQ4_0MatmulKernel.matmul( + FloatArray(31), 0, + ByteArray(bytesPerBlock), 0, + 31, 1, + FloatArray(1), 0, + ) + } + } + + @Test fun zero_input_dim_zeros_output() { + val out = FloatArray(5) { 9f } + PanamaVectorQ4_0MatmulKernel.matmul( + FloatArray(0), 0, + ByteArray(0), 0, + 0, 5, + out, 0, + ) + for (v in out) assertEquals(0f, v, "output should be zeroed for inputDim=0") + } + + @Test fun provider_returns_panama_q4_0_when_available() { + val kernel = PanamaVectorKernelProvider.matmulQ4_0() + if (PanamaVectorKernelProvider.isAvailable()) { + assertTrue( + kernel === PanamaVectorQ4_0MatmulKernel, + "Provider must hand out the Panama Q4_0 kernel when available", + ) + } else { + assertEquals(null, kernel, "Provider must return null when Vector API unavailable") + } + } +} diff --git a/skainet-backends/skainet-backend-cpu/src/jvmTest/kotlin/sk/ainet/exec/tensor/ops/Q4_0MatmulDispatchTest.kt b/skainet-backends/skainet-backend-cpu/src/jvmTest/kotlin/sk/ainet/exec/tensor/ops/Q4_0MatmulDispatchTest.kt new file mode 100644 index 00000000..f005dc25 --- /dev/null +++ b/skainet-backends/skainet-backend-cpu/src/jvmTest/kotlin/sk/ainet/exec/tensor/ops/Q4_0MatmulDispatchTest.kt @@ -0,0 +1,110 @@ +package sk.ainet.exec.tensor.ops + +import kotlin.math.abs +import kotlin.random.Random +import kotlin.test.Test +import kotlin.test.assertTrue +import sk.ainet.context.DirectCpuExecutionContext +import sk.ainet.exec.kernel.ScalarQ4_0MatmulKernel +import 
sk.ainet.lang.tensor.Shape +import sk.ainet.lang.tensor.Tensor +import sk.ainet.lang.tensor.data.Q4_0BlockTensorData +import sk.ainet.lang.tensor.data.TensorData +import sk.ainet.lang.types.FP32 + +/** + * Integration tests for the FP32 × Q4_0 dispatch path in + * [DefaultCpuOpsJvm.matmul]. Confirms that calling matmul on a + * Q4_0-backed weight tensor produces the same output as the scalar + * Q4_0 kernel — proving the dispatch actually routes through the + * registered Q4_0 SPI kernel (or the scalar floor). Mirrors + * [Q8_0MatmulDispatchTest]; pins integration, not kernel correctness. + */ +class Q4_0MatmulDispatchTest { + + private val ctx = DirectCpuExecutionContext() + + private val blockSize = 32 + private val bytesPerBlock = 18 + + private fun randomQ4_0Bytes(blocksPerInputDim: Int, outputDim: Int, seed: Int): ByteArray { + val rng = Random(seed) + val numBlocks = blocksPerInputDim * outputDim + val bytes = ByteArray(numBlocks * bytesPerBlock) + rng.nextBytes(bytes) + for (block in 0 until numBlocks) { + val base = block * bytesPerBlock + // FP16 scale ≈ 7.6e-3 (0x2200) — safely finite, non-zero. 
+ bytes[base + 0] = 0x00.toByte() + bytes[base + 1] = 0x22.toByte() + } + return bytes + } + + private fun scalarQ4_0Reference( + input: FloatArray, weight: ByteArray, + inputDim: Int, outputDim: Int, + batchSize: Int, + ): FloatArray { + val out = FloatArray(batchSize * outputDim) + for (b in 0 until batchSize) { + ScalarQ4_0MatmulKernel.matmul( + input, b * inputDim, + weight, 0, + inputDim, outputDim, + out, b * outputDim, + ) + } + return out + } + + private fun assertDispatchMatchesScalar( + batchSize: Int, inputDim: Int, outputDim: Int, seed: Int, + tolPerBlock: Float = 1e-2f, + ) { + val rng = Random(seed) + val inputFloats = FloatArray(batchSize * inputDim) { rng.nextFloat() - 0.5f } + val blocksPerInputDim = inputDim / blockSize + + val weightBytes = randomQ4_0Bytes(blocksPerInputDim, outputDim, seed) + // Logical shape of a Q4_0 weight tensor is [inputDim, outputDim]. + @Suppress("UNCHECKED_CAST") + val td = Q4_0BlockTensorData(Shape(inputDim, outputDim), weightBytes) as TensorData + val weight = ctx.fromData(td, FP32::class) + val input = ctx.fromFloatArray( + Shape(batchSize, inputDim), FP32::class, inputFloats, + ) + + val out = ctx.ops.matmul(input, weight) + val outArr = out.data.copyToFloatArray() + + val expected = scalarQ4_0Reference(inputFloats, weightBytes, inputDim, outputDim, batchSize) + + val tol = (tolPerBlock * blocksPerInputDim.coerceAtLeast(1)).coerceAtLeast(tolPerBlock) + for (i in expected.indices) { + val diff = abs(expected[i] - outArr[i]) + assertTrue( + diff <= tol, + "dispatch mismatch at $i: expected=${expected[i]} got=${outArr[i]} diff=$diff tol=$tol", + ) + } + } + + @Test + fun single_batch_matmul_against_q4_0_weight_routes_correctly() { + // batchSize=1 hits the optimized "no copyOfRange" branch in chooseQuantizedMatmul. 
+ assertDispatchMatchesScalar(batchSize = 1, inputDim = 128, outputDim = 64, seed = 1) + } + + @Test + fun multi_batch_matmul_against_q4_0_weight_routes_correctly() { + // batchSize>1 exercises the per-row copyOfRange branch. + assertDispatchMatchesScalar(batchSize = 3, inputDim = 256, outputDim = 32, seed = 2) + } + + @Test + fun llm_typical_attention_proj_matmul_routes_correctly() { + // Realistic attention-projection size (matvec at dim×dim). + assertDispatchMatchesScalar(batchSize = 1, inputDim = 512, outputDim = 512, seed = 3) + } +} diff --git a/skainet-backends/skainet-backend-cpu/src/jvmTest/kotlin/sk/ainet/exec/tensor/ops/QuantizedMemSegMatmulTest.kt b/skainet-backends/skainet-backend-cpu/src/jvmTest/kotlin/sk/ainet/exec/tensor/ops/QuantizedMemSegMatmulTest.kt index 38f5593e..30c3fd07 100644 --- a/skainet-backends/skainet-backend-cpu/src/jvmTest/kotlin/sk/ainet/exec/tensor/ops/QuantizedMemSegMatmulTest.kt +++ b/skainet-backends/skainet-backend-cpu/src/jvmTest/kotlin/sk/ainet/exec/tensor/ops/QuantizedMemSegMatmulTest.kt @@ -47,6 +47,8 @@ class QuantizedMemSegMatmulTest { /** * Encode a single Q4_0 block: 32 float values -> 18 bytes (2 scale + 16 packed nibbles). + * Uses the canonical ggml *split* layout: code[j] is the low nibble of + * byte j, code[j+16] is the high nibble of byte j. 
*/ private fun encodeQ4_0Block(values: FloatArray): ByteArray { require(values.size == 32) @@ -62,8 +64,8 @@ class QuantizedMemSegMatmulTest { val out = ByteArray(18) out[0] = (scaleHalf and 0xFF).toByte() out[1] = ((scaleHalf shr 8) and 0xFF).toByte() - for (i in 0 until 16) { - out[2 + i] = ((codes[2 * i + 1] shl 4) or codes[2 * i]).toByte() + for (j in 0 until 16) { + out[2 + j] = ((codes[j + 16] shl 4) or codes[j]).toByte() } return out } diff --git a/skainet-backends/skainet-backend-native-cpu/native/CMakeLists.txt b/skainet-backends/skainet-backend-native-cpu/native/CMakeLists.txt index a6655e67..ade06d41 100644 --- a/skainet-backends/skainet-backend-native-cpu/native/CMakeLists.txt +++ b/skainet-backends/skainet-backend-native-cpu/native/CMakeLists.txt @@ -15,6 +15,7 @@ add_library(skainet_kernels SHARED src/fp32_matmul.c src/bf16_matmul.c src/q8_0_matmul.c + src/q4_0_matmul.c ) target_include_directories(skainet_kernels PUBLIC diff --git a/skainet-backends/skainet-backend-native-cpu/native/include/skainet_kernels.h b/skainet-backends/skainet-backend-native-cpu/native/include/skainet_kernels.h index caadf814..a0fa3ff7 100644 --- a/skainet-backends/skainet-backend-native-cpu/native/include/skainet_kernels.h +++ b/skainet-backends/skainet-backend-native-cpu/native/include/skainet_kernels.h @@ -119,6 +119,32 @@ SKAINET_API void skainet_q8_0_matmul( int32_t output_offset ); +/* + * Q4_0 matrix-vector multiply. + * + * output[output_offset + o] = sum_j input[input_offset + j] * + * dequant(weight[block, o, j]) + * + * Block layout: canonical ggml Q4_0, 32 elements per block, 18 bytes + * per block (2 B FP16 scale + 16 B packed 4-bit codes in split layout — + * low nibbles → elements 0..15, high nibbles → 16..31), with packed + * weights laid out as + * weight + weight_byte_offset + (block_idx * output_dim + o) * 18 + * + * Dequant per element: `(code - 8) * d`. input_dim must be a multiple + * of 32. 
+ */ +SKAINET_API void skainet_q4_0_matmul( + const float* input, + int32_t input_offset, + const uint8_t* weight, + int32_t weight_byte_offset, + int32_t input_dim, + int32_t output_dim, + float* output, + int32_t output_offset +); + #ifdef __cplusplus } #endif diff --git a/skainet-backends/skainet-backend-native-cpu/native/src/q4_0_matmul.c b/skainet-backends/skainet-backend-native-cpu/native/src/q4_0_matmul.c new file mode 100644 index 00000000..97111ccf --- /dev/null +++ b/skainet-backends/skainet-backend-native-cpu/native/src/q4_0_matmul.c @@ -0,0 +1,94 @@ +#include "skainet_kernels.h" + +#include +#include +#include + +/* + * Native FP32 × Q4_0 matrix-vector matmul matching the + * sk.ainet.backend.api.kernel.Q4_0MatmulKernel SPI. + * + * Block layout (canonical ggml Q4_0, 32 elements, 18 bytes): + * - bytes 0..1 : FP16 little-endian scale `d` + * - bytes 2..17 : 16 bytes packing 32 4-bit codes in the *split* + * layout — low nibbles decode elements 0..15, high nibbles decode + * elements 16..31. + * + * Per-block packed weight layout: + * weight + weight_byte_offset + (block_idx * output_dim + o) * 18 + * + * Dequant per element: `(code - 8) * d`. The `- 8` bias centres the + * unsigned 4-bit code. Scale `d` is folded once after the block + * accumulator (cheaper than broadcasting it across every inner FMA). + */ + +/* Portable FP16 → FP32 conversion. Matches the Kotlin + * `Q4_0BlockTensorData.halfToFloat` algorithm bit-for-bit. 
*/ +static inline float skainet_q4_0_fp16_to_fp32(uint16_t h) { + uint32_t sign = ((uint32_t)(h & 0x8000u)) << 16; + uint32_t exp = (h >> 10) & 0x1Fu; + uint32_t mant = h & 0x3FFu; + uint32_t bits; + if (exp == 0) { + if (mant == 0) { + bits = sign; + } else { + int e = -14; + while ((mant & 0x400u) == 0) { + mant <<= 1; + --e; + } + mant &= 0x3FFu; + bits = sign | ((uint32_t)(e + 127) << 23) | (mant << 13); + } + } else if (exp == 0x1Fu) { + bits = sign | 0x7F800000u | (mant << 13); + } else { + bits = sign | ((uint32_t)(exp - 15 + 127) << 23) | (mant << 13); + } + float r; + memcpy(&r, &bits, sizeof(r)); + return r; +} + +SKAINET_API void skainet_q4_0_matmul( + const float* SKAINET_RESTRICT input, int32_t input_offset, + const uint8_t* SKAINET_RESTRICT weight, int32_t weight_byte_offset, + int32_t input_dim, int32_t output_dim, + float* SKAINET_RESTRICT output, int32_t output_offset +) { + if (output_dim <= 0) return; + if (input_dim <= 0) { + for (int32_t o = 0; o < output_dim; ++o) { + output[output_offset + o] = 0.0f; + } + return; + } + + const int32_t BLOCK_SIZE = 32; + const int32_t BYTES_PER_BLOCK = 18; + const int32_t blocks_per_input_dim = input_dim / BLOCK_SIZE; + + for (int32_t o = 0; o < output_dim; ++o) { + float acc = 0.0f; + for (int32_t block_idx = 0; block_idx < blocks_per_input_dim; ++block_idx) { + const uint8_t* SKAINET_RESTRICT block = + weight + weight_byte_offset + + (size_t)(block_idx * output_dim + o) * BYTES_PER_BLOCK; + uint16_t d_bits = (uint16_t) block[0] | ((uint16_t) block[1] << 8); + float d = skainet_q4_0_fp16_to_fp32(d_bits); + const uint8_t* SKAINET_RESTRICT codes = block + 2; + const float* SKAINET_RESTRICT input_block = + input + input_offset + (size_t) block_idx * BLOCK_SIZE; + float block_sum = 0.0f; + for (int32_t k = 0; k < 16; ++k) { + int32_t lo = (int32_t)(codes[k] & 0x0F) - 8; + int32_t hi = (int32_t)(codes[k] >> 4) - 8; + block_sum += input_block[k] * (float) lo; + block_sum += input_block[k + 16] * (float) hi; + } + 
acc += block_sum * d; + } + output[output_offset + o] = acc; + } +} diff --git a/skainet-backends/skainet-backend-native-cpu/src/jvmMain/kotlin/sk/ainet/exec/kernel/NativeKernelProvider.kt b/skainet-backends/skainet-backend-native-cpu/src/jvmMain/kotlin/sk/ainet/exec/kernel/NativeKernelProvider.kt index becb0393..60dd45e2 100644 --- a/skainet-backends/skainet-backend-native-cpu/src/jvmMain/kotlin/sk/ainet/exec/kernel/NativeKernelProvider.kt +++ b/skainet-backends/skainet-backend-native-cpu/src/jvmMain/kotlin/sk/ainet/exec/kernel/NativeKernelProvider.kt @@ -6,6 +6,7 @@ import sk.ainet.backend.api.kernel.KernelProvider import sk.ainet.backend.api.kernel.MemSegKernelProvider import sk.ainet.backend.api.kernel.Q4KMatmulKernel import sk.ainet.backend.api.kernel.Q4KMemSegMatmulKernel +import sk.ainet.backend.api.kernel.Q4_0MatmulKernel import sk.ainet.backend.api.kernel.Q8_0MatmulKernel /** @@ -93,4 +94,7 @@ public object NativeKernelProvider : KernelProvider, MemSegKernelProvider { override fun matmulQ8_0(): Q8_0MatmulKernel? = if (NativeQ8_0MatmulKernel.isAvailable()) NativeQ8_0MatmulKernel else null + + override fun matmulQ4_0(): Q4_0MatmulKernel? 
= + if (NativeQ4_0MatmulKernel.isAvailable()) NativeQ4_0MatmulKernel else null } diff --git a/skainet-backends/skainet-backend-native-cpu/src/jvmMain/kotlin/sk/ainet/exec/kernel/NativeQ4_0MatmulKernel.kt b/skainet-backends/skainet-backend-native-cpu/src/jvmMain/kotlin/sk/ainet/exec/kernel/NativeQ4_0MatmulKernel.kt new file mode 100644 index 00000000..718b4917 --- /dev/null +++ b/skainet-backends/skainet-backend-native-cpu/src/jvmMain/kotlin/sk/ainet/exec/kernel/NativeQ4_0MatmulKernel.kt @@ -0,0 +1,103 @@ +package sk.ainet.exec.kernel + +import java.lang.foreign.Arena +import java.lang.foreign.FunctionDescriptor +import java.lang.foreign.Linker +import java.lang.foreign.MemorySegment +import java.lang.foreign.ValueLayout +import java.lang.invoke.MethodHandle +import sk.ainet.backend.api.kernel.Q4_0MatmulKernel + +/** + * Native (FFM) implementation of [Q4_0MatmulKernel]. + * + * Wraps the bundled C symbol + * + * void skainet_q4_0_matmul( + * const float* input, int32_t input_offset, + * const uint8_t* weight, int32_t weight_byte_offset, + * int32_t input_dim, int32_t output_dim, + * float* output, int32_t output_offset); + * + * The C kernel decodes the ggml-canonical Q4_0 block (FP16 scale + 16 + * packed bytes, split nibble layout) with `(code - 8) * d` dequant and a + * tight inner FMA the compiler auto-vectorizes under -O3 -ffast-math. + * + * Numerical parity vs [ScalarQ4_0MatmulKernel] is asserted by + * `NativeQ4_0MatmulKernelParityTest` within the same `1e-2 * + * blocksPerInputDim` band the Panama parity uses. 
+ */
+internal object NativeQ4_0MatmulKernel : Q4_0MatmulKernel {
+
+    /** `true` when the native symbol was resolved and a downcall handle could be created. */
+    fun isAvailable(): Boolean = handle != null
+
+    /**
+     * Runs the native Q4_0 matrix-vector multiply through a temporary
+     * off-heap copy of the arguments.
+     *
+     * Only `output[outputOffset until outputOffset + outputDim]` is
+     * modified; every other element of [output] is left as the caller
+     * supplied it.
+     *
+     * @throws IllegalArgumentException if [inputDim] is not a multiple of 32.
+     * @throws IllegalStateException if the native downcall handle is unavailable.
+     */
+    override fun matmul(
+        input: FloatArray, inputOffset: Int,
+        weight: ByteArray, weightByteOffset: Int,
+        inputDim: Int, outputDim: Int,
+        output: FloatArray, outputOffset: Int,
+    ) {
+        require(inputDim % BLOCK_SIZE == 0) {
+            "NativeQ4_0MatmulKernel: inputDim must be a multiple of $BLOCK_SIZE; got $inputDim"
+        }
+        if (outputDim == 0) return
+
+        val mh = handle
+            ?: error("NativeQ4_0MatmulKernel.matmul invoked while native library unavailable")
+
+        // "Reach" = one past the highest index the native kernel may touch in
+        // each array, so the off-heap copies can keep the caller's offsets.
+        val blocksPerInputDim = inputDim / BLOCK_SIZE
+        val inputReachFloats = if (inputDim == 0) 0 else inputOffset + inputDim
+        val weightReachBytes = if (inputDim == 0 || outputDim == 0) 0
+            else weightByteOffset + blocksPerInputDim * outputDim * BYTES_PER_BLOCK
+        val outputReachFloats = outputOffset + outputDim
+
+        Arena.ofConfined().use { arena ->
+            val fAlign = ValueLayout.JAVA_FLOAT.byteAlignment()
+            val bAlign = ValueLayout.JAVA_BYTE.byteAlignment()
+
+            // With inputDim == 0 there is nothing to read, so NULL segments are passed.
+            val inputSeg: MemorySegment = if (inputReachFloats > 0)
+                arena.allocate(inputReachFloats.toLong() * java.lang.Float.BYTES, fAlign)
+            else MemorySegment.NULL
+            val weightSeg: MemorySegment = if (weightReachBytes > 0)
+                arena.allocate(weightReachBytes.toLong(), bAlign)
+            else MemorySegment.NULL
+            val outputSeg: MemorySegment =
+                arena.allocate(outputReachFloats.toLong() * java.lang.Float.BYTES, fAlign)
+
+            if (inputReachFloats > 0) {
+                MemorySegment.copy(input, 0, inputSeg, ValueLayout.JAVA_FLOAT, 0L, inputReachFloats)
+            }
+            if (weightReachBytes > 0) {
+                MemorySegment.copy(weight, 0, weightSeg, ValueLayout.JAVA_BYTE, 0L, weightReachBytes)
+            }
+
+            mh.invoke(
+                inputSeg, inputOffset,
+                weightSeg, weightByteOffset,
+                inputDim, outputDim,
+                outputSeg, outputOffset,
+            )
+
+            // Copy back ONLY the window the kernel wrote. The scratch segment's
+            // prefix [0, outputOffset) was never populated from `output`, so
+            // copying it back would overwrite the caller's data in front of
+            // the result row.
+            MemorySegment.copy(
+                outputSeg, ValueLayout.JAVA_FLOAT, outputOffset.toLong() * java.lang.Float.BYTES,
+                output, outputOffset, outputDim,
+            )
+        }
+    }
+
+    private const val BLOCK_SIZE = 32
+    private const val BYTES_PER_BLOCK = 18
+
+    private val
handle: MethodHandle? by lazy { + val lookup = NativeLibraryLoader.lookup() ?: return@lazy null + val symbol = lookup.find("skainet_q4_0_matmul").orElse(null) ?: return@lazy null + val descriptor = FunctionDescriptor.ofVoid( + ValueLayout.ADDRESS, // input + ValueLayout.JAVA_INT, // input_offset + ValueLayout.ADDRESS, // weight + ValueLayout.JAVA_INT, // weight_byte_offset + ValueLayout.JAVA_INT, // input_dim + ValueLayout.JAVA_INT, // output_dim + ValueLayout.ADDRESS, // output + ValueLayout.JAVA_INT, // output_offset + ) + runCatching { Linker.nativeLinker().downcallHandle(symbol, descriptor) }.getOrNull() + } +} diff --git a/skainet-backends/skainet-backend-native-cpu/src/jvmTest/kotlin/sk/ainet/exec/kernel/NativeQ4_0MatmulKernelParityTest.kt b/skainet-backends/skainet-backend-native-cpu/src/jvmTest/kotlin/sk/ainet/exec/kernel/NativeQ4_0MatmulKernelParityTest.kt new file mode 100644 index 00000000..e6e1a198 --- /dev/null +++ b/skainet-backends/skainet-backend-native-cpu/src/jvmTest/kotlin/sk/ainet/exec/kernel/NativeQ4_0MatmulKernelParityTest.kt @@ -0,0 +1,117 @@ +package sk.ainet.exec.kernel + +import kotlin.math.abs +import kotlin.random.Random +import kotlin.test.BeforeTest +import kotlin.test.Test +import kotlin.test.assertEquals +import kotlin.test.assertFailsWith +import kotlin.test.assertTrue + +/** + * Numerical parity tests for [NativeQ4_0MatmulKernel] against + * [PanamaVectorQ4_0MatmulKernel]. Same FP16 scale decode + split-layout + * `(nibble - 8)` dequant in both kernels; differences come from FMA + + * reordered-reduction only. + * + * Tolerance: `1e-2 * blocksPerInputDim` (matches the Panama / Q8_0 + * parity convention). 
+ */ +class NativeQ4_0MatmulKernelParityTest { + + private val blockSize = 32 + private val bytesPerBlock = 18 + + @BeforeTest + fun checkAvailable() { + assertTrue( + NativeQ4_0MatmulKernel.isAvailable(), + "Native Q4_0 kernel must be available — bundled libskainet_kernels missing or " + + "skainet_q4_0_matmul symbol unresolved", + ) + } + + private fun randomQ4_0Bytes(blocksPerInputDim: Int, outputDim: Int, seed: Int): ByteArray { + val rng = Random(seed) + val numBlocks = blocksPerInputDim * outputDim + val bytes = ByteArray(numBlocks * bytesPerBlock) + rng.nextBytes(bytes) + for (block in 0 until numBlocks) { + val base = block * bytesPerBlock + bytes[base + 0] = 0x00.toByte() + bytes[base + 1] = 0x22.toByte() // FP16 ~ 7.6e-3, comfortably finite + non-zero + } + return bytes + } + + private fun assertParity( + inputDim: Int, + outputDim: Int, + seed: Int, + tolPerBlock: Float = 1e-2f, + ) { + val blocksPerInputDim = inputDim / blockSize + val rng = Random(seed) + val input = FloatArray(inputDim) { rng.nextFloat() - 0.5f } + val weight = randomQ4_0Bytes(blocksPerInputDim, outputDim, seed) + val outPanama = FloatArray(outputDim) + val outNative = FloatArray(outputDim) + + PanamaVectorQ4_0MatmulKernel.matmul(input, 0, weight, 0, inputDim, outputDim, outPanama, 0) + NativeQ4_0MatmulKernel.matmul(input, 0, weight, 0, inputDim, outputDim, outNative, 0) + + val tol = (tolPerBlock * blocksPerInputDim.coerceAtLeast(1)).coerceAtLeast(tolPerBlock) + for (i in outPanama.indices) { + val diff = abs(outPanama[i] - outNative[i]) + assertTrue( + diff <= tol, + "mismatch at $i: panama=${outPanama[i]} native=${outNative[i]} diff=$diff tol=$tol", + ) + } + } + + @Test fun single_block_single_output_matches_panama() = + assertParity(inputDim = 32, outputDim = 1, seed = 1) + + @Test fun single_block_multiple_outputs_matches_panama() = + assertParity(inputDim = 32, outputDim = 7, seed = 2) + + @Test fun multiple_blocks_single_output_matches_panama() = + assertParity(inputDim = 256, 
outputDim = 1, seed = 3) + + @Test fun llm_typical_attention_proj_matches_panama() = + assertParity(inputDim = 512, outputDim = 512, seed = 4) + + @Test fun llm_typical_ffn_proj_matches_panama() = + assertParity(inputDim = 256, outputDim = 1024, seed = 5) + + @Test fun rejects_non_block_aligned_input_dim() { + assertFailsWith { + NativeQ4_0MatmulKernel.matmul( + FloatArray(31), 0, + ByteArray(bytesPerBlock), 0, + 31, 1, + FloatArray(1), 0, + ) + } + } + + @Test fun zero_input_dim_zeros_output() { + val out = FloatArray(5) { 9f } + NativeQ4_0MatmulKernel.matmul( + FloatArray(0), 0, + ByteArray(0), 0, + 0, 5, + out, 0, + ) + for (v in out) assertEquals(0f, v, "output should be zeroed for inputDim=0") + } + + @Test fun provider_returns_native_q4_0_when_available() { + val kernel = NativeKernelProvider.matmulQ4_0() + assertTrue( + kernel === NativeQ4_0MatmulKernel, + "Provider must hand out the native Q4_0 kernel when bundled lib is loaded", + ) + } +} diff --git a/skainet-compile/skainet-compile-dag/api/jvm/skainet-compile-dag.api b/skainet-compile/skainet-compile-dag/api/jvm/skainet-compile-dag.api index 8c536848..c7fdaeb8 100644 --- a/skainet-compile/skainet-compile-dag/api/jvm/skainet-compile-dag.api +++ b/skainet-compile/skainet-compile-dag/api/jvm/skainet-compile-dag.api @@ -92,6 +92,9 @@ public final class sk/ainet/lang/graph/DefaultGradientTape : sk/ainet/lang/graph public fun geluBackward (Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;Ljava/util/List;Ljava/util/Map;)Ljava/util/List; public fun getComputeGradients ()Z public fun leakyReluBackward (Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;Ljava/util/List;Ljava/util/Map;)Ljava/util/List; + public fun log10Backward (Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;Ljava/util/List;Ljava/util/Map;)Ljava/util/List; + public fun log2Backward (Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;Ljava/util/List;Ljava/util/Map;)Ljava/util/List; + public fun logBackward 
(Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;Ljava/util/List;Ljava/util/Map;)Ljava/util/List; public fun logSoftmaxBackward (Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;Ljava/util/List;Ljava/util/Map;)Ljava/util/List; public fun matmulBackward (Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;Ljava/util/List;Ljava/util/Map;)Ljava/util/List; public fun maxPool2dBackward (Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;Ljava/util/List;Ljava/util/Map;)Ljava/util/List; @@ -100,6 +103,9 @@ public final class sk/ainet/lang/graph/DefaultGradientTape : sk/ainet/lang/graph public fun multiplyBackward (Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;Ljava/util/List;Ljava/util/Map;)Ljava/util/List; public fun narrowBackward (Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;Ljava/util/List;Ljava/util/Map;)Ljava/util/List; public fun pad2dBackward (Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;Ljava/util/List;Ljava/util/Map;)Ljava/util/List; + public fun permuteBackward (Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;Ljava/util/List;Ljava/util/Map;)Ljava/util/List; + public fun powBackward (Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;Ljava/util/List;Ljava/util/Map;)Ljava/util/List; + public fun powScalarBackward (Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;Ljava/util/List;Ljava/util/Map;)Ljava/util/List; public fun rdivScalarBackward (Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;Ljava/util/List;Ljava/util/Map;)Ljava/util/List; public fun recordOperation (Lsk/ainet/lang/tensor/ops/Operation;Ljava/util/List;Ljava/util/List;)V public fun recordTrace (Lsk/ainet/lang/trace/OpTrace;)V @@ -119,6 +125,7 @@ public final class sk/ainet/lang/graph/DefaultGradientTape : sk/ainet/lang/graph public fun subtractBackward (Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;Ljava/util/List;Ljava/util/Map;)Ljava/util/List; public fun sumBackward 
(Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;Ljava/util/List;Ljava/util/Map;)Ljava/util/List; public final fun suppressRecording ()V + public fun tanhBackward (Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;Ljava/util/List;Ljava/util/Map;)Ljava/util/List; public fun transposeBackward (Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;Ljava/util/List;Ljava/util/Map;)Ljava/util/List; public fun unsqueezeBackward (Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;Ljava/util/List;Ljava/util/Map;)Ljava/util/List; public fun upsample2dBackward (Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;Ljava/util/List;Ljava/util/Map;)Ljava/util/List; @@ -147,15 +154,19 @@ public final class sk/ainet/lang/graph/DefaultGraphExecutionContext : sk/ainet/l public fun getHooks ()Lsk/ainet/lang/nn/hooks/ForwardHooks; public fun getInTraining ()Z public fun getMemoryInfo ()Lsk/ainet/context/MemoryInfo; + public fun getMemoryPlanner ()Lsk/ainet/lang/tensor/storage/MemoryPlanner; + public fun getMemoryTracker ()Lsk/ainet/lang/tensor/storage/MemoryTracker; public fun getObservers ()Lsk/ainet/context/ExecutionObserverRegistry; public fun getOps ()Lsk/ainet/lang/tensor/ops/KspTensorOps; public synthetic fun getOps ()Lsk/ainet/lang/tensor/ops/TensorOps; public fun getPhase ()Lsk/ainet/context/Phase; + public fun getScratch ()Lsk/ainet/lang/tensor/scratch/ScratchPool; public final fun getSession ()Lsk/ainet/lang/trace/TraceSession; public fun getTapeStack ()Lsk/ainet/tape/TapeStack; public fun getTensorDataFactory ()Lsk/ainet/lang/tensor/data/TensorDataFactory; public fun isRecording ()Z public fun ones (Lsk/ainet/lang/tensor/Shape;Lkotlin/reflect/KClass;)Lsk/ainet/lang/tensor/Tensor; + public fun placeholder (Lsk/ainet/lang/tensor/Shape;Lkotlin/reflect/KClass;)Lsk/ainet/lang/tensor/Tensor; public final fun record (Lkotlin/jvm/functions/Function1;)Lkotlin/Pair; public fun registerObserver (Lsk/ainet/context/ExecutionObserver;)V public fun 
resetExecutionStats ()V @@ -164,6 +175,9 @@ public final class sk/ainet/lang/graph/DefaultGraphExecutionContext : sk/ainet/l public fun stopRecording ()Lsk/ainet/tape/ExecutionTape; public final fun stopRecordingAndGet ()Lsk/ainet/tape/ExecutionTape; public fun unregisterObserver (Lsk/ainet/context/ExecutionObserver;)V + public fun wrapByteArray (Lsk/ainet/lang/tensor/Shape;Lkotlin/reflect/KClass;[B)Lsk/ainet/lang/tensor/Tensor; + public fun wrapFloatArray (Lsk/ainet/lang/tensor/Shape;Lkotlin/reflect/KClass;[F)Lsk/ainet/lang/tensor/Tensor; + public fun wrapIntArray (Lsk/ainet/lang/tensor/Shape;Lkotlin/reflect/KClass;[I)Lsk/ainet/lang/tensor/Tensor; public fun zeros (Lsk/ainet/lang/tensor/Shape;Lkotlin/reflect/KClass;)Lsk/ainet/lang/tensor/Tensor; } @@ -263,6 +277,34 @@ public final class sk/ainet/lang/graph/GraphNode { public fun toString ()Ljava/lang/String; } +public abstract interface class sk/ainet/lang/graph/Layout { +} + +public final class sk/ainet/lang/graph/ResolvedComputeGraph { + public fun (Lsk/ainet/lang/graph/ComputeGraph;)V + public final fun backendAssignment (Ljava/lang/String;)Ljava/lang/String; + public final fun getDelegate ()Lsk/ainet/lang/graph/ComputeGraph; + public final fun getEdges ()Ljava/util/List; + public final fun getNodes ()Ljava/util/List; + public final fun resolvedDtype (Ljava/lang/String;)Lsk/ainet/lang/types/DType; + public final fun resolvedLayout (Ljava/lang/String;)Lsk/ainet/lang/graph/Layout; + public final fun validate ()Lsk/ainet/lang/graph/ResolvedGraphValidation; +} + +public final class sk/ainet/lang/graph/ResolvedGraphValidation { + public fun (ZLjava/util/List;)V + public final fun component1 ()Z + public final fun component2 ()Ljava/util/List; + public final fun copy (ZLjava/util/List;)Lsk/ainet/lang/graph/ResolvedGraphValidation; + public static synthetic fun copy$default (Lsk/ainet/lang/graph/ResolvedGraphValidation;ZLjava/util/List;ILjava/lang/Object;)Lsk/ainet/lang/graph/ResolvedGraphValidation; + public fun 
equals (Ljava/lang/Object;)Z + public final fun getErrors ()Ljava/util/List; + public final fun getValid ()Z + public fun hashCode ()I + public final fun requireValid ()V + public fun toString ()Ljava/lang/String; +} + public final class sk/ainet/lang/graph/SimpleComputeGraph : sk/ainet/lang/graph/ComputeGraph { public fun ()V public fun (Ljava/util/List;Ljava/util/List;)V @@ -369,10 +411,17 @@ public final class sk/ainet/lang/graph/exec/GraphExecutionContext$DefaultImpls { public static fun full (Lsk/ainet/lang/graph/exec/GraphExecutionContext;Lsk/ainet/lang/tensor/Shape;Lkotlin/reflect/KClass;Ljava/lang/Number;)Lsk/ainet/lang/tensor/Tensor; public static fun getHooks (Lsk/ainet/lang/graph/exec/GraphExecutionContext;)Lsk/ainet/lang/nn/hooks/ForwardHooks; public static fun getInTraining (Lsk/ainet/lang/graph/exec/GraphExecutionContext;)Z + public static fun getMemoryPlanner (Lsk/ainet/lang/graph/exec/GraphExecutionContext;)Lsk/ainet/lang/tensor/storage/MemoryPlanner; + public static fun getMemoryTracker (Lsk/ainet/lang/graph/exec/GraphExecutionContext;)Lsk/ainet/lang/tensor/storage/MemoryTracker; + public static fun getScratch (Lsk/ainet/lang/graph/exec/GraphExecutionContext;)Lsk/ainet/lang/tensor/scratch/ScratchPool; public static fun isRecording (Lsk/ainet/lang/graph/exec/GraphExecutionContext;)Z public static fun ones (Lsk/ainet/lang/graph/exec/GraphExecutionContext;Lsk/ainet/lang/tensor/Shape;Lkotlin/reflect/KClass;)Lsk/ainet/lang/tensor/Tensor; + public static fun placeholder (Lsk/ainet/lang/graph/exec/GraphExecutionContext;Lsk/ainet/lang/tensor/Shape;Lkotlin/reflect/KClass;)Lsk/ainet/lang/tensor/Tensor; public static fun registerObserver (Lsk/ainet/lang/graph/exec/GraphExecutionContext;Lsk/ainet/context/ExecutionObserver;)V public static fun unregisterObserver (Lsk/ainet/lang/graph/exec/GraphExecutionContext;Lsk/ainet/context/ExecutionObserver;)V + public static fun wrapByteArray 
(Lsk/ainet/lang/graph/exec/GraphExecutionContext;Lsk/ainet/lang/tensor/Shape;Lkotlin/reflect/KClass;[B)Lsk/ainet/lang/tensor/Tensor; + public static fun wrapFloatArray (Lsk/ainet/lang/graph/exec/GraphExecutionContext;Lsk/ainet/lang/tensor/Shape;Lkotlin/reflect/KClass;[F)Lsk/ainet/lang/tensor/Tensor; + public static fun wrapIntArray (Lsk/ainet/lang/graph/exec/GraphExecutionContext;Lsk/ainet/lang/tensor/Shape;Lkotlin/reflect/KClass;[I)Lsk/ainet/lang/tensor/Tensor; public static fun zeros (Lsk/ainet/lang/graph/exec/GraphExecutionContext;Lsk/ainet/lang/tensor/Shape;Lkotlin/reflect/KClass;)Lsk/ainet/lang/tensor/Tensor; } diff --git a/skainet-compile/skainet-compile-hlo/api/jvm/skainet-compile-hlo.api b/skainet-compile/skainet-compile-hlo/api/jvm/skainet-compile-hlo.api index a81a52a7..8277d959 100644 --- a/skainet-compile/skainet-compile-hlo/api/jvm/skainet-compile-hlo.api +++ b/skainet-compile/skainet-compile-hlo/api/jvm/skainet-compile-hlo.api @@ -113,7 +113,9 @@ public final class sk/ainet/compile/hlo/ConversionResult$Unsupported : sk/ainet/ public final class sk/ainet/compile/hlo/Dag2hloKt { public static final fun toStableHlo (Lsk/ainet/lang/graph/ComputeGraph;Ljava/lang/String;)Lsk/ainet/compile/hlo/StableHloModule; + public static final fun toStableHlo (Lsk/ainet/lang/graph/ResolvedComputeGraph;Ljava/lang/String;Z)Lsk/ainet/compile/hlo/StableHloModule; public static synthetic fun toStableHlo$default (Lsk/ainet/lang/graph/ComputeGraph;Ljava/lang/String;ILjava/lang/Object;)Lsk/ainet/compile/hlo/StableHloModule; + public static synthetic fun toStableHlo$default (Lsk/ainet/lang/graph/ResolvedComputeGraph;Ljava/lang/String;ZILjava/lang/Object;)Lsk/ainet/compile/hlo/StableHloModule; public static final fun toStableHloLegacy (Lsk/ainet/lang/graph/ComputeGraph;Ljava/lang/String;)Lsk/ainet/compile/hlo/StableHloModule; public static synthetic fun toStableHloLegacy$default 
(Lsk/ainet/lang/graph/ComputeGraph;Ljava/lang/String;ILjava/lang/Object;)Lsk/ainet/compile/hlo/StableHloModule; } @@ -323,12 +325,24 @@ public final class sk/ainet/compile/hlo/converters/ReductionOperationsConverter public fun getSupportedOperations ()Ljava/util/Set; } +public final class sk/ainet/compile/hlo/converters/ScalarOperationsConverter : sk/ainet/compile/hlo/StableHloOperationConverter { + public fun ()V + public fun convert (Lsk/ainet/lang/graph/GraphNode;Ljava/util/List;Lsk/ainet/compile/hlo/ConversionContext;)Lsk/ainet/compile/hlo/ConversionResult; + public fun getSupportedOperations ()Ljava/util/Set; +} + public final class sk/ainet/compile/hlo/converters/ShapeOperationsConverter : sk/ainet/compile/hlo/StableHloOperationConverter { public fun ()V public fun convert (Lsk/ainet/lang/graph/GraphNode;Ljava/util/List;Lsk/ainet/compile/hlo/ConversionContext;)Lsk/ainet/compile/hlo/ConversionResult; public fun getSupportedOperations ()Ljava/util/Set; } +public final class sk/ainet/compile/hlo/converters/UnaryMathConverter : sk/ainet/compile/hlo/StableHloOperationConverter { + public fun ()V + public fun convert (Lsk/ainet/lang/graph/GraphNode;Ljava/util/List;Lsk/ainet/compile/hlo/ConversionContext;)Lsk/ainet/compile/hlo/ConversionResult; + public fun getSupportedOperations ()Ljava/util/Set; +} + public final class sk/ainet/compile/hlo/examples/BasicExample { public static final field INSTANCE Lsk/ainet/compile/hlo/examples/BasicExample; public final fun convertToStableHlo (Lsk/ainet/lang/graph/DefaultComputeGraph;)Ljava/lang/String; diff --git a/skainet-compile/skainet-compile-opt/api/jvm/skainet-compile-opt.api b/skainet-compile/skainet-compile-opt/api/jvm/skainet-compile-opt.api index 3ca4c38c..f0f36daf 100644 --- a/skainet-compile/skainet-compile-opt/api/jvm/skainet-compile-opt.api +++ b/skainet-compile/skainet-compile-opt/api/jvm/skainet-compile-opt.api @@ -53,12 +53,28 @@ public final class sk/ainet/compile/opt/passes/ConstantFoldingPass : sk/ainet/co 
public fun getName ()Ljava/lang/String; } +public final class sk/ainet/compile/opt/passes/DTypeConstraintResolutionPass : sk/ainet/compile/opt/GraphOptimizationPass { + public static final field Companion Lsk/ainet/compile/opt/passes/DTypeConstraintResolutionPass$Companion; + public static final field POLICY_KEY Ljava/lang/String; + public static final field RESOLVED_KEY Ljava/lang/String; + public fun ()V + public fun apply (Lsk/ainet/lang/graph/ComputeGraph;)Lsk/ainet/compile/opt/GraphOptimizationResult; + public fun getName ()Ljava/lang/String; +} + +public final class sk/ainet/compile/opt/passes/DTypeConstraintResolutionPass$Companion { +} + public final class sk/ainet/compile/opt/passes/DeadCodeEliminationPass : sk/ainet/compile/opt/GraphOptimizationPass { public fun ()V public fun apply (Lsk/ainet/lang/graph/ComputeGraph;)Lsk/ainet/compile/opt/GraphOptimizationResult; public fun getName ()Ljava/lang/String; } +public final class sk/ainet/compile/opt/passes/DtypeConstraintViolationException : java/lang/RuntimeException { + public fun (Ljava/lang/String;)V +} + public final class sk/ainet/compile/opt/passes/LLMFusionPass : sk/ainet/compile/opt/GraphOptimizationPass { public fun ()V public fun apply (Lsk/ainet/lang/graph/ComputeGraph;)Lsk/ainet/compile/opt/GraphOptimizationResult; @@ -71,6 +87,12 @@ public final class sk/ainet/compile/opt/passes/OperationFusionPass : sk/ainet/co public fun getName ()Ljava/lang/String; } +public final class sk/ainet/compile/opt/passes/PowSpecializationPass : sk/ainet/compile/opt/GraphOptimizationPass { + public fun ()V + public fun apply (Lsk/ainet/lang/graph/ComputeGraph;)Lsk/ainet/compile/opt/GraphOptimizationResult; + public fun getName ()Ljava/lang/String; +} + public final class sk/ainet/compile/opt/passes/SharedWeightDeduplicationPass : sk/ainet/compile/opt/GraphOptimizationPass { public static final field Companion Lsk/ainet/compile/opt/passes/SharedWeightDeduplicationPass$Companion; public fun ()V diff --git 
a/skainet-lang/skainet-lang-core/api/jvm/skainet-lang-core.api b/skainet-lang/skainet-lang-core/api/jvm/skainet-lang-core.api index dab99d7e..1805010a 100644 --- a/skainet-lang/skainet-lang-core/api/jvm/skainet-lang-core.api +++ b/skainet-lang/skainet-lang-core/api/jvm/skainet-lang-core.api @@ -204,13 +204,20 @@ public final class sk/ainet/context/DefaultDataExecutionContext : sk/ainet/conte public fun getHooks ()Lsk/ainet/lang/nn/hooks/ForwardHooks; public fun getInTraining ()Z public fun getMemoryInfo ()Lsk/ainet/context/MemoryInfo; + public fun getMemoryPlanner ()Lsk/ainet/lang/tensor/storage/MemoryPlanner; + public fun getMemoryTracker ()Lsk/ainet/lang/tensor/storage/MemoryTracker; public fun getObservers ()Lsk/ainet/context/ExecutionObserverRegistry; public fun getOps ()Lsk/ainet/lang/tensor/ops/TensorOps; public fun getPhase ()Lsk/ainet/context/Phase; + public fun getScratch ()Lsk/ainet/lang/tensor/scratch/ScratchPool; public fun getTensorDataFactory ()Lsk/ainet/lang/tensor/data/TensorDataFactory; public fun ones (Lsk/ainet/lang/tensor/Shape;Lkotlin/reflect/KClass;)Lsk/ainet/lang/tensor/Tensor; + public fun placeholder (Lsk/ainet/lang/tensor/Shape;Lkotlin/reflect/KClass;)Lsk/ainet/lang/tensor/Tensor; public fun registerObserver (Lsk/ainet/context/ExecutionObserver;)V public fun unregisterObserver (Lsk/ainet/context/ExecutionObserver;)V + public fun wrapByteArray (Lsk/ainet/lang/tensor/Shape;Lkotlin/reflect/KClass;[B)Lsk/ainet/lang/tensor/Tensor; + public fun wrapFloatArray (Lsk/ainet/lang/tensor/Shape;Lkotlin/reflect/KClass;[F)Lsk/ainet/lang/tensor/Tensor; + public fun wrapIntArray (Lsk/ainet/lang/tensor/Shape;Lkotlin/reflect/KClass;[I)Lsk/ainet/lang/tensor/Tensor; public fun zeros (Lsk/ainet/lang/tensor/Shape;Lkotlin/reflect/KClass;)Lsk/ainet/lang/tensor/Tensor; } @@ -224,13 +231,20 @@ public abstract interface class sk/ainet/context/ExecutionContext { public fun getHooks ()Lsk/ainet/lang/nn/hooks/ForwardHooks; public fun getInTraining ()Z public abstract 
fun getMemoryInfo ()Lsk/ainet/context/MemoryInfo; + public fun getMemoryPlanner ()Lsk/ainet/lang/tensor/storage/MemoryPlanner; + public fun getMemoryTracker ()Lsk/ainet/lang/tensor/storage/MemoryTracker; public abstract fun getObservers ()Lsk/ainet/context/ExecutionObserverRegistry; public abstract fun getOps ()Lsk/ainet/lang/tensor/ops/TensorOps; public abstract fun getPhase ()Lsk/ainet/context/Phase; + public fun getScratch ()Lsk/ainet/lang/tensor/scratch/ScratchPool; public abstract fun getTensorDataFactory ()Lsk/ainet/lang/tensor/data/TensorDataFactory; public fun ones (Lsk/ainet/lang/tensor/Shape;Lkotlin/reflect/KClass;)Lsk/ainet/lang/tensor/Tensor; + public fun placeholder (Lsk/ainet/lang/tensor/Shape;Lkotlin/reflect/KClass;)Lsk/ainet/lang/tensor/Tensor; public fun registerObserver (Lsk/ainet/context/ExecutionObserver;)V public fun unregisterObserver (Lsk/ainet/context/ExecutionObserver;)V + public fun wrapByteArray (Lsk/ainet/lang/tensor/Shape;Lkotlin/reflect/KClass;[B)Lsk/ainet/lang/tensor/Tensor; + public fun wrapFloatArray (Lsk/ainet/lang/tensor/Shape;Lkotlin/reflect/KClass;[F)Lsk/ainet/lang/tensor/Tensor; + public fun wrapIntArray (Lsk/ainet/lang/tensor/Shape;Lkotlin/reflect/KClass;[I)Lsk/ainet/lang/tensor/Tensor; public fun zeros (Lsk/ainet/lang/tensor/Shape;Lkotlin/reflect/KClass;)Lsk/ainet/lang/tensor/Tensor; } @@ -242,9 +256,16 @@ public final class sk/ainet/context/ExecutionContext$DefaultImpls { public static fun full (Lsk/ainet/context/ExecutionContext;Lsk/ainet/lang/tensor/Shape;Lkotlin/reflect/KClass;Ljava/lang/Number;)Lsk/ainet/lang/tensor/Tensor; public static fun getHooks (Lsk/ainet/context/ExecutionContext;)Lsk/ainet/lang/nn/hooks/ForwardHooks; public static fun getInTraining (Lsk/ainet/context/ExecutionContext;)Z + public static fun getMemoryPlanner (Lsk/ainet/context/ExecutionContext;)Lsk/ainet/lang/tensor/storage/MemoryPlanner; + public static fun getMemoryTracker 
(Lsk/ainet/context/ExecutionContext;)Lsk/ainet/lang/tensor/storage/MemoryTracker; + public static fun getScratch (Lsk/ainet/context/ExecutionContext;)Lsk/ainet/lang/tensor/scratch/ScratchPool; public static fun ones (Lsk/ainet/context/ExecutionContext;Lsk/ainet/lang/tensor/Shape;Lkotlin/reflect/KClass;)Lsk/ainet/lang/tensor/Tensor; + public static fun placeholder (Lsk/ainet/context/ExecutionContext;Lsk/ainet/lang/tensor/Shape;Lkotlin/reflect/KClass;)Lsk/ainet/lang/tensor/Tensor; public static fun registerObserver (Lsk/ainet/context/ExecutionContext;Lsk/ainet/context/ExecutionObserver;)V public static fun unregisterObserver (Lsk/ainet/context/ExecutionContext;Lsk/ainet/context/ExecutionObserver;)V + public static fun wrapByteArray (Lsk/ainet/context/ExecutionContext;Lsk/ainet/lang/tensor/Shape;Lkotlin/reflect/KClass;[B)Lsk/ainet/lang/tensor/Tensor; + public static fun wrapFloatArray (Lsk/ainet/context/ExecutionContext;Lsk/ainet/lang/tensor/Shape;Lkotlin/reflect/KClass;[F)Lsk/ainet/lang/tensor/Tensor; + public static fun wrapIntArray (Lsk/ainet/context/ExecutionContext;Lsk/ainet/lang/tensor/Shape;Lkotlin/reflect/KClass;[I)Lsk/ainet/lang/tensor/Tensor; public static fun zeros (Lsk/ainet/context/ExecutionContext;Lsk/ainet/lang/tensor/Shape;Lkotlin/reflect/KClass;)Lsk/ainet/lang/tensor/Tensor; } @@ -334,13 +355,20 @@ public final class sk/ainet/context/PhaseOverridingExecutionContext : sk/ainet/c public fun getHooks ()Lsk/ainet/lang/nn/hooks/ForwardHooks; public fun getInTraining ()Z public fun getMemoryInfo ()Lsk/ainet/context/MemoryInfo; + public fun getMemoryPlanner ()Lsk/ainet/lang/tensor/storage/MemoryPlanner; + public fun getMemoryTracker ()Lsk/ainet/lang/tensor/storage/MemoryTracker; public fun getObservers ()Lsk/ainet/context/ExecutionObserverRegistry; public fun getOps ()Lsk/ainet/lang/tensor/ops/TensorOps; public fun getPhase ()Lsk/ainet/context/Phase; + public fun getScratch ()Lsk/ainet/lang/tensor/scratch/ScratchPool; public fun getTensorDataFactory 
()Lsk/ainet/lang/tensor/data/TensorDataFactory; public fun ones (Lsk/ainet/lang/tensor/Shape;Lkotlin/reflect/KClass;)Lsk/ainet/lang/tensor/Tensor; + public fun placeholder (Lsk/ainet/lang/tensor/Shape;Lkotlin/reflect/KClass;)Lsk/ainet/lang/tensor/Tensor; public fun registerObserver (Lsk/ainet/context/ExecutionObserver;)V public fun unregisterObserver (Lsk/ainet/context/ExecutionObserver;)V + public fun wrapByteArray (Lsk/ainet/lang/tensor/Shape;Lkotlin/reflect/KClass;[B)Lsk/ainet/lang/tensor/Tensor; + public fun wrapFloatArray (Lsk/ainet/lang/tensor/Shape;Lkotlin/reflect/KClass;[F)Lsk/ainet/lang/tensor/Tensor; + public fun wrapIntArray (Lsk/ainet/lang/tensor/Shape;Lkotlin/reflect/KClass;[I)Lsk/ainet/lang/tensor/Tensor; public fun zeros (Lsk/ainet/lang/tensor/Shape;Lkotlin/reflect/KClass;)Lsk/ainet/lang/tensor/Tensor; } @@ -374,9 +402,16 @@ public final class sk/ainet/context/TrainingExecutionContext$DefaultImpls { public static fun full (Lsk/ainet/context/TrainingExecutionContext;Lsk/ainet/lang/tensor/Shape;Lkotlin/reflect/KClass;Ljava/lang/Number;)Lsk/ainet/lang/tensor/Tensor; public static fun getHooks (Lsk/ainet/context/TrainingExecutionContext;)Lsk/ainet/lang/nn/hooks/ForwardHooks; public static fun getInTraining (Lsk/ainet/context/TrainingExecutionContext;)Z + public static fun getMemoryPlanner (Lsk/ainet/context/TrainingExecutionContext;)Lsk/ainet/lang/tensor/storage/MemoryPlanner; + public static fun getMemoryTracker (Lsk/ainet/context/TrainingExecutionContext;)Lsk/ainet/lang/tensor/storage/MemoryTracker; + public static fun getScratch (Lsk/ainet/context/TrainingExecutionContext;)Lsk/ainet/lang/tensor/scratch/ScratchPool; public static fun ones (Lsk/ainet/context/TrainingExecutionContext;Lsk/ainet/lang/tensor/Shape;Lkotlin/reflect/KClass;)Lsk/ainet/lang/tensor/Tensor; + public static fun placeholder (Lsk/ainet/context/TrainingExecutionContext;Lsk/ainet/lang/tensor/Shape;Lkotlin/reflect/KClass;)Lsk/ainet/lang/tensor/Tensor; public static fun registerObserver 
(Lsk/ainet/context/TrainingExecutionContext;Lsk/ainet/context/ExecutionObserver;)V public static fun unregisterObserver (Lsk/ainet/context/TrainingExecutionContext;Lsk/ainet/context/ExecutionObserver;)V + public static fun wrapByteArray (Lsk/ainet/context/TrainingExecutionContext;Lsk/ainet/lang/tensor/Shape;Lkotlin/reflect/KClass;[B)Lsk/ainet/lang/tensor/Tensor; + public static fun wrapFloatArray (Lsk/ainet/context/TrainingExecutionContext;Lsk/ainet/lang/tensor/Shape;Lkotlin/reflect/KClass;[F)Lsk/ainet/lang/tensor/Tensor; + public static fun wrapIntArray (Lsk/ainet/context/TrainingExecutionContext;Lsk/ainet/lang/tensor/Shape;Lkotlin/reflect/KClass;[I)Lsk/ainet/lang/tensor/Tensor; public static fun zeros (Lsk/ainet/context/TrainingExecutionContext;Lsk/ainet/lang/tensor/Shape;Lkotlin/reflect/KClass;)Lsk/ainet/lang/tensor/Tensor; } @@ -717,13 +752,20 @@ public final class sk/ainet/lang/nn/DefaultNeuralNetworkExecutionContext : sk/ai public fun getHooks ()Lsk/ainet/lang/nn/hooks/ForwardHooks; public fun getInTraining ()Z public fun getMemoryInfo ()Lsk/ainet/context/MemoryInfo; + public fun getMemoryPlanner ()Lsk/ainet/lang/tensor/storage/MemoryPlanner; + public fun getMemoryTracker ()Lsk/ainet/lang/tensor/storage/MemoryTracker; public fun getObservers ()Lsk/ainet/context/ExecutionObserverRegistry; public fun getOps ()Lsk/ainet/lang/tensor/ops/TensorOps; public fun getPhase ()Lsk/ainet/context/Phase; + public fun getScratch ()Lsk/ainet/lang/tensor/scratch/ScratchPool; public fun getTensorDataFactory ()Lsk/ainet/lang/tensor/data/TensorDataFactory; public fun ones (Lsk/ainet/lang/tensor/Shape;Lkotlin/reflect/KClass;)Lsk/ainet/lang/tensor/Tensor; + public fun placeholder (Lsk/ainet/lang/tensor/Shape;Lkotlin/reflect/KClass;)Lsk/ainet/lang/tensor/Tensor; public fun registerObserver (Lsk/ainet/context/ExecutionObserver;)V public fun unregisterObserver (Lsk/ainet/context/ExecutionObserver;)V + public fun wrapByteArray 
(Lsk/ainet/lang/tensor/Shape;Lkotlin/reflect/KClass;[B)Lsk/ainet/lang/tensor/Tensor; + public fun wrapFloatArray (Lsk/ainet/lang/tensor/Shape;Lkotlin/reflect/KClass;[F)Lsk/ainet/lang/tensor/Tensor; + public fun wrapIntArray (Lsk/ainet/lang/tensor/Shape;Lkotlin/reflect/KClass;[I)Lsk/ainet/lang/tensor/Tensor; public fun zeros (Lsk/ainet/lang/tensor/Shape;Lkotlin/reflect/KClass;)Lsk/ainet/lang/tensor/Tensor; } @@ -918,6 +960,16 @@ public abstract class sk/ainet/lang/nn/InternalMixedPrecisionModule : sk/ainet/l protected abstract fun forwardImpl (Lsk/ainet/lang/tensor/Tensor;)Lsk/ainet/lang/tensor/Tensor; } +public final class sk/ainet/lang/nn/LayerScale : sk/ainet/lang/nn/Module, sk/ainet/lang/nn/topology/ModuleParameters { + public fun (ILjava/lang/String;Lsk/ainet/lang/tensor/Tensor;)V + public synthetic fun (ILjava/lang/String;Lsk/ainet/lang/tensor/Tensor;ILkotlin/jvm/internal/DefaultConstructorMarker;)V + public fun forward (Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/context/ExecutionContext;)Lsk/ainet/lang/tensor/Tensor; + public final fun getDim ()I + public fun getModules ()Ljava/util/List; + public fun getName ()Ljava/lang/String; + public fun getParams ()Ljava/util/List; +} + public final class sk/ainet/lang/nn/Linear : sk/ainet/lang/nn/Module, sk/ainet/lang/nn/topology/ModuleParameters { public fun (IILjava/lang/String;Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;)V public fun (IILjava/lang/String;Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;Z)V @@ -998,9 +1050,16 @@ public final class sk/ainet/lang/nn/NeuralNetworkExecutionContext$DefaultImpls { public static fun full (Lsk/ainet/lang/nn/NeuralNetworkExecutionContext;Lsk/ainet/lang/tensor/Shape;Lkotlin/reflect/KClass;Ljava/lang/Number;)Lsk/ainet/lang/tensor/Tensor; public static fun getHooks (Lsk/ainet/lang/nn/NeuralNetworkExecutionContext;)Lsk/ainet/lang/nn/hooks/ForwardHooks; public static fun getInTraining (Lsk/ainet/lang/nn/NeuralNetworkExecutionContext;)Z + public static fun 
getMemoryPlanner (Lsk/ainet/lang/nn/NeuralNetworkExecutionContext;)Lsk/ainet/lang/tensor/storage/MemoryPlanner; + public static fun getMemoryTracker (Lsk/ainet/lang/nn/NeuralNetworkExecutionContext;)Lsk/ainet/lang/tensor/storage/MemoryTracker; + public static fun getScratch (Lsk/ainet/lang/nn/NeuralNetworkExecutionContext;)Lsk/ainet/lang/tensor/scratch/ScratchPool; public static fun ones (Lsk/ainet/lang/nn/NeuralNetworkExecutionContext;Lsk/ainet/lang/tensor/Shape;Lkotlin/reflect/KClass;)Lsk/ainet/lang/tensor/Tensor; + public static fun placeholder (Lsk/ainet/lang/nn/NeuralNetworkExecutionContext;Lsk/ainet/lang/tensor/Shape;Lkotlin/reflect/KClass;)Lsk/ainet/lang/tensor/Tensor; public static fun registerObserver (Lsk/ainet/lang/nn/NeuralNetworkExecutionContext;Lsk/ainet/context/ExecutionObserver;)V public static fun unregisterObserver (Lsk/ainet/lang/nn/NeuralNetworkExecutionContext;Lsk/ainet/context/ExecutionObserver;)V + public static fun wrapByteArray (Lsk/ainet/lang/nn/NeuralNetworkExecutionContext;Lsk/ainet/lang/tensor/Shape;Lkotlin/reflect/KClass;[B)Lsk/ainet/lang/tensor/Tensor; + public static fun wrapFloatArray (Lsk/ainet/lang/nn/NeuralNetworkExecutionContext;Lsk/ainet/lang/tensor/Shape;Lkotlin/reflect/KClass;[F)Lsk/ainet/lang/tensor/Tensor; + public static fun wrapIntArray (Lsk/ainet/lang/nn/NeuralNetworkExecutionContext;Lsk/ainet/lang/tensor/Shape;Lkotlin/reflect/KClass;[I)Lsk/ainet/lang/tensor/Tensor; public static fun zeros (Lsk/ainet/lang/nn/NeuralNetworkExecutionContext;Lsk/ainet/lang/tensor/Shape;Lkotlin/reflect/KClass;)Lsk/ainet/lang/tensor/Tensor; } @@ -1012,6 +1071,26 @@ public final class sk/ainet/lang/nn/TrainingRunnerKt { public static final fun trainStep (Lsk/ainet/lang/nn/Module;Lsk/ainet/lang/nn/loss/Loss;Lsk/ainet/lang/nn/optim/Optimizer;Lsk/ainet/context/ExecutionContext;Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;)Lsk/ainet/lang/tensor/Tensor; } +public final class sk/ainet/lang/nn/TransposedConv1d : sk/ainet/lang/nn/Module, 
sk/ainet/lang/nn/topology/ModuleParameters { + public fun (IIIIIIIIZLjava/lang/String;Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;Z)V + public synthetic fun (IIIIIIIIZLjava/lang/String;Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;ZILkotlin/jvm/internal/DefaultConstructorMarker;)V + public fun forward (Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/context/ExecutionContext;)Lsk/ainet/lang/tensor/Tensor; + public final fun getBias ()Z + public final fun getDilation ()I + public final fun getGroups ()I + public final fun getInChannels ()I + public final fun getKernelSize ()I + public fun getModules ()Ljava/util/List; + public fun getName ()Ljava/lang/String; + public final fun getOutChannels ()I + public final fun getOutputPadding ()I + public final fun getPadding ()I + public fun getParams ()Ljava/util/List; + public final fun getStride ()I + public final fun getTrainable ()Z + public final fun outputSize (I)I +} + public final class sk/ainet/lang/nn/TransposedConv2d : sk/ainet/lang/nn/Module, sk/ainet/lang/nn/topology/ModuleParameters { public static final field Companion Lsk/ainet/lang/nn/TransposedConv2d$Companion; public fun (IILkotlin/Pair;Lkotlin/Pair;Lkotlin/Pair;Lkotlin/Pair;Lkotlin/Pair;IZLjava/lang/String;Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;)V @@ -1147,6 +1226,16 @@ public final class sk/ainet/lang/nn/activations/Sigmoid : sk/ainet/lang/nn/Modul public fun getName ()Ljava/lang/String; } +public final class sk/ainet/lang/nn/activations/Snake : sk/ainet/lang/nn/Module, sk/ainet/lang/nn/topology/ModuleParameters { + public fun (ILjava/lang/String;Lsk/ainet/lang/tensor/Tensor;)V + public synthetic fun (ILjava/lang/String;Lsk/ainet/lang/tensor/Tensor;ILkotlin/jvm/internal/DefaultConstructorMarker;)V + public fun forward (Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/context/ExecutionContext;)Lsk/ainet/lang/tensor/Tensor; + public final fun getChannels ()I + public fun getModules ()Ljava/util/List; + public fun getName 
()Ljava/lang/String; + public fun getParams ()Ljava/util/List; +} + public final class sk/ainet/lang/nn/activations/Softmax : sk/ainet/lang/nn/Module { public fun (ILjava/lang/String;)V public synthetic fun (ILjava/lang/String;ILkotlin/jvm/internal/DefaultConstructorMarker;)V @@ -1504,7 +1593,9 @@ public abstract interface class sk/ainet/lang/nn/dsl/NeuralNetworkDsl : sk/ainet public abstract fun groupNorm (IIDZLjava/lang/String;)V public static synthetic fun groupNorm$default (Lsk/ainet/lang/nn/dsl/NeuralNetworkDsl;IIDZLjava/lang/String;ILjava/lang/Object;)V public abstract fun input (ILjava/lang/String;Z)V + public fun input ([ILjava/lang/String;Z)V public static synthetic fun input$default (Lsk/ainet/lang/nn/dsl/NeuralNetworkDsl;ILjava/lang/String;ZILjava/lang/Object;)V + public static synthetic fun input$default (Lsk/ainet/lang/nn/dsl/NeuralNetworkDsl;[ILjava/lang/String;ZILjava/lang/Object;)V public abstract fun layerNorm ([IDZLjava/lang/String;)V public static synthetic fun layerNorm$default (Lsk/ainet/lang/nn/dsl/NeuralNetworkDsl;[IDZLjava/lang/String;ILjava/lang/Object;)V public abstract fun maxPool2d (Ljava/lang/String;Lkotlin/jvm/functions/Function1;)V @@ -1536,7 +1627,9 @@ public final class sk/ainet/lang/nn/dsl/NeuralNetworkDsl$DefaultImpls { public static synthetic fun dense$default (Lsk/ainet/lang/nn/dsl/NeuralNetworkDsl;Ljava/lang/String;Lkotlin/jvm/functions/Function1;ILjava/lang/Object;)V public static synthetic fun flatten$default (Lsk/ainet/lang/nn/dsl/NeuralNetworkDsl;Ljava/lang/String;Lkotlin/jvm/functions/Function1;ILjava/lang/Object;)V public static synthetic fun groupNorm$default (Lsk/ainet/lang/nn/dsl/NeuralNetworkDsl;IIDZLjava/lang/String;ILjava/lang/Object;)V + public static fun input (Lsk/ainet/lang/nn/dsl/NeuralNetworkDsl;[ILjava/lang/String;Z)V public static synthetic fun input$default (Lsk/ainet/lang/nn/dsl/NeuralNetworkDsl;ILjava/lang/String;ZILjava/lang/Object;)V + public static synthetic fun input$default 
(Lsk/ainet/lang/nn/dsl/NeuralNetworkDsl;[ILjava/lang/String;ZILjava/lang/Object;)V public static synthetic fun layerNorm$default (Lsk/ainet/lang/nn/dsl/NeuralNetworkDsl;[IDZLjava/lang/String;ILjava/lang/Object;)V public static synthetic fun maxPool2d$default (Lsk/ainet/lang/nn/dsl/NeuralNetworkDsl;Ljava/lang/String;Lkotlin/jvm/functions/Function1;ILjava/lang/Object;)V public static synthetic fun maxPool2d$default (Lsk/ainet/lang/nn/dsl/NeuralNetworkDsl;Lkotlin/Pair;Lkotlin/Pair;Lkotlin/Pair;Ljava/lang/String;ILjava/lang/Object;)V @@ -1560,16 +1653,19 @@ public final class sk/ainet/lang/nn/dsl/NeuralNetworkDslImpl : sk/ainet/lang/nn/ public fun dense (Ljava/lang/String;Lkotlin/jvm/functions/Function1;)Lsk/ainet/lang/nn/Module; public fun dense (Ljava/lang/String;Lkotlin/jvm/functions/Function1;)V public fun flatten (Ljava/lang/String;Lkotlin/jvm/functions/Function1;)V + public final fun getCurrentShape ()[I public fun getExecutionContext ()Lsk/ainet/context/ExecutionContext; public final fun getKClass ()Lkotlin/reflect/KClass; public final fun getLastDimension ()I public final fun getModules ()Ljava/util/List; public fun groupNorm (IIDZLjava/lang/String;)V public fun input (ILjava/lang/String;Z)V + public fun input ([ILjava/lang/String;Z)V public fun layerNorm ([IDZLjava/lang/String;)V public fun maxPool2d (Ljava/lang/String;Lkotlin/jvm/functions/Function1;)V public fun maxPool2d (Lkotlin/Pair;Lkotlin/Pair;Lkotlin/Pair;Ljava/lang/String;)V public fun sequential (Lkotlin/jvm/functions/Function1;)V + public final fun setCurrentShape ([I)V public final fun setLastDimension (I)V public fun softmax (ILjava/lang/String;)V public fun stage (Ljava/lang/String;Lkotlin/jvm/functions/Function1;)Lsk/ainet/lang/nn/Module; @@ -1597,6 +1693,7 @@ public final class sk/ainet/lang/nn/dsl/StageImpl : sk/ainet/lang/nn/dsl/NeuralN public fun dense (Ljava/lang/String;Lkotlin/jvm/functions/Function1;)Lsk/ainet/lang/nn/Module; public fun dense 
(Ljava/lang/String;Lkotlin/jvm/functions/Function1;)V public fun flatten (Ljava/lang/String;Lkotlin/jvm/functions/Function1;)V + public final fun getCurrentShape ()[I public fun getExecutionContext ()Lsk/ainet/context/ExecutionContext; public final fun getInputDimension ()I public final fun getKClass ()Lkotlin/reflect/KClass; @@ -1604,10 +1701,12 @@ public final class sk/ainet/lang/nn/dsl/StageImpl : sk/ainet/lang/nn/dsl/NeuralN public final fun getModules ()Ljava/util/List; public fun groupNorm (IIDZLjava/lang/String;)V public fun input (ILjava/lang/String;Z)V + public fun input ([ILjava/lang/String;Z)V public fun layerNorm ([IDZLjava/lang/String;)V public fun maxPool2d (Ljava/lang/String;Lkotlin/jvm/functions/Function1;)V public fun maxPool2d (Lkotlin/Pair;Lkotlin/Pair;Lkotlin/Pair;Ljava/lang/String;)V public fun sequential (Lkotlin/jvm/functions/Function1;)V + public final fun setCurrentShape ([I)V public final fun setInputDimension (I)V public final fun setLastDimension (I)V public fun softmax (ILjava/lang/String;)V @@ -1634,6 +1733,8 @@ public final class sk/ainet/lang/nn/dsl/TensorOpsNetworkDslKt { public static synthetic fun sigmoid$default (Lsk/ainet/lang/nn/dsl/NeuralNetworkDsl;Ljava/lang/String;ILjava/lang/Object;)V public static final fun silu (Lsk/ainet/lang/nn/dsl/NeuralNetworkDsl;Ljava/lang/String;)V public static synthetic fun silu$default (Lsk/ainet/lang/nn/dsl/NeuralNetworkDsl;Ljava/lang/String;ILjava/lang/Object;)V + public static final fun tanh (Lsk/ainet/lang/nn/dsl/NeuralNetworkDsl;Ljava/lang/String;)V + public static synthetic fun tanh$default (Lsk/ainet/lang/nn/dsl/NeuralNetworkDsl;Ljava/lang/String;ILjava/lang/Object;)V } public final class sk/ainet/lang/nn/dsl/TrainingConfig { @@ -2132,6 +2233,30 @@ public final class sk/ainet/lang/nn/topology/TraversalKt { public static final fun walkDepthFirst (Lsk/ainet/lang/nn/topology/ModuleNode;Lkotlin/jvm/functions/Function1;)V } +public class sk/ainet/lang/tensor/ArrayCreationBenchmark { + public 
fun ()V + public final fun fromFloatArray_copy ()Lsk/ainet/lang/tensor/data/TensorData; + public final fun setup ()V + public final fun wrapFloatArray_zeroCopy ()Lsk/ainet/lang/tensor/data/TensorData; +} + +public class sk/ainet/lang/tensor/BitPackerBenchmark { + public fun ()V + public final fun pack_2bit_1024 ()[B + public final fun pack_4bit_1024 ()[B + public final fun pack_4bit_128 ()[B + public final fun setup ()V + public final fun unpack_2bit_1024 ()[B + public final fun unpack_4bit_1024 ()[B +} + +public class sk/ainet/lang/tensor/BufferAccessorBenchmark { + public fun ()V + public final fun heapAccessor_readByte_sequential ()J + public final fun heapAccessor_readBytes_1KB ()[B + public final fun setup ()V +} + public final class sk/ainet/lang/tensor/ContiguityAnalysis { public fun (ZIIDLjava/util/List;Ljava/lang/String;)V public final fun component1 ()Z @@ -2177,6 +2302,14 @@ public class sk/ainet/lang/tensor/DefaultTensorViewStrategy : sk/ainet/lang/tens public fun shouldCreateView (Lsk/ainet/lang/tensor/Tensor;Ljava/util/List;)Z } +public class sk/ainet/lang/tensor/DequantizationBenchmark { + public fun ()V + public final fun dequantQ4K ()[F + public final fun dequantQ8_0 ()[F + public final fun dequantTernary ()[F + public final fun setup ()V +} + public final class sk/ainet/lang/tensor/GradState { public fun ()V public fun (ZLsk/ainet/lang/tensor/Tensor;)V @@ -2235,10 +2368,12 @@ public final class sk/ainet/lang/tensor/LazyMaterializationStrategy : sk/ainet/l public final class sk/ainet/lang/tensor/MaterializationExtensionsKt { public static final fun canMaterialize (Lsk/ainet/lang/tensor/TensorView;)Z public static final fun canMaterialize (Lsk/ainet/lang/tensor/TensorView;Lsk/ainet/lang/tensor/MaterializationStrategy;)Z + public static final fun copyMaterialize (Lsk/ainet/lang/tensor/TensorView;)Lsk/ainet/lang/tensor/Tensor; public static final fun estimateMaterializationCost (Lsk/ainet/lang/tensor/TensorView;)J public static final fun 
estimateMaterializationCost (Lsk/ainet/lang/tensor/TensorView;Lsk/ainet/lang/tensor/MaterializationStrategy;)J public static final fun materialize (Lsk/ainet/lang/tensor/TensorView;)Lsk/ainet/lang/tensor/Tensor; public static final fun materialize (Lsk/ainet/lang/tensor/TensorView;Lsk/ainet/lang/tensor/MaterializationStrategy;)Lsk/ainet/lang/tensor/Tensor; + public static final fun realizeAlias (Lsk/ainet/lang/tensor/TensorView;)Lsk/ainet/lang/tensor/Tensor; } public abstract interface class sk/ainet/lang/tensor/MaterializationStrategy { @@ -2284,6 +2419,14 @@ public final class sk/ainet/lang/tensor/PprintKt { public static final fun pprint (Lsk/ainet/lang/tensor/Tensor;)Ljava/lang/String; } +public class sk/ainet/lang/tensor/RandomRotationBenchmark { + public fun ()V + public final fun rotateInverse_128d ()[F + public final fun rotate_128d ()[F + public final fun rotate_256d ()[F + public final fun setup ()V +} + public final class sk/ainet/lang/tensor/SegmentBuilder { public fun (I)V public final fun all ()Lsk/ainet/lang/tensor/Slice; @@ -2406,6 +2549,15 @@ public final class sk/ainet/lang/tensor/SlicedTensorView : sk/ainet/lang/tensor/ public fun zeroGrad ()V } +public class sk/ainet/lang/tensor/StorageBridgeBenchmark { + public fun ()V + public final fun floatTensorData_toStorage ()Lsk/ainet/lang/tensor/storage/TensorStorage; + public final fun q4kTensorData_toStorage ()Lsk/ainet/lang/tensor/storage/TensorStorage; + public final fun setup ()V + public final fun storage_toTensorData_float ()Lsk/ainet/lang/tensor/data/TensorData; + public final fun storage_toTensorData_q4k ()Lsk/ainet/lang/tensor/data/TensorData; +} + public abstract interface class sk/ainet/lang/tensor/Tensor { public fun accumulateGrad (Lsk/ainet/lang/tensor/Tensor;)V public abstract fun getData ()Lsk/ainet/lang/tensor/data/TensorData; @@ -2461,6 +2613,9 @@ public final class sk/ainet/lang/tensor/TensorExtensionsKt { public static final fun gelu 
(Lsk/ainet/lang/tensor/Tensor;)Lsk/ainet/lang/tensor/Tensor; public static final fun leakyRelu (Lsk/ainet/lang/tensor/Tensor;F)Lsk/ainet/lang/tensor/Tensor; public static synthetic fun leakyRelu$default (Lsk/ainet/lang/tensor/Tensor;FILjava/lang/Object;)Lsk/ainet/lang/tensor/Tensor; + public static final fun log (Lsk/ainet/lang/tensor/Tensor;)Lsk/ainet/lang/tensor/Tensor; + public static final fun log10 (Lsk/ainet/lang/tensor/Tensor;)Lsk/ainet/lang/tensor/Tensor; + public static final fun log2 (Lsk/ainet/lang/tensor/Tensor;)Lsk/ainet/lang/tensor/Tensor; public static final fun logSoftmax (Lsk/ainet/lang/tensor/Tensor;I)Lsk/ainet/lang/tensor/Tensor; public static synthetic fun logSoftmax$default (Lsk/ainet/lang/tensor/Tensor;IILjava/lang/Object;)Lsk/ainet/lang/tensor/Tensor; public static final fun lt (Lsk/ainet/lang/tensor/Tensor;F)Lsk/ainet/lang/tensor/Tensor; @@ -2475,6 +2630,8 @@ public final class sk/ainet/lang/tensor/TensorExtensionsKt { public static final fun plus (Ljava/lang/Number;Lsk/ainet/lang/tensor/Tensor;)Lsk/ainet/lang/tensor/Tensor; public static final fun plus (Lsk/ainet/lang/tensor/Tensor;Ljava/lang/Number;)Lsk/ainet/lang/tensor/Tensor; public static final fun plus (Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;)Lsk/ainet/lang/tensor/Tensor; + public static final fun pow (Lsk/ainet/lang/tensor/Tensor;Ljava/lang/Number;)Lsk/ainet/lang/tensor/Tensor; + public static final fun pow (Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;)Lsk/ainet/lang/tensor/Tensor; public static final fun relu (Lsk/ainet/lang/tensor/Tensor;)Lsk/ainet/lang/tensor/Tensor; public static final fun reshape (Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Shape;)Lsk/ainet/lang/tensor/Tensor; public static final fun sigmoid (Lsk/ainet/lang/tensor/Tensor;)Lsk/ainet/lang/tensor/Tensor; @@ -2488,6 +2645,7 @@ public final class sk/ainet/lang/tensor/TensorExtensionsKt { public static final fun sum 
(Lsk/ainet/lang/tensor/Tensor;Ljava/lang/Integer;)Lsk/ainet/lang/tensor/Tensor; public static synthetic fun sum$default (Lsk/ainet/lang/tensor/Tensor;Ljava/lang/Integer;ILjava/lang/Object;)Lsk/ainet/lang/tensor/Tensor; public static final fun t (Lsk/ainet/lang/tensor/Tensor;)Lsk/ainet/lang/tensor/Tensor; + public static final fun tanh (Lsk/ainet/lang/tensor/Tensor;)Lsk/ainet/lang/tensor/Tensor; public static final fun times (Ljava/lang/Number;Lsk/ainet/lang/tensor/Tensor;)Lsk/ainet/lang/tensor/Tensor; public static final fun times (Lsk/ainet/lang/tensor/Tensor;Ljava/lang/Number;)Lsk/ainet/lang/tensor/Tensor; public static final fun times (Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;)Lsk/ainet/lang/tensor/Tensor; @@ -2551,6 +2709,35 @@ public final class sk/ainet/lang/tensor/TesnorExtKt { public static final fun isVector (Lsk/ainet/lang/tensor/Tensor;)Z } +public class sk/ainet/lang/tensor/TurboQuantDecodeBenchmark { + public fun ()V + public final fun decode_3bit_128d ()[F + public final fun decode_4bit_128d ()[F + public final fun decode_4bit_256d ()[F + public final fun decode_4bit_qjl_128d ()[F + public final fun decode_8bit_128d ()[F + public final fun setup ()V +} + +public class sk/ainet/lang/tensor/TurboQuantEncodeBenchmark { + public fun ()V + public final fun encode_3bit_128d ()Lsk/ainet/lang/tensor/ops/turboquant/TurboQuantBlock; + public final fun encode_4bit_128d ()Lsk/ainet/lang/tensor/ops/turboquant/TurboQuantBlock; + public final fun encode_4bit_256d ()Lsk/ainet/lang/tensor/ops/turboquant/TurboQuantBlock; + public final fun encode_4bit_qjl_128d ()Lsk/ainet/lang/tensor/ops/turboquant/TurboQuantBlock; + public final fun encode_8bit_128d ()Lsk/ainet/lang/tensor/ops/turboquant/TurboQuantBlock; + public final fun setup ()V +} + +public class sk/ainet/lang/tensor/TurboQuantKvCacheBenchmark { + public fun ()V + public final fun appendToken_dense ()V + public final fun appendToken_turbo4bit ()V + public final fun readKeys_dense_16tokens ()V + 
public final fun readKeys_turbo4bit_16tokens ()V + public final fun setup ()V +} + public final class sk/ainet/lang/tensor/ViewManagementExtensionsKt { public static final fun analyzeContiguity (Lsk/ainet/lang/tensor/TensorView;)Lsk/ainet/lang/tensor/ContiguityAnalysis; public static final fun batchMaterialize (Ljava/util/List;)Ljava/util/List; @@ -2600,6 +2787,43 @@ public final class sk/ainet/lang/tensor/benchmark/SlicingBenchmarksKt { public static synthetic fun slicingBenchmarkSuite$default (Ljava/lang/String;Lsk/ainet/lang/tensor/benchmark/SlicingBenchmarkConfig;Lkotlin/jvm/functions/Function0;Lkotlin/jvm/functions/Function2;ILjava/lang/Object;)Lsk/ainet/benchmark/BenchmarkSuite; } +public final class sk/ainet/lang/tensor/data/Bf16DenseTensorData : sk/ainet/lang/tensor/data/Bf16TensorData { + public static final field Companion Lsk/ainet/lang/tensor/data/Bf16DenseTensorData$Companion; + public fun (Lsk/ainet/lang/tensor/Shape;[B)V + public fun copyToFloatArray ()[F + public fun get ([I)Ljava/lang/Float; + public synthetic fun get ([I)Ljava/lang/Object; + public fun getPackedData ()[B + public fun getShape ()Lsk/ainet/lang/tensor/Shape; + public fun set ([IF)V + public synthetic fun set ([ILjava/lang/Object;)V +} + +public final class sk/ainet/lang/tensor/data/Bf16DenseTensorData$Companion { + public final fun fromFloatArray (Lsk/ainet/lang/tensor/Shape;[F)Lsk/ainet/lang/tensor/data/Bf16DenseTensorData; + public final fun fromRawBytes (Lsk/ainet/lang/tensor/Shape;[B)Lsk/ainet/lang/tensor/data/Bf16DenseTensorData; +} + +public abstract interface class sk/ainet/lang/tensor/data/Bf16TensorData : sk/ainet/lang/tensor/data/TensorData { + public static final field BYTES_PER_ELEMENT I + public static final field Companion Lsk/ainet/lang/tensor/data/Bf16TensorData$Companion; + public abstract fun getPackedData ()[B +} + +public final class sk/ainet/lang/tensor/data/Bf16TensorData$Companion { + public static final field BYTES_PER_ELEMENT I + public final fun 
bf16BitsToFloat (I)F + public final fun floatToBf16Bits (F)I +} + +public final class sk/ainet/lang/tensor/data/Bf16TensorData$DefaultImpls { + public static fun copyToFloatArray (Lsk/ainet/lang/tensor/data/Bf16TensorData;)[F +} + +public final class sk/ainet/lang/tensor/data/Bf16TensorDataKt { + public static final fun toFloatArray (Lsk/ainet/lang/tensor/data/Bf16TensorData;)[F +} + public final class sk/ainet/lang/tensor/data/DenseFloatArrayTensorData : sk/ainet/lang/tensor/data/FloatArrayTensorData { public fun (Lsk/ainet/lang/tensor/Shape;[F)V public fun copyToFloatArray ()[F @@ -2636,6 +2860,7 @@ public final class sk/ainet/lang/tensor/data/DenseTensorDataFactory : sk/ainet/l public fun init (Lsk/ainet/lang/tensor/Shape;Lkotlin/reflect/KClass;Lkotlin/jvm/functions/Function1;)Lsk/ainet/lang/tensor/data/TensorData; public final fun matrix ([[Ljava/lang/Object;)Lsk/ainet/lang/tensor/data/TensorData; public fun ones (Lsk/ainet/lang/tensor/Shape;Lkotlin/reflect/KClass;)Lsk/ainet/lang/tensor/data/TensorData; + public fun placeholder (Lsk/ainet/lang/tensor/Shape;Lkotlin/reflect/KClass;)Lsk/ainet/lang/tensor/data/TensorData; public final fun randn (Lsk/ainet/lang/tensor/Shape;FFLsk/ainet/lang/types/DType;Lkotlin/random/Random;)Lsk/ainet/lang/tensor/data/TensorData; public fun randn (Lsk/ainet/lang/tensor/Shape;Lkotlin/reflect/KClass;FFLkotlin/random/Random;)Lsk/ainet/lang/tensor/data/TensorData; public static synthetic fun randn$default (Lsk/ainet/lang/tensor/data/DenseTensorDataFactory;Lsk/ainet/lang/tensor/Shape;FFLsk/ainet/lang/types/DType;Lkotlin/random/Random;ILjava/lang/Object;)Lsk/ainet/lang/tensor/data/TensorData; @@ -2644,6 +2869,9 @@ public final class sk/ainet/lang/tensor/data/DenseTensorDataFactory : sk/ainet/l public final fun scalar (Ljava/lang/Object;)Lsk/ainet/lang/tensor/data/TensorData; public fun uniform (Lsk/ainet/lang/tensor/Shape;Lkotlin/reflect/KClass;FFLkotlin/random/Random;)Lsk/ainet/lang/tensor/data/TensorData; public final fun vector 
([Ljava/lang/Object;)Lsk/ainet/lang/tensor/data/TensorData; + public fun wrapByteArray (Lsk/ainet/lang/tensor/Shape;Lkotlin/reflect/KClass;[B)Lsk/ainet/lang/tensor/data/TensorData; + public fun wrapFloatArray (Lsk/ainet/lang/tensor/Shape;Lkotlin/reflect/KClass;[F)Lsk/ainet/lang/tensor/data/TensorData; + public fun wrapIntArray (Lsk/ainet/lang/tensor/Shape;Lkotlin/reflect/KClass;[I)Lsk/ainet/lang/tensor/data/TensorData; public fun zeros (Lsk/ainet/lang/tensor/Shape;Lkotlin/reflect/KClass;)Lsk/ainet/lang/tensor/data/TensorData; } @@ -2677,6 +2905,28 @@ public abstract interface class sk/ainet/lang/tensor/data/ItemsAccessor { public abstract fun set ([ILjava/lang/Object;)V } +public final class sk/ainet/lang/tensor/data/LazyZeroFloatArrayTensorData : sk/ainet/lang/tensor/data/FloatArrayTensorData { + public fun (Lsk/ainet/lang/tensor/Shape;)V + public fun copyToFloatArray ()[F + public fun get ([I)Ljava/lang/Float; + public synthetic fun get ([I)Ljava/lang/Object; + public fun getBuffer ()[F + public fun getShape ()Lsk/ainet/lang/tensor/Shape; + public fun set ([IF)V + public synthetic fun set ([ILjava/lang/Object;)V +} + +public final class sk/ainet/lang/tensor/data/LazyZeroIntArrayTensorData : sk/ainet/lang/tensor/data/IntArrayTensorData { + public fun (Lsk/ainet/lang/tensor/Shape;)V + public fun copyToFloatArray ()[F + public fun get ([I)Ljava/lang/Integer; + public synthetic fun get ([I)Ljava/lang/Object; + public fun getBuffer ()[I + public fun getShape ()Lsk/ainet/lang/tensor/Shape; + public fun set ([II)V + public synthetic fun set ([ILjava/lang/Object;)V +} + public abstract interface class sk/ainet/lang/tensor/data/MemorySegmentBackedData { public abstract fun getSegment ()Ljava/lang/foreign/MemorySegment; public abstract fun getSegmentByteOffset ()J @@ -2704,8 +2954,8 @@ public final class sk/ainet/lang/tensor/data/MemorySegmentTensorData : sk/ainet/ public final class sk/ainet/lang/tensor/data/MemorySegmentTensorDataFactory : java/lang/AutoCloseable, 
sk/ainet/lang/tensor/data/TensorDataFactory { public fun ()V - public fun (Ljava/lang/foreign/Arena;J)V - public synthetic fun (Ljava/lang/foreign/Arena;JILkotlin/jvm/internal/DefaultConstructorMarker;)V + public fun (J)V + public synthetic fun (JILkotlin/jvm/internal/DefaultConstructorMarker;)V public fun close ()V public fun fromByteArray (Lsk/ainet/lang/tensor/Shape;Lkotlin/reflect/KClass;[B)Lsk/ainet/lang/tensor/data/TensorData; public fun fromFloatArray (Lsk/ainet/lang/tensor/Shape;Lkotlin/reflect/KClass;[F)Lsk/ainet/lang/tensor/data/TensorData; @@ -2713,9 +2963,13 @@ public final class sk/ainet/lang/tensor/data/MemorySegmentTensorDataFactory : ja public fun full (Lsk/ainet/lang/tensor/Shape;Lkotlin/reflect/KClass;Ljava/lang/Number;)Lsk/ainet/lang/tensor/data/TensorData; public fun init (Lsk/ainet/lang/tensor/Shape;Lkotlin/reflect/KClass;Lkotlin/jvm/functions/Function1;)Lsk/ainet/lang/tensor/data/TensorData; public fun ones (Lsk/ainet/lang/tensor/Shape;Lkotlin/reflect/KClass;)Lsk/ainet/lang/tensor/data/TensorData; + public fun placeholder (Lsk/ainet/lang/tensor/Shape;Lkotlin/reflect/KClass;)Lsk/ainet/lang/tensor/data/TensorData; public fun randn (Lsk/ainet/lang/tensor/Shape;Lkotlin/reflect/KClass;FFLkotlin/random/Random;)Lsk/ainet/lang/tensor/data/TensorData; public fun randomInit (Lsk/ainet/lang/tensor/Shape;Lkotlin/reflect/KClass;Lkotlin/jvm/functions/Function1;Lkotlin/random/Random;)Lsk/ainet/lang/tensor/data/TensorData; public fun uniform (Lsk/ainet/lang/tensor/Shape;Lkotlin/reflect/KClass;FFLkotlin/random/Random;)Lsk/ainet/lang/tensor/data/TensorData; + public fun wrapByteArray (Lsk/ainet/lang/tensor/Shape;Lkotlin/reflect/KClass;[B)Lsk/ainet/lang/tensor/data/TensorData; + public fun wrapFloatArray (Lsk/ainet/lang/tensor/Shape;Lkotlin/reflect/KClass;[F)Lsk/ainet/lang/tensor/data/TensorData; + public fun wrapIntArray (Lsk/ainet/lang/tensor/Shape;Lkotlin/reflect/KClass;[I)Lsk/ainet/lang/tensor/data/TensorData; public fun zeros 
(Lsk/ainet/lang/tensor/Shape;Lkotlin/reflect/KClass;)Lsk/ainet/lang/tensor/data/TensorData; } @@ -2776,22 +3030,78 @@ public final class sk/ainet/lang/tensor/data/Q4MemorySegmentTensorData$Companion public static synthetic fun fromRawBytes$default (Lsk/ainet/lang/tensor/data/Q4MemorySegmentTensorData$Companion;Lsk/ainet/lang/tensor/Shape;[BLjava/lang/foreign/Arena;JILjava/lang/Object;)Lsk/ainet/lang/tensor/data/Q4MemorySegmentTensorData; } -public final class sk/ainet/lang/tensor/data/Q4_KBlockTensorData : sk/ainet/lang/tensor/data/Q4_KTensorData { +public final class sk/ainet/lang/tensor/data/Q4_0BlockTensorData : sk/ainet/lang/tensor/data/Q4_0TensorData, sk/ainet/lang/tensor/storage/PackedBlockStorage { + public static final field Companion Lsk/ainet/lang/tensor/data/Q4_0BlockTensorData$Companion; + public fun (Lsk/ainet/lang/tensor/Shape;[B)V + public fun copyToFloatArray ()[F + public fun dequantizeBlock (I[FI)V + public fun get ([I)Ljava/lang/Byte; + public synthetic fun get ([I)Ljava/lang/Object; + public fun getBlockCount ()I + public fun getBlockScale (I)F + public fun getBlockSize ()I + public fun getCode (II)B + public fun getElementCount ()J + public fun getEncoding ()Lsk/ainet/lang/tensor/storage/TensorEncoding; + public fun getPackedData ()[B + public fun getPhysicalBytes ()J + public fun getShape ()Lsk/ainet/lang/tensor/Shape; + public fun set ([IB)V + public synthetic fun set ([ILjava/lang/Object;)V + public fun toFloatArray ()[F + public fun toTensorStorage (Lsk/ainet/lang/tensor/storage/LogicalDType;Lsk/ainet/lang/tensor/storage/Placement;)Lsk/ainet/lang/tensor/storage/TensorStorage; +} + +public final class sk/ainet/lang/tensor/data/Q4_0BlockTensorData$Companion { + public final fun fromRawBytes (Lsk/ainet/lang/tensor/Shape;[B)Lsk/ainet/lang/tensor/data/Q4_0BlockTensorData; +} + +public abstract interface class sk/ainet/lang/tensor/data/Q4_0TensorData : sk/ainet/lang/tensor/data/TensorData { + public static final field BLOCK_SIZE I + public static 
final field BYTES_PER_BLOCK I + public static final field Companion Lsk/ainet/lang/tensor/data/Q4_0TensorData$Companion; + public abstract fun getBlockCount ()I + public abstract fun getBlockScale (I)F + public abstract fun getCode (II)B + public abstract fun getPackedData ()[B +} + +public final class sk/ainet/lang/tensor/data/Q4_0TensorData$Companion { + public static final field BLOCK_SIZE I + public static final field BYTES_PER_BLOCK I +} + +public final class sk/ainet/lang/tensor/data/Q4_0TensorData$DefaultImpls { + public static fun copyToFloatArray (Lsk/ainet/lang/tensor/data/Q4_0TensorData;)[F +} + +public final class sk/ainet/lang/tensor/data/Q4_0TensorDataKt { + public static final fun toFloatArray (Lsk/ainet/lang/tensor/data/Q4_0TensorData;)[F +} + +public final class sk/ainet/lang/tensor/data/Q4_KBlockTensorData : sk/ainet/lang/tensor/data/Q4_KTensorData, sk/ainet/lang/tensor/storage/PackedBlockStorage { public static final field Companion Lsk/ainet/lang/tensor/data/Q4_KBlockTensorData$Companion; public fun (Lsk/ainet/lang/tensor/Shape;[B)V public fun copyToFloatArray ()[F + public fun dequantizeBlock (I[FI)V public fun get ([I)Ljava/lang/Byte; public synthetic fun get ([I)Ljava/lang/Object; public fun getBlockCount ()I public fun getBlockD (I)F public fun getBlockDMin (I)F + public fun getBlockSize ()I public fun getCode (II)I + public fun getElementCount ()J + public fun getEncoding ()Lsk/ainet/lang/tensor/storage/TensorEncoding; public fun getPackedData ()[B + public fun getPhysicalBytes ()J public fun getShape ()Lsk/ainet/lang/tensor/Shape; public fun getSubBlockMin (II)F public fun getSubBlockScale (II)F public fun set ([IB)V public synthetic fun set ([ILjava/lang/Object;)V + public fun toFloatArray ()[F + public fun toTensorStorage (Lsk/ainet/lang/tensor/storage/LogicalDType;Lsk/ainet/lang/tensor/storage/Placement;)Lsk/ainet/lang/tensor/storage/TensorStorage; } public final class sk/ainet/lang/tensor/data/Q4_KBlockTensorData$Companion { @@ -2828,6 
+3138,61 @@ public final class sk/ainet/lang/tensor/data/Q4_KTensorDataKt { public static final fun toFloatArray (Lsk/ainet/lang/tensor/data/Q4_KTensorData;)[F } +public final class sk/ainet/lang/tensor/data/Q6_KBlockTensorData : sk/ainet/lang/tensor/data/Q6_KTensorData, sk/ainet/lang/tensor/storage/PackedBlockStorage { + public static final field Companion Lsk/ainet/lang/tensor/data/Q6_KBlockTensorData$Companion; + public fun (Lsk/ainet/lang/tensor/Shape;[B)V + public fun copyToFloatArray ()[F + public fun dequantizeBlock (I[FI)V + public fun get ([I)Ljava/lang/Byte; + public synthetic fun get ([I)Ljava/lang/Object; + public fun getBlockCount ()I + public fun getBlockD (I)F + public fun getBlockSize ()I + public fun getCode (II)I + public fun getElementCount ()J + public fun getEncoding ()Lsk/ainet/lang/tensor/storage/TensorEncoding; + public fun getPackedData ()[B + public fun getPhysicalBytes ()J + public fun getShape ()Lsk/ainet/lang/tensor/Shape; + public fun getSubBlockScale (II)I + public fun set ([IB)V + public synthetic fun set ([ILjava/lang/Object;)V + public fun toFloatArray ()[F + public fun toTensorStorage (Lsk/ainet/lang/tensor/storage/LogicalDType;Lsk/ainet/lang/tensor/storage/Placement;)Lsk/ainet/lang/tensor/storage/TensorStorage; +} + +public final class sk/ainet/lang/tensor/data/Q6_KBlockTensorData$Companion { + public final fun fromRawBytes (Lsk/ainet/lang/tensor/Shape;[B)Lsk/ainet/lang/tensor/data/Q6_KBlockTensorData; +} + +public abstract interface class sk/ainet/lang/tensor/data/Q6_KTensorData : sk/ainet/lang/tensor/data/TensorData { + public static final field BLOCK_SIZE I + public static final field BYTES_PER_BLOCK I + public static final field Companion Lsk/ainet/lang/tensor/data/Q6_KTensorData$Companion; + public static final field SUB_BLOCKS_PER_BLOCK I + public static final field SUB_BLOCK_SIZE I + public abstract fun getBlockCount ()I + public abstract fun getBlockD (I)F + public abstract fun getCode (II)I + public abstract fun 
getPackedData ()[B + public abstract fun getSubBlockScale (II)I +} + +public final class sk/ainet/lang/tensor/data/Q6_KTensorData$Companion { + public static final field BLOCK_SIZE I + public static final field BYTES_PER_BLOCK I + public static final field SUB_BLOCKS_PER_BLOCK I + public static final field SUB_BLOCK_SIZE I +} + +public final class sk/ainet/lang/tensor/data/Q6_KTensorData$DefaultImpls { + public static fun copyToFloatArray (Lsk/ainet/lang/tensor/data/Q6_KTensorData;)[F +} + +public final class sk/ainet/lang/tensor/data/Q6_KTensorDataKt { + public static final fun toFloatArray (Lsk/ainet/lang/tensor/data/Q6_KTensorData;)[F +} + public abstract interface class sk/ainet/lang/tensor/data/Q8MemorySegmentMarker : sk/ainet/lang/tensor/data/MemorySegmentBackedData { public abstract fun getBlockCount ()I public abstract fun getBlockSize ()I @@ -2856,19 +3221,26 @@ public final class sk/ainet/lang/tensor/data/Q8MemorySegmentTensorData$Companion public static synthetic fun fromRawBytes$default (Lsk/ainet/lang/tensor/data/Q8MemorySegmentTensorData$Companion;Lsk/ainet/lang/tensor/Shape;[BLjava/lang/foreign/Arena;JILjava/lang/Object;)Lsk/ainet/lang/tensor/data/Q8MemorySegmentTensorData; } -public final class sk/ainet/lang/tensor/data/Q8_0BlockTensorData : sk/ainet/lang/tensor/data/Q8_0TensorData { +public final class sk/ainet/lang/tensor/data/Q8_0BlockTensorData : sk/ainet/lang/tensor/data/Q8_0TensorData, sk/ainet/lang/tensor/storage/PackedBlockStorage { public static final field Companion Lsk/ainet/lang/tensor/data/Q8_0BlockTensorData$Companion; public fun (Lsk/ainet/lang/tensor/Shape;[B)V public fun copyToFloatArray ()[F + public fun dequantizeBlock (I[FI)V public fun get ([I)Ljava/lang/Byte; public synthetic fun get ([I)Ljava/lang/Object; public fun getBlockCount ()I public fun getBlockScale (I)F + public fun getBlockSize ()I public fun getCode (II)B + public fun getElementCount ()J + public fun getEncoding ()Lsk/ainet/lang/tensor/storage/TensorEncoding; 
public fun getPackedData ()[B + public fun getPhysicalBytes ()J public fun getShape ()Lsk/ainet/lang/tensor/Shape; public fun set ([IB)V public synthetic fun set ([ILjava/lang/Object;)V + public fun toFloatArray ()[F + public fun toTensorStorage (Lsk/ainet/lang/tensor/storage/LogicalDType;Lsk/ainet/lang/tensor/storage/Placement;)Lsk/ainet/lang/tensor/storage/TensorStorage; } public final class sk/ainet/lang/tensor/data/Q8_0BlockTensorData$Companion { @@ -2914,12 +3286,23 @@ public abstract interface class sk/ainet/lang/tensor/data/TensorDataFactory { public abstract fun full (Lsk/ainet/lang/tensor/Shape;Lkotlin/reflect/KClass;Ljava/lang/Number;)Lsk/ainet/lang/tensor/data/TensorData; public abstract fun init (Lsk/ainet/lang/tensor/Shape;Lkotlin/reflect/KClass;Lkotlin/jvm/functions/Function1;)Lsk/ainet/lang/tensor/data/TensorData; public abstract fun ones (Lsk/ainet/lang/tensor/Shape;Lkotlin/reflect/KClass;)Lsk/ainet/lang/tensor/data/TensorData; + public fun placeholder (Lsk/ainet/lang/tensor/Shape;Lkotlin/reflect/KClass;)Lsk/ainet/lang/tensor/data/TensorData; public abstract fun randn (Lsk/ainet/lang/tensor/Shape;Lkotlin/reflect/KClass;FFLkotlin/random/Random;)Lsk/ainet/lang/tensor/data/TensorData; public abstract fun randomInit (Lsk/ainet/lang/tensor/Shape;Lkotlin/reflect/KClass;Lkotlin/jvm/functions/Function1;Lkotlin/random/Random;)Lsk/ainet/lang/tensor/data/TensorData; public abstract fun uniform (Lsk/ainet/lang/tensor/Shape;Lkotlin/reflect/KClass;FFLkotlin/random/Random;)Lsk/ainet/lang/tensor/data/TensorData; + public fun wrapByteArray (Lsk/ainet/lang/tensor/Shape;Lkotlin/reflect/KClass;[B)Lsk/ainet/lang/tensor/data/TensorData; + public fun wrapFloatArray (Lsk/ainet/lang/tensor/Shape;Lkotlin/reflect/KClass;[F)Lsk/ainet/lang/tensor/data/TensorData; + public fun wrapIntArray (Lsk/ainet/lang/tensor/Shape;Lkotlin/reflect/KClass;[I)Lsk/ainet/lang/tensor/data/TensorData; public abstract fun zeros 
(Lsk/ainet/lang/tensor/Shape;Lkotlin/reflect/KClass;)Lsk/ainet/lang/tensor/data/TensorData; } +public final class sk/ainet/lang/tensor/data/TensorDataFactory$DefaultImpls { + public static fun placeholder (Lsk/ainet/lang/tensor/data/TensorDataFactory;Lsk/ainet/lang/tensor/Shape;Lkotlin/reflect/KClass;)Lsk/ainet/lang/tensor/data/TensorData; + public static fun wrapByteArray (Lsk/ainet/lang/tensor/data/TensorDataFactory;Lsk/ainet/lang/tensor/Shape;Lkotlin/reflect/KClass;[B)Lsk/ainet/lang/tensor/data/TensorData; + public static fun wrapFloatArray (Lsk/ainet/lang/tensor/data/TensorDataFactory;Lsk/ainet/lang/tensor/Shape;Lkotlin/reflect/KClass;[F)Lsk/ainet/lang/tensor/data/TensorData; + public static fun wrapIntArray (Lsk/ainet/lang/tensor/data/TensorDataFactory;Lsk/ainet/lang/tensor/Shape;Lkotlin/reflect/KClass;[I)Lsk/ainet/lang/tensor/data/TensorData; +} + public final class sk/ainet/lang/tensor/data/TensorFactoryRegistry { public static final field INSTANCE Lsk/ainet/lang/tensor/data/TensorFactoryRegistry; public final fun getFactory (Lsk/ainet/lang/types/DType;)Lsk/ainet/lang/tensor/data/TensorDataFactory; @@ -2927,18 +3310,26 @@ public final class sk/ainet/lang/tensor/data/TensorFactoryRegistry { public final fun registerFactory (Lsk/ainet/lang/types/DType;Lsk/ainet/lang/tensor/data/TensorDataFactory;)V } -public final class sk/ainet/lang/tensor/data/Ternary2BitTensorData : sk/ainet/lang/tensor/data/TernaryTensorData { +public final class sk/ainet/lang/tensor/data/Ternary2BitTensorData : sk/ainet/lang/tensor/data/TernaryTensorData, sk/ainet/lang/tensor/storage/PackedBlockStorage { public static final field Companion Lsk/ainet/lang/tensor/data/Ternary2BitTensorData$Companion; public fun (Lsk/ainet/lang/tensor/Shape;[BF)V public synthetic fun (Lsk/ainet/lang/tensor/Shape;[BFILkotlin/jvm/internal/DefaultConstructorMarker;)V public fun copyToFloatArray ()[F + public fun dequantizeBlock (I[FI)V public fun get ([I)Ljava/lang/Byte; public synthetic fun get 
([I)Ljava/lang/Object; + public fun getBlockCount ()I + public fun getBlockSize ()I + public fun getElementCount ()J + public fun getEncoding ()Lsk/ainet/lang/tensor/storage/TensorEncoding; public fun getPackedData ()[B + public fun getPhysicalBytes ()J public fun getScale ()F public fun getShape ()Lsk/ainet/lang/tensor/Shape; public fun set ([IB)V public synthetic fun set ([ILjava/lang/Object;)V + public fun toFloatArray ()[F + public fun toTensorStorage (Lsk/ainet/lang/tensor/storage/LogicalDType;Lsk/ainet/lang/tensor/storage/Placement;)Lsk/ainet/lang/tensor/storage/TensorStorage; } public final class sk/ainet/lang/tensor/data/Ternary2BitTensorData$Companion { @@ -3396,6 +3787,15 @@ public final class sk/ainet/lang/tensor/ops/Conv3dOperation : sk/ainet/lang/tens public fun validateInputs (Ljava/util/List;)Lsk/ainet/lang/tensor/ops/ValidationResult; } +public final class sk/ainet/lang/tensor/ops/ConvShapeUtils { + public static final field INSTANCE Lsk/ainet/lang/tensor/ops/ConvShapeUtils; + public final fun conv1dOutputShape ([I[IIII)[I + public final fun conv2dOutputShape ([I[ILkotlin/Pair;Lkotlin/Pair;Lkotlin/Pair;)[I + public final fun conv3dOutputShape ([I[ILkotlin/Triple;Lkotlin/Triple;Lkotlin/Triple;)[I + public final fun pool2dOutputShape ([ILkotlin/Pair;Lkotlin/Pair;Lkotlin/Pair;)[I + public final fun upsample2dOutputShape ([ILkotlin/Pair;)[I +} + public abstract interface class sk/ainet/lang/tensor/ops/DifferentiableTensorOps { public abstract fun absBackward (Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;Ljava/util/List;Ljava/util/Map;)Ljava/util/List; public abstract fun addBackward (Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;Ljava/util/List;Ljava/util/Map;)Ljava/util/List; @@ -3414,6 +3814,9 @@ public abstract interface class sk/ainet/lang/tensor/ops/DifferentiableTensorOps public abstract fun flattenBackward (Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;Ljava/util/List;Ljava/util/Map;)Ljava/util/List; public 
abstract fun geluBackward (Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;Ljava/util/List;Ljava/util/Map;)Ljava/util/List; public abstract fun leakyReluBackward (Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;Ljava/util/List;Ljava/util/Map;)Ljava/util/List; + public abstract fun log10Backward (Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;Ljava/util/List;Ljava/util/Map;)Ljava/util/List; + public abstract fun log2Backward (Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;Ljava/util/List;Ljava/util/Map;)Ljava/util/List; + public abstract fun logBackward (Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;Ljava/util/List;Ljava/util/Map;)Ljava/util/List; public abstract fun logSoftmaxBackward (Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;Ljava/util/List;Ljava/util/Map;)Ljava/util/List; public abstract fun matmulBackward (Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;Ljava/util/List;Ljava/util/Map;)Ljava/util/List; public abstract fun maxPool2dBackward (Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;Ljava/util/List;Ljava/util/Map;)Ljava/util/List; @@ -3422,6 +3825,9 @@ public abstract interface class sk/ainet/lang/tensor/ops/DifferentiableTensorOps public abstract fun multiplyBackward (Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;Ljava/util/List;Ljava/util/Map;)Ljava/util/List; public abstract fun narrowBackward (Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;Ljava/util/List;Ljava/util/Map;)Ljava/util/List; public abstract fun pad2dBackward (Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;Ljava/util/List;Ljava/util/Map;)Ljava/util/List; + public abstract fun permuteBackward (Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;Ljava/util/List;Ljava/util/Map;)Ljava/util/List; + public abstract fun powBackward (Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;Ljava/util/List;Ljava/util/Map;)Ljava/util/List; + public abstract fun 
powScalarBackward (Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;Ljava/util/List;Ljava/util/Map;)Ljava/util/List; public abstract fun rdivScalarBackward (Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;Ljava/util/List;Ljava/util/Map;)Ljava/util/List; public abstract fun reluBackward (Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;Ljava/util/List;Ljava/util/Map;)Ljava/util/List; public abstract fun reshapeBackward (Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;Ljava/util/List;Ljava/util/Map;)Ljava/util/List; @@ -3436,6 +3842,7 @@ public abstract interface class sk/ainet/lang/tensor/ops/DifferentiableTensorOps public abstract fun subScalarBackward (Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;Ljava/util/List;Ljava/util/Map;)Ljava/util/List; public abstract fun subtractBackward (Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;Ljava/util/List;Ljava/util/Map;)Ljava/util/List; public abstract fun sumBackward (Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;Ljava/util/List;Ljava/util/Map;)Ljava/util/List; + public abstract fun tanhBackward (Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;Ljava/util/List;Ljava/util/Map;)Ljava/util/List; public abstract fun transposeBackward (Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;Ljava/util/List;Ljava/util/Map;)Ljava/util/List; public abstract fun unsqueezeBackward (Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;Ljava/util/List;Ljava/util/Map;)Ljava/util/List; public abstract fun upsample2dBackward (Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;Ljava/util/List;Ljava/util/Map;)Ljava/util/List; @@ -3496,7 +3903,9 @@ public final class sk/ainet/lang/tensor/ops/KspTensorOps : sk/ainet/lang/tensor/ public fun conv1d (Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;IIII)Lsk/ainet/lang/tensor/Tensor; public fun conv2d 
(Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;Lkotlin/Pair;Lkotlin/Pair;Lkotlin/Pair;I)Lsk/ainet/lang/tensor/Tensor; public fun conv3d (Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;Lkotlin/Triple;Lkotlin/Triple;Lkotlin/Triple;I)Lsk/ainet/lang/tensor/Tensor; + public fun convTranspose1d (Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;IIIII)Lsk/ainet/lang/tensor/Tensor; public fun convert (Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/types/DType;)Lsk/ainet/lang/tensor/Tensor; + public fun cos (Lsk/ainet/lang/tensor/Tensor;)Lsk/ainet/lang/tensor/Tensor; public fun divScalar (Lsk/ainet/lang/tensor/Tensor;Ljava/lang/Number;)Lsk/ainet/lang/tensor/Tensor; public fun divide (Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;)Lsk/ainet/lang/tensor/Tensor; public fun elu (Lsk/ainet/lang/tensor/Tensor;F)Lsk/ainet/lang/tensor/Tensor; @@ -3508,6 +3917,9 @@ public final class sk/ainet/lang/tensor/ops/KspTensorOps : sk/ainet/lang/tensor/ public fun gelu (Lsk/ainet/lang/tensor/Tensor;)Lsk/ainet/lang/tensor/Tensor; public fun indexSelect (Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;I)Lsk/ainet/lang/tensor/Tensor; public fun leakyRelu (Lsk/ainet/lang/tensor/Tensor;F)Lsk/ainet/lang/tensor/Tensor; + public fun log (Lsk/ainet/lang/tensor/Tensor;)Lsk/ainet/lang/tensor/Tensor; + public fun log10 (Lsk/ainet/lang/tensor/Tensor;)Lsk/ainet/lang/tensor/Tensor; + public fun log2 (Lsk/ainet/lang/tensor/Tensor;)Lsk/ainet/lang/tensor/Tensor; public fun logSoftmax (Lsk/ainet/lang/tensor/Tensor;I)Lsk/ainet/lang/tensor/Tensor; public fun lt (Lsk/ainet/lang/tensor/Tensor;F)Lsk/ainet/lang/tensor/Tensor; public fun matmul (Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;)Lsk/ainet/lang/tensor/Tensor; @@ -3517,6 +3929,9 @@ public final class sk/ainet/lang/tensor/ops/KspTensorOps : sk/ainet/lang/tensor/ public fun multiply 
(Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;)Lsk/ainet/lang/tensor/Tensor; public fun narrow (Lsk/ainet/lang/tensor/Tensor;III)Lsk/ainet/lang/tensor/Tensor; public fun pad2d (Lsk/ainet/lang/tensor/Tensor;IIII)Lsk/ainet/lang/tensor/Tensor; + public fun permute (Lsk/ainet/lang/tensor/Tensor;[I)Lsk/ainet/lang/tensor/Tensor; + public fun pow (Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;)Lsk/ainet/lang/tensor/Tensor; + public fun powScalar (Lsk/ainet/lang/tensor/Tensor;Ljava/lang/Number;)Lsk/ainet/lang/tensor/Tensor; public fun rdivScalar (Ljava/lang/Number;Lsk/ainet/lang/tensor/Tensor;)Lsk/ainet/lang/tensor/Tensor; public fun relu (Lsk/ainet/lang/tensor/Tensor;)Lsk/ainet/lang/tensor/Tensor; public fun reshape (Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Shape;)Lsk/ainet/lang/tensor/Tensor; @@ -3525,6 +3940,7 @@ public final class sk/ainet/lang/tensor/ops/KspTensorOps : sk/ainet/lang/tensor/ public fun sigmoid (Lsk/ainet/lang/tensor/Tensor;)Lsk/ainet/lang/tensor/Tensor; public fun sign (Lsk/ainet/lang/tensor/Tensor;)Lsk/ainet/lang/tensor/Tensor; public fun silu (Lsk/ainet/lang/tensor/Tensor;)Lsk/ainet/lang/tensor/Tensor; + public fun sin (Lsk/ainet/lang/tensor/Tensor;)Lsk/ainet/lang/tensor/Tensor; public fun softmax (Lsk/ainet/lang/tensor/Tensor;I)Lsk/ainet/lang/tensor/Tensor; public fun split (Lsk/ainet/lang/tensor/Tensor;II)Ljava/util/List; public fun sqrt (Lsk/ainet/lang/tensor/Tensor;)Lsk/ainet/lang/tensor/Tensor; @@ -3532,6 +3948,7 @@ public final class sk/ainet/lang/tensor/ops/KspTensorOps : sk/ainet/lang/tensor/ public fun subScalar (Lsk/ainet/lang/tensor/Tensor;Ljava/lang/Number;)Lsk/ainet/lang/tensor/Tensor; public fun subtract (Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;)Lsk/ainet/lang/tensor/Tensor; public fun sum (Lsk/ainet/lang/tensor/Tensor;Ljava/lang/Integer;)Lsk/ainet/lang/tensor/Tensor; + public fun tanh (Lsk/ainet/lang/tensor/Tensor;)Lsk/ainet/lang/tensor/Tensor; public fun transpose 
(Lsk/ainet/lang/tensor/Tensor;)Lsk/ainet/lang/tensor/Tensor; public fun tril (Lsk/ainet/lang/tensor/Tensor;I)Lsk/ainet/lang/tensor/Tensor; public fun unfold (Lsk/ainet/lang/tensor/Tensor;III)Lsk/ainet/lang/tensor/Tensor; @@ -3540,6 +3957,36 @@ public final class sk/ainet/lang/tensor/ops/KspTensorOps : sk/ainet/lang/tensor/ public fun variance (Lsk/ainet/lang/tensor/Tensor;Ljava/lang/Integer;)Lsk/ainet/lang/tensor/Tensor; } +public final class sk/ainet/lang/tensor/ops/Log10Operation : sk/ainet/lang/tensor/ops/BaseOperation { + public fun ()V + public fun (Ljava/util/Map;)V + public synthetic fun (Ljava/util/Map;ILkotlin/jvm/internal/DefaultConstructorMarker;)V + public fun clone (Ljava/util/Map;)Lsk/ainet/lang/tensor/ops/Operation; + public fun execute (Ljava/util/List;)Ljava/util/List; + public fun inferOutputs (Ljava/util/List;)Ljava/util/List; + public fun validateInputs (Ljava/util/List;)Lsk/ainet/lang/tensor/ops/ValidationResult; +} + +public final class sk/ainet/lang/tensor/ops/Log2Operation : sk/ainet/lang/tensor/ops/BaseOperation { + public fun ()V + public fun (Ljava/util/Map;)V + public synthetic fun (Ljava/util/Map;ILkotlin/jvm/internal/DefaultConstructorMarker;)V + public fun clone (Ljava/util/Map;)Lsk/ainet/lang/tensor/ops/Operation; + public fun execute (Ljava/util/List;)Ljava/util/List; + public fun inferOutputs (Ljava/util/List;)Ljava/util/List; + public fun validateInputs (Ljava/util/List;)Lsk/ainet/lang/tensor/ops/ValidationResult; +} + +public final class sk/ainet/lang/tensor/ops/LogOperation : sk/ainet/lang/tensor/ops/BaseOperation { + public fun ()V + public fun (Ljava/util/Map;)V + public synthetic fun (Ljava/util/Map;ILkotlin/jvm/internal/DefaultConstructorMarker;)V + public fun clone (Ljava/util/Map;)Lsk/ainet/lang/tensor/ops/Operation; + public fun execute (Ljava/util/List;)Ljava/util/List; + public fun inferOutputs (Ljava/util/List;)Ljava/util/List; + public fun validateInputs (Ljava/util/List;)Lsk/ainet/lang/tensor/ops/ValidationResult; +} 
+ public final class sk/ainet/lang/tensor/ops/MatmulOperation : sk/ainet/lang/tensor/ops/BaseOperation { public fun ()V public fun (Ljava/util/Map;)V @@ -3565,6 +4012,12 @@ public abstract interface class sk/ainet/lang/tensor/ops/MixedPrecisionTensorOps public abstract fun convert (Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/types/DType;)Lsk/ainet/lang/tensor/Tensor; } +public final class sk/ainet/lang/tensor/ops/MixedPrecisionTensorOps$DefaultImpls { + public static fun convTranspose1d (Lsk/ainet/lang/tensor/ops/MixedPrecisionTensorOps;Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;IIIII)Lsk/ainet/lang/tensor/Tensor; + public static fun cos (Lsk/ainet/lang/tensor/ops/MixedPrecisionTensorOps;Lsk/ainet/lang/tensor/Tensor;)Lsk/ainet/lang/tensor/Tensor; + public static fun sin (Lsk/ainet/lang/tensor/ops/MixedPrecisionTensorOps;Lsk/ainet/lang/tensor/Tensor;)Lsk/ainet/lang/tensor/Tensor; +} + public final class sk/ainet/lang/tensor/ops/MultiplyOperation : sk/ainet/lang/tensor/ops/BaseOperation { public fun ()V public fun (Ljava/util/Map;)V @@ -3593,6 +4046,16 @@ public final class sk/ainet/lang/tensor/ops/Operation$DefaultImpls { public static fun getDescription (Lsk/ainet/lang/tensor/ops/Operation;)Ljava/lang/String; } +public final class sk/ainet/lang/tensor/ops/PowOperation : sk/ainet/lang/tensor/ops/BaseOperation { + public fun ()V + public fun (Ljava/util/Map;)V + public synthetic fun (Ljava/util/Map;ILkotlin/jvm/internal/DefaultConstructorMarker;)V + public fun clone (Ljava/util/Map;)Lsk/ainet/lang/tensor/ops/Operation; + public fun execute (Ljava/util/List;)Ljava/util/List; + public fun inferOutputs (Ljava/util/List;)Ljava/util/List; + public fun validateInputs (Ljava/util/List;)Lsk/ainet/lang/tensor/ops/ValidationResult; +} + public final class sk/ainet/lang/tensor/ops/QuantizedMatmul { public static final field INSTANCE Lsk/ainet/lang/tensor/ops/QuantizedMatmul; public final fun isQ4_KWeight 
(Lsk/ainet/lang/tensor/Tensor;)Z @@ -3628,6 +4091,16 @@ public final class sk/ainet/lang/tensor/ops/ReshapeOperation : sk/ainet/lang/ten public fun validateInputs (Ljava/util/List;)Lsk/ainet/lang/tensor/ops/ValidationResult; } +public final class sk/ainet/lang/tensor/ops/ScaledDotProductAttentionOperation : sk/ainet/lang/tensor/ops/BaseOperation { + public fun ()V + public fun (Ljava/util/Map;)V + public synthetic fun (Ljava/util/Map;ILkotlin/jvm/internal/DefaultConstructorMarker;)V + public fun clone (Ljava/util/Map;)Lsk/ainet/lang/tensor/ops/Operation; + public fun execute (Ljava/util/List;)Ljava/util/List; + public fun inferOutputs (Ljava/util/List;)Ljava/util/List; + public fun validateInputs (Ljava/util/List;)Lsk/ainet/lang/tensor/ops/ValidationResult; +} + public final class sk/ainet/lang/tensor/ops/SigmoidOperation : sk/ainet/lang/tensor/ops/BaseOperation { public fun ()V public fun (Ljava/util/Map;)V @@ -3668,6 +4141,16 @@ public final class sk/ainet/lang/tensor/ops/SubtractOperation : sk/ainet/lang/te public fun validateInputs (Ljava/util/List;)Lsk/ainet/lang/tensor/ops/ValidationResult; } +public final class sk/ainet/lang/tensor/ops/TanhOperation : sk/ainet/lang/tensor/ops/BaseOperation { + public fun ()V + public fun (Ljava/util/Map;)V + public synthetic fun (Ljava/util/Map;ILkotlin/jvm/internal/DefaultConstructorMarker;)V + public fun clone (Ljava/util/Map;)Lsk/ainet/lang/tensor/ops/Operation; + public fun execute (Ljava/util/List;)Ljava/util/List; + public fun inferOutputs (Ljava/util/List;)Ljava/util/List; + public fun validateInputs (Ljava/util/List;)Lsk/ainet/lang/tensor/ops/ValidationResult; +} + public abstract interface class sk/ainet/lang/tensor/ops/TensorOps { public abstract fun abs (Lsk/ainet/lang/tensor/Tensor;)Lsk/ainet/lang/tensor/Tensor; public abstract fun add (Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;)Lsk/ainet/lang/tensor/Tensor; @@ -3682,7 +4165,10 @@ public abstract interface class sk/ainet/lang/tensor/ops/TensorOps 
{ public static synthetic fun conv2d$default (Lsk/ainet/lang/tensor/ops/TensorOps;Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;Lkotlin/Pair;Lkotlin/Pair;Lkotlin/Pair;IILjava/lang/Object;)Lsk/ainet/lang/tensor/Tensor; public abstract fun conv3d (Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;Lkotlin/Triple;Lkotlin/Triple;Lkotlin/Triple;I)Lsk/ainet/lang/tensor/Tensor; public static synthetic fun conv3d$default (Lsk/ainet/lang/tensor/ops/TensorOps;Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;Lkotlin/Triple;Lkotlin/Triple;Lkotlin/Triple;IILjava/lang/Object;)Lsk/ainet/lang/tensor/Tensor; + public fun convTranspose1d (Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;IIIII)Lsk/ainet/lang/tensor/Tensor; + public static synthetic fun convTranspose1d$default (Lsk/ainet/lang/tensor/ops/TensorOps;Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;IIIIIILjava/lang/Object;)Lsk/ainet/lang/tensor/Tensor; public abstract fun convert (Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/types/DType;)Lsk/ainet/lang/tensor/Tensor; + public fun cos (Lsk/ainet/lang/tensor/Tensor;)Lsk/ainet/lang/tensor/Tensor; public abstract fun divScalar (Lsk/ainet/lang/tensor/Tensor;Ljava/lang/Number;)Lsk/ainet/lang/tensor/Tensor; public abstract fun divide (Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;)Lsk/ainet/lang/tensor/Tensor; public abstract fun elu (Lsk/ainet/lang/tensor/Tensor;F)Lsk/ainet/lang/tensor/Tensor; @@ -3699,6 +4185,9 @@ public abstract interface class sk/ainet/lang/tensor/ops/TensorOps { public static synthetic fun indexSelect$default (Lsk/ainet/lang/tensor/ops/TensorOps;Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;IILjava/lang/Object;)Lsk/ainet/lang/tensor/Tensor; public abstract fun leakyRelu (Lsk/ainet/lang/tensor/Tensor;F)Lsk/ainet/lang/tensor/Tensor; public static 
synthetic fun leakyRelu$default (Lsk/ainet/lang/tensor/ops/TensorOps;Lsk/ainet/lang/tensor/Tensor;FILjava/lang/Object;)Lsk/ainet/lang/tensor/Tensor; + public abstract fun log (Lsk/ainet/lang/tensor/Tensor;)Lsk/ainet/lang/tensor/Tensor; + public abstract fun log10 (Lsk/ainet/lang/tensor/Tensor;)Lsk/ainet/lang/tensor/Tensor; + public abstract fun log2 (Lsk/ainet/lang/tensor/Tensor;)Lsk/ainet/lang/tensor/Tensor; public abstract fun logSoftmax (Lsk/ainet/lang/tensor/Tensor;I)Lsk/ainet/lang/tensor/Tensor; public static synthetic fun logSoftmax$default (Lsk/ainet/lang/tensor/ops/TensorOps;Lsk/ainet/lang/tensor/Tensor;IILjava/lang/Object;)Lsk/ainet/lang/tensor/Tensor; public abstract fun lt (Lsk/ainet/lang/tensor/Tensor;F)Lsk/ainet/lang/tensor/Tensor; @@ -3711,6 +4200,9 @@ public abstract interface class sk/ainet/lang/tensor/ops/TensorOps { public abstract fun multiply (Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;)Lsk/ainet/lang/tensor/Tensor; public abstract fun narrow (Lsk/ainet/lang/tensor/Tensor;III)Lsk/ainet/lang/tensor/Tensor; public abstract fun pad2d (Lsk/ainet/lang/tensor/Tensor;IIII)Lsk/ainet/lang/tensor/Tensor; + public abstract fun permute (Lsk/ainet/lang/tensor/Tensor;[I)Lsk/ainet/lang/tensor/Tensor; + public abstract fun pow (Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;)Lsk/ainet/lang/tensor/Tensor; + public abstract fun powScalar (Lsk/ainet/lang/tensor/Tensor;Ljava/lang/Number;)Lsk/ainet/lang/tensor/Tensor; public abstract fun rdivScalar (Ljava/lang/Number;Lsk/ainet/lang/tensor/Tensor;)Lsk/ainet/lang/tensor/Tensor; public abstract fun relu (Lsk/ainet/lang/tensor/Tensor;)Lsk/ainet/lang/tensor/Tensor; public abstract fun reshape (Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Shape;)Lsk/ainet/lang/tensor/Tensor; @@ -3720,6 +4212,7 @@ public abstract interface class sk/ainet/lang/tensor/ops/TensorOps { public abstract fun sigmoid (Lsk/ainet/lang/tensor/Tensor;)Lsk/ainet/lang/tensor/Tensor; public abstract fun sign 
(Lsk/ainet/lang/tensor/Tensor;)Lsk/ainet/lang/tensor/Tensor; public abstract fun silu (Lsk/ainet/lang/tensor/Tensor;)Lsk/ainet/lang/tensor/Tensor; + public fun sin (Lsk/ainet/lang/tensor/Tensor;)Lsk/ainet/lang/tensor/Tensor; public abstract fun softmax (Lsk/ainet/lang/tensor/Tensor;I)Lsk/ainet/lang/tensor/Tensor; public static synthetic fun softmax$default (Lsk/ainet/lang/tensor/ops/TensorOps;Lsk/ainet/lang/tensor/Tensor;IILjava/lang/Object;)Lsk/ainet/lang/tensor/Tensor; public abstract fun split (Lsk/ainet/lang/tensor/Tensor;II)Ljava/util/List; @@ -3730,6 +4223,7 @@ public abstract interface class sk/ainet/lang/tensor/ops/TensorOps { public abstract fun subtract (Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;)Lsk/ainet/lang/tensor/Tensor; public abstract fun sum (Lsk/ainet/lang/tensor/Tensor;Ljava/lang/Integer;)Lsk/ainet/lang/tensor/Tensor; public static synthetic fun sum$default (Lsk/ainet/lang/tensor/ops/TensorOps;Lsk/ainet/lang/tensor/Tensor;Ljava/lang/Integer;ILjava/lang/Object;)Lsk/ainet/lang/tensor/Tensor; + public abstract fun tanh (Lsk/ainet/lang/tensor/Tensor;)Lsk/ainet/lang/tensor/Tensor; public abstract fun transpose (Lsk/ainet/lang/tensor/Tensor;)Lsk/ainet/lang/tensor/Tensor; public abstract fun tril (Lsk/ainet/lang/tensor/Tensor;I)Lsk/ainet/lang/tensor/Tensor; public static synthetic fun tril$default (Lsk/ainet/lang/tensor/ops/TensorOps;Lsk/ainet/lang/tensor/Tensor;IILjava/lang/Object;)Lsk/ainet/lang/tensor/Tensor; @@ -3746,6 +4240,9 @@ public final class sk/ainet/lang/tensor/ops/TensorOps$DefaultImpls { public static synthetic fun conv1d$default (Lsk/ainet/lang/tensor/ops/TensorOps;Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;IIIIILjava/lang/Object;)Lsk/ainet/lang/tensor/Tensor; public static synthetic fun conv2d$default 
(Lsk/ainet/lang/tensor/ops/TensorOps;Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;Lkotlin/Pair;Lkotlin/Pair;Lkotlin/Pair;IILjava/lang/Object;)Lsk/ainet/lang/tensor/Tensor; public static synthetic fun conv3d$default (Lsk/ainet/lang/tensor/ops/TensorOps;Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;Lkotlin/Triple;Lkotlin/Triple;Lkotlin/Triple;IILjava/lang/Object;)Lsk/ainet/lang/tensor/Tensor; + public static fun convTranspose1d (Lsk/ainet/lang/tensor/ops/TensorOps;Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;IIIII)Lsk/ainet/lang/tensor/Tensor; + public static synthetic fun convTranspose1d$default (Lsk/ainet/lang/tensor/ops/TensorOps;Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;IIIIIILjava/lang/Object;)Lsk/ainet/lang/tensor/Tensor; + public static fun cos (Lsk/ainet/lang/tensor/ops/TensorOps;Lsk/ainet/lang/tensor/Tensor;)Lsk/ainet/lang/tensor/Tensor; public static synthetic fun elu$default (Lsk/ainet/lang/tensor/ops/TensorOps;Lsk/ainet/lang/tensor/Tensor;FILjava/lang/Object;)Lsk/ainet/lang/tensor/Tensor; public static synthetic fun flatten$default (Lsk/ainet/lang/tensor/ops/TensorOps;Lsk/ainet/lang/tensor/Tensor;IIILjava/lang/Object;)Lsk/ainet/lang/tensor/Tensor; public static synthetic fun gather$default (Lsk/ainet/lang/tensor/ops/TensorOps;Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;IILjava/lang/Object;)Lsk/ainet/lang/tensor/Tensor; @@ -3755,6 +4252,7 @@ public final class sk/ainet/lang/tensor/ops/TensorOps$DefaultImpls { public static synthetic fun maxPool2d$default (Lsk/ainet/lang/tensor/ops/TensorOps;Lsk/ainet/lang/tensor/Tensor;Lkotlin/Pair;Lkotlin/Pair;Lkotlin/Pair;ILjava/lang/Object;)Lsk/ainet/lang/tensor/Tensor; public static synthetic fun mean$default 
(Lsk/ainet/lang/tensor/ops/TensorOps;Lsk/ainet/lang/tensor/Tensor;Ljava/lang/Integer;ILjava/lang/Object;)Lsk/ainet/lang/tensor/Tensor; public static synthetic fun scaledDotProductAttention$default (Lsk/ainet/lang/tensor/ops/TensorOps;Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;FZILjava/lang/Object;)Lsk/ainet/lang/tensor/Tensor; + public static fun sin (Lsk/ainet/lang/tensor/ops/TensorOps;Lsk/ainet/lang/tensor/Tensor;)Lsk/ainet/lang/tensor/Tensor; public static synthetic fun softmax$default (Lsk/ainet/lang/tensor/ops/TensorOps;Lsk/ainet/lang/tensor/Tensor;IILjava/lang/Object;)Lsk/ainet/lang/tensor/Tensor; public static synthetic fun squeeze$default (Lsk/ainet/lang/tensor/ops/TensorOps;Lsk/ainet/lang/tensor/Tensor;Ljava/lang/Integer;ILjava/lang/Object;)Lsk/ainet/lang/tensor/Tensor; public static synthetic fun sum$default (Lsk/ainet/lang/tensor/ops/TensorOps;Lsk/ainet/lang/tensor/Tensor;Ljava/lang/Integer;ILjava/lang/Object;)Lsk/ainet/lang/tensor/Tensor; @@ -3783,6 +4281,13 @@ public final class sk/ainet/lang/tensor/ops/TensorSpec { public fun toString ()Ljava/lang/String; } +public final class sk/ainet/lang/tensor/ops/TensorSpecs { + public static final field TENSOR_ENCODING_METADATA_KEY Ljava/lang/String; + public static final fun getTensorEncoding (Lsk/ainet/lang/tensor/ops/TensorSpec;)Lsk/ainet/lang/tensor/storage/TensorEncoding; + public static final fun inferTensorEncoding (Lsk/ainet/lang/tensor/data/TensorData;)Lsk/ainet/lang/tensor/storage/TensorEncoding; + public static final fun withTensorEncoding (Lsk/ainet/lang/tensor/ops/TensorSpec;Lsk/ainet/lang/tensor/storage/TensorEncoding;)Lsk/ainet/lang/tensor/ops/TensorSpec; +} + public final class sk/ainet/lang/tensor/ops/TernaryMatmul { public static final field INSTANCE Lsk/ainet/lang/tensor/ops/TernaryMatmul; public final fun isTernaryWeight (Lsk/ainet/lang/tensor/Tensor;)Z @@ -3861,7 +4366,9 @@ public final class 
sk/ainet/lang/tensor/ops/VoidTensorOps : sk/ainet/lang/tensor public fun conv1d (Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;IIII)Lsk/ainet/lang/tensor/Tensor; public fun conv2d (Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;Lkotlin/Pair;Lkotlin/Pair;Lkotlin/Pair;I)Lsk/ainet/lang/tensor/Tensor; public fun conv3d (Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;Lkotlin/Triple;Lkotlin/Triple;Lkotlin/Triple;I)Lsk/ainet/lang/tensor/Tensor; + public fun convTranspose1d (Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;IIIII)Lsk/ainet/lang/tensor/Tensor; public fun convert (Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/types/DType;)Lsk/ainet/lang/tensor/Tensor; + public fun cos (Lsk/ainet/lang/tensor/Tensor;)Lsk/ainet/lang/tensor/Tensor; public fun divScalar (Lsk/ainet/lang/tensor/Tensor;Ljava/lang/Number;)Lsk/ainet/lang/tensor/Tensor; public fun divide (Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;)Lsk/ainet/lang/tensor/Tensor; public fun elu (Lsk/ainet/lang/tensor/Tensor;F)Lsk/ainet/lang/tensor/Tensor; @@ -3873,6 +4380,9 @@ public final class sk/ainet/lang/tensor/ops/VoidTensorOps : sk/ainet/lang/tensor public fun gelu (Lsk/ainet/lang/tensor/Tensor;)Lsk/ainet/lang/tensor/Tensor; public fun indexSelect (Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;I)Lsk/ainet/lang/tensor/Tensor; public fun leakyRelu (Lsk/ainet/lang/tensor/Tensor;F)Lsk/ainet/lang/tensor/Tensor; + public fun log (Lsk/ainet/lang/tensor/Tensor;)Lsk/ainet/lang/tensor/Tensor; + public fun log10 (Lsk/ainet/lang/tensor/Tensor;)Lsk/ainet/lang/tensor/Tensor; + public fun log2 (Lsk/ainet/lang/tensor/Tensor;)Lsk/ainet/lang/tensor/Tensor; public fun logSoftmax (Lsk/ainet/lang/tensor/Tensor;I)Lsk/ainet/lang/tensor/Tensor; public fun lt (Lsk/ainet/lang/tensor/Tensor;F)Lsk/ainet/lang/tensor/Tensor; public fun matmul 
(Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;)Lsk/ainet/lang/tensor/Tensor; @@ -3882,6 +4392,9 @@ public final class sk/ainet/lang/tensor/ops/VoidTensorOps : sk/ainet/lang/tensor public fun multiply (Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;)Lsk/ainet/lang/tensor/Tensor; public fun narrow (Lsk/ainet/lang/tensor/Tensor;III)Lsk/ainet/lang/tensor/Tensor; public fun pad2d (Lsk/ainet/lang/tensor/Tensor;IIII)Lsk/ainet/lang/tensor/Tensor; + public fun permute (Lsk/ainet/lang/tensor/Tensor;[I)Lsk/ainet/lang/tensor/Tensor; + public fun pow (Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;)Lsk/ainet/lang/tensor/Tensor; + public fun powScalar (Lsk/ainet/lang/tensor/Tensor;Ljava/lang/Number;)Lsk/ainet/lang/tensor/Tensor; public fun rdivScalar (Ljava/lang/Number;Lsk/ainet/lang/tensor/Tensor;)Lsk/ainet/lang/tensor/Tensor; public fun relu (Lsk/ainet/lang/tensor/Tensor;)Lsk/ainet/lang/tensor/Tensor; public fun reshape (Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Shape;)Lsk/ainet/lang/tensor/Tensor; @@ -3890,6 +4403,7 @@ public final class sk/ainet/lang/tensor/ops/VoidTensorOps : sk/ainet/lang/tensor public fun sigmoid (Lsk/ainet/lang/tensor/Tensor;)Lsk/ainet/lang/tensor/Tensor; public fun sign (Lsk/ainet/lang/tensor/Tensor;)Lsk/ainet/lang/tensor/Tensor; public fun silu (Lsk/ainet/lang/tensor/Tensor;)Lsk/ainet/lang/tensor/Tensor; + public fun sin (Lsk/ainet/lang/tensor/Tensor;)Lsk/ainet/lang/tensor/Tensor; public fun softmax (Lsk/ainet/lang/tensor/Tensor;I)Lsk/ainet/lang/tensor/Tensor; public fun split (Lsk/ainet/lang/tensor/Tensor;II)Ljava/util/List; public fun sqrt (Lsk/ainet/lang/tensor/Tensor;)Lsk/ainet/lang/tensor/Tensor; @@ -3897,6 +4411,7 @@ public final class sk/ainet/lang/tensor/ops/VoidTensorOps : sk/ainet/lang/tensor public fun subScalar (Lsk/ainet/lang/tensor/Tensor;Ljava/lang/Number;)Lsk/ainet/lang/tensor/Tensor; public fun subtract (Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;)Lsk/ainet/lang/tensor/Tensor; 
public fun sum (Lsk/ainet/lang/tensor/Tensor;Ljava/lang/Integer;)Lsk/ainet/lang/tensor/Tensor; + public fun tanh (Lsk/ainet/lang/tensor/Tensor;)Lsk/ainet/lang/tensor/Tensor; public fun transpose (Lsk/ainet/lang/tensor/Tensor;)Lsk/ainet/lang/tensor/Tensor; public fun tril (Lsk/ainet/lang/tensor/Tensor;I)Lsk/ainet/lang/tensor/Tensor; public fun unfold (Lsk/ainet/lang/tensor/Tensor;III)Lsk/ainet/lang/tensor/Tensor; @@ -3905,23 +4420,975 @@ public final class sk/ainet/lang/tensor/ops/VoidTensorOps : sk/ainet/lang/tensor public fun variance (Lsk/ainet/lang/tensor/Tensor;Ljava/lang/Integer;)Lsk/ainet/lang/tensor/Tensor; } -public final class sk/ainet/lang/trace/CompositeSink : sk/ainet/lang/trace/OpSink { - public fun (Ljava/util/List;)V - public fun onOpExecuted (Lsk/ainet/lang/trace/OpTrace;)V +public final class sk/ainet/lang/tensor/ops/turboquant/BitPacker { + public static final field INSTANCE Lsk/ainet/lang/tensor/ops/turboquant/BitPacker; + public final fun pack ([BI)[B + public final fun packedSize (II)I + public final fun unpack ([BII)[B } -public final class sk/ainet/lang/trace/NoOpSink : sk/ainet/lang/trace/OpSink { - public static final field INSTANCE Lsk/ainet/lang/trace/NoOpSink; - public fun onOpExecuted (Lsk/ainet/lang/trace/OpTrace;)V +public final class sk/ainet/lang/tensor/ops/turboquant/EncodedResidual { + public fun ([BFII)V + public final fun component1 ()[B + public final fun component2 ()F + public final fun component3 ()I + public final fun component4 ()I + public final fun copy ([BFII)Lsk/ainet/lang/tensor/ops/turboquant/EncodedResidual; + public static synthetic fun copy$default (Lsk/ainet/lang/tensor/ops/turboquant/EncodedResidual;[BFIIILjava/lang/Object;)Lsk/ainet/lang/tensor/ops/turboquant/EncodedResidual; + public fun equals (Ljava/lang/Object;)Z + public final fun getElementCount ()I + public final fun getPacked ()[B + public final fun getPackedSizeBytes ()I + public final fun getResidualBits ()I + public final fun getScale ()F + public 
fun hashCode ()I + public fun toString ()Ljava/lang/String; } -public final class sk/ainet/lang/trace/OpAttributeFactory { - public static final field INSTANCE Lsk/ainet/lang/trace/OpAttributeFactory; - public final fun binary (Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;)Ljava/util/Map; - public final fun conv2d (Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;Lkotlin/Pair;Lkotlin/Pair;Lkotlin/Pair;I)Ljava/util/Map; - public final fun scalarOp (Lsk/ainet/lang/tensor/Tensor;Ljava/lang/Number;Lsk/ainet/lang/tensor/Tensor;Z)Ljava/util/Map; - public final fun shapesAndDTypes (Ljava/util/List;Ljava/util/List;)Ljava/util/Map; - public final fun unary (Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;)Ljava/util/Map; +public final class sk/ainet/lang/tensor/ops/turboquant/QjlResidual { + public static final field INSTANCE Lsk/ainet/lang/tensor/ops/turboquant/QjlResidual; + public final fun decode (Lsk/ainet/lang/tensor/ops/turboquant/EncodedResidual;[FI)V + public final fun encode ([FII)Lsk/ainet/lang/tensor/ops/turboquant/EncodedResidual; +} + +public final class sk/ainet/lang/tensor/ops/turboquant/QuantizedVector { + public fun ([B[FI)V + public final fun component1 ()[B + public final fun component2 ()[F + public final fun component3 ()I + public final fun copy ([B[FI)Lsk/ainet/lang/tensor/ops/turboquant/QuantizedVector; + public static synthetic fun copy$default (Lsk/ainet/lang/tensor/ops/turboquant/QuantizedVector;[B[FIILjava/lang/Object;)Lsk/ainet/lang/tensor/ops/turboquant/QuantizedVector; + public fun equals (Ljava/lang/Object;)Z + public final fun getBits ()I + public final fun getCodes ()[B + public final fun getElementCount ()I + public final fun getNumGroups ()I + public final fun getScales ()[F + public fun hashCode ()I + public fun toString ()Ljava/lang/String; +} + +public final class sk/ainet/lang/tensor/ops/turboquant/RandomRotation { + 
public static final field INSTANCE Lsk/ainet/lang/tensor/ops/turboquant/RandomRotation; + public final fun inverseRotate ([FI)V + public final fun rotate ([FI)V + public final fun seedFor (III)I +} + +public final class sk/ainet/lang/tensor/ops/turboquant/ScalarQuantizer { + public static final field GROUP_SIZE I + public static final field INSTANCE Lsk/ainet/lang/tensor/ops/turboquant/ScalarQuantizer; + public final fun dequantize (Lsk/ainet/lang/tensor/ops/turboquant/QuantizedVector;)[F + public final fun dequantizeInto ([B[F[FI)V + public static synthetic fun dequantizeInto$default (Lsk/ainet/lang/tensor/ops/turboquant/ScalarQuantizer;[B[F[FIILjava/lang/Object;)V + public final fun quantize ([FI)Lsk/ainet/lang/tensor/ops/turboquant/QuantizedVector; +} + +public final class sk/ainet/lang/tensor/ops/turboquant/TurboQuantBlock { + public fun ([B[FIIILsk/ainet/lang/tensor/ops/turboquant/EncodedResidual;)V + public synthetic fun ([B[FIIILsk/ainet/lang/tensor/ops/turboquant/EncodedResidual;ILkotlin/jvm/internal/DefaultConstructorMarker;)V + public final fun component1 ()[B + public final fun component2 ()[F + public final fun component3 ()I + public final fun component4 ()I + public final fun component5 ()I + public final fun component6 ()Lsk/ainet/lang/tensor/ops/turboquant/EncodedResidual; + public final fun copy ([B[FIIILsk/ainet/lang/tensor/ops/turboquant/EncodedResidual;)Lsk/ainet/lang/tensor/ops/turboquant/TurboQuantBlock; + public static synthetic fun copy$default (Lsk/ainet/lang/tensor/ops/turboquant/TurboQuantBlock;[B[FIIILsk/ainet/lang/tensor/ops/turboquant/EncodedResidual;ILjava/lang/Object;)Lsk/ainet/lang/tensor/ops/turboquant/TurboQuantBlock; + public fun equals (Ljava/lang/Object;)Z + public final fun getBits ()I + public final fun getElementCount ()I + public final fun getPackedCodes ()[B + public final fun getResidual ()Lsk/ainet/lang/tensor/ops/turboquant/EncodedResidual; + public final fun getScales ()[F + public final fun getSeed ()I + public final 
fun getSizeInBytes ()I + public fun hashCode ()I + public final fun isPolarOnly ()Z + public fun toString ()Ljava/lang/String; +} + +public final class sk/ainet/lang/tensor/ops/turboquant/TurboQuantCodec { + public static final field INSTANCE Lsk/ainet/lang/tensor/ops/turboquant/TurboQuantCodec; + public final fun decode (Lsk/ainet/lang/tensor/ops/turboquant/TurboQuantBlock;)[F + public final fun encode ([FLsk/ainet/lang/tensor/ops/turboquant/TurboQuantConfig;)Lsk/ainet/lang/tensor/ops/turboquant/TurboQuantBlock; + public final fun encodedSize (ILsk/ainet/lang/tensor/ops/turboquant/TurboQuantConfig;)I +} + +public final class sk/ainet/lang/tensor/ops/turboquant/TurboQuantConfig { + public static final field Companion Lsk/ainet/lang/tensor/ops/turboquant/TurboQuantConfig$Companion; + public fun ()V + public fun (IZII)V + public synthetic fun (IZIIILkotlin/jvm/internal/DefaultConstructorMarker;)V + public final fun component1 ()I + public final fun component2 ()Z + public final fun component3 ()I + public final fun component4 ()I + public final fun copy (IZII)Lsk/ainet/lang/tensor/ops/turboquant/TurboQuantConfig; + public static synthetic fun copy$default (Lsk/ainet/lang/tensor/ops/turboquant/TurboQuantConfig;IZIIILjava/lang/Object;)Lsk/ainet/lang/tensor/ops/turboquant/TurboQuantConfig; + public fun equals (Ljava/lang/Object;)Z + public final fun getBits ()I + public final fun getResidualBits ()I + public final fun getSeed ()I + public final fun getUseQjl ()Z + public fun hashCode ()I + public fun toString ()Ljava/lang/String; +} + +public final class sk/ainet/lang/tensor/ops/turboquant/TurboQuantConfig$Companion { + public final fun polarOnly (II)Lsk/ainet/lang/tensor/ops/turboquant/TurboQuantConfig; + public static synthetic fun polarOnly$default (Lsk/ainet/lang/tensor/ops/turboquant/TurboQuantConfig$Companion;IIILjava/lang/Object;)Lsk/ainet/lang/tensor/ops/turboquant/TurboQuantConfig; + public final fun polarPlusQjl 
(III)Lsk/ainet/lang/tensor/ops/turboquant/TurboQuantConfig; + public static synthetic fun polarPlusQjl$default (Lsk/ainet/lang/tensor/ops/turboquant/TurboQuantConfig$Companion;IIIILjava/lang/Object;)Lsk/ainet/lang/tensor/ops/turboquant/TurboQuantConfig; +} + +public final class sk/ainet/lang/tensor/ops/turboquant/TurboQuantPreset { + public fun (Ljava/lang/String;Lsk/ainet/lang/tensor/storage/KvCacheConfig;Lsk/ainet/lang/tensor/ops/turboquant/TurboQuantConfig;Lsk/ainet/lang/tensor/ops/turboquant/TurboQuantConfig;)V + public final fun component1 ()Ljava/lang/String; + public final fun component2 ()Lsk/ainet/lang/tensor/storage/KvCacheConfig; + public final fun component3 ()Lsk/ainet/lang/tensor/ops/turboquant/TurboQuantConfig; + public final fun component4 ()Lsk/ainet/lang/tensor/ops/turboquant/TurboQuantConfig; + public final fun copy (Ljava/lang/String;Lsk/ainet/lang/tensor/storage/KvCacheConfig;Lsk/ainet/lang/tensor/ops/turboquant/TurboQuantConfig;Lsk/ainet/lang/tensor/ops/turboquant/TurboQuantConfig;)Lsk/ainet/lang/tensor/ops/turboquant/TurboQuantPreset; + public static synthetic fun copy$default (Lsk/ainet/lang/tensor/ops/turboquant/TurboQuantPreset;Ljava/lang/String;Lsk/ainet/lang/tensor/storage/KvCacheConfig;Lsk/ainet/lang/tensor/ops/turboquant/TurboQuantConfig;Lsk/ainet/lang/tensor/ops/turboquant/TurboQuantConfig;ILjava/lang/Object;)Lsk/ainet/lang/tensor/ops/turboquant/TurboQuantPreset; + public fun equals (Ljava/lang/Object;)Z + public final fun getCacheConfig ()Lsk/ainet/lang/tensor/storage/KvCacheConfig; + public final fun getKeyQuantConfig ()Lsk/ainet/lang/tensor/ops/turboquant/TurboQuantConfig; + public final fun getName ()Ljava/lang/String; + public final fun getValueQuantConfig ()Lsk/ainet/lang/tensor/ops/turboquant/TurboQuantConfig; + public fun hashCode ()I + public fun toString ()Ljava/lang/String; +} + +public final class sk/ainet/lang/tensor/ops/turboquant/TurboQuantPresets { + public static final field INSTANCE 
Lsk/ainet/lang/tensor/ops/turboquant/TurboQuantPresets; + public final fun balanced (IIII)Lsk/ainet/lang/tensor/ops/turboquant/TurboQuantPreset; + public final fun experimentalMax (IIII)Lsk/ainet/lang/tensor/ops/turboquant/TurboQuantPreset; + public final fun forModel (Ljava/lang/String;IIII)Lsk/ainet/lang/tensor/ops/turboquant/TurboQuantPreset; + public final fun getAvailablePresets ()Ljava/util/List; + public final fun safeLowbit (IIII)Lsk/ainet/lang/tensor/ops/turboquant/TurboQuantPreset; +} + +public final class sk/ainet/lang/tensor/ops/turboquant/TurboQuantUsage { + public static final field INSTANCE Lsk/ainet/lang/tensor/ops/turboquant/TurboQuantUsage; + public final fun exampleAsymmetricCache ()Lsk/ainet/lang/tensor/storage/KvCacheStore; + public final fun exampleGenerationLoop ()V + public final fun exampleLlamaCache ()Lsk/ainet/lang/tensor/storage/KvCacheStore; +} + +public final class sk/ainet/lang/tensor/scratch/NoopScratchPool : sk/ainet/lang/tensor/scratch/ScratchPool { + public static final field INSTANCE Lsk/ainet/lang/tensor/scratch/NoopScratchPool; + public fun acquireFloat (I)[F + public fun acquireFloatZeroed (I)[F + public fun scope (Lkotlin/jvm/functions/Function0;)Ljava/lang/Object; + public fun stats ()Lsk/ainet/lang/tensor/scratch/ScratchStats; +} + +public abstract interface class sk/ainet/lang/tensor/scratch/ScratchPool { + public abstract fun acquireFloat (I)[F + public abstract fun acquireFloatZeroed (I)[F + public abstract fun scope (Lkotlin/jvm/functions/Function0;)Ljava/lang/Object; + public abstract fun stats ()Lsk/ainet/lang/tensor/scratch/ScratchStats; +} + +public final class sk/ainet/lang/tensor/scratch/ScratchStats { + public fun (JJJI)V + public final fun component1 ()J + public final fun component2 ()J + public final fun component3 ()J + public final fun component4 ()I + public final fun copy (JJJI)Lsk/ainet/lang/tensor/scratch/ScratchStats; + public static synthetic fun copy$default 
(Lsk/ainet/lang/tensor/scratch/ScratchStats;JJJIILjava/lang/Object;)Lsk/ainet/lang/tensor/scratch/ScratchStats; + public fun equals (Ljava/lang/Object;)Z + public final fun getAcquireCount ()J + public final fun getActiveBuffers ()I + public final fun getCacheHits ()J + public final fun getHighWaterBytes ()J + public fun hashCode ()I + public fun toString ()Ljava/lang/String; +} + +public final class sk/ainet/lang/tensor/scratch/SizeClassedScratchPool : sk/ainet/lang/tensor/scratch/ScratchPool { + public static final field Companion Lsk/ainet/lang/tensor/scratch/SizeClassedScratchPool$Companion; + public static final field LOG_MIN_SIZE I + public static final field MAX_CLASSES I + public static final field MIN_SIZE I + public fun ()V + public fun (I)V + public synthetic fun (IILkotlin/jvm/internal/DefaultConstructorMarker;)V + public fun acquireFloat (I)[F + public fun acquireFloatZeroed (I)[F + public fun scope (Lkotlin/jvm/functions/Function0;)Ljava/lang/Object; + public fun stats ()Lsk/ainet/lang/tensor/scratch/ScratchStats; +} + +public final class sk/ainet/lang/tensor/scratch/SizeClassedScratchPool$Companion { + public final fun sizeClass (I)I + public final fun sizeForClass (I)I +} + +public final class sk/ainet/lang/tensor/storage/ActiveMemoryTracker { + public static final field INSTANCE Lsk/ainet/lang/tensor/storage/ActiveMemoryTracker; + public final fun getCurrent ()Lsk/ainet/lang/tensor/storage/MemoryTracker; + public final fun recordCopy (Ljava/lang/String;J)V + public final fun setCurrent (Lsk/ainet/lang/tensor/storage/MemoryTracker;)V +} + +public final class sk/ainet/lang/tensor/storage/AggregateMemoryReport { + public fun (IJJJIIIIJJLjava/util/List;)V + public final fun component1 ()I + public final fun component10 ()J + public final fun component11 ()Ljava/util/List; + public final fun component2 ()J + public final fun component3 ()J + public final fun component4 ()J + public final fun component5 ()I + public final fun component6 ()I + public 
final fun component7 ()I + public final fun component8 ()I + public final fun component9 ()J + public final fun copy (IJJJIIIIJJLjava/util/List;)Lsk/ainet/lang/tensor/storage/AggregateMemoryReport; + public static synthetic fun copy$default (Lsk/ainet/lang/tensor/storage/AggregateMemoryReport;IJJJIIIIJJLjava/util/List;ILjava/lang/Object;)Lsk/ainet/lang/tensor/storage/AggregateMemoryReport; + public fun equals (Ljava/lang/Object;)Z + public final fun getAliasedCount ()I + public final fun getBorrowedCount ()I + public final fun getCopyBytes ()J + public final fun getCopyCount ()J + public final fun getEntries ()Ljava/util/List; + public final fun getFileBackedBytes ()J + public final fun getFileBackedCount ()I + public final fun getOverallCompressionRatio ()D + public final fun getOwnedCount ()I + public final fun getTensorCount ()I + public final fun getTotalLogicalBytes ()J + public final fun getTotalPhysicalBytes ()J + public fun hashCode ()I + public fun toString ()Ljava/lang/String; +} + +public abstract interface class sk/ainet/lang/tensor/storage/BufferAccessor : java/lang/AutoCloseable { + public abstract fun getSizeInBytes ()J + public fun readAllBytes ()[B + public abstract fun readByte (J)B + public abstract fun readBytes (JI)[B +} + +public final class sk/ainet/lang/tensor/storage/BufferAccessor$DefaultImpls { + public static fun readAllBytes (Lsk/ainet/lang/tensor/storage/BufferAccessor;)[B +} + +public abstract interface class sk/ainet/lang/tensor/storage/BufferHandle { + public abstract fun getOwnership ()Lsk/ainet/lang/tensor/storage/Ownership; + public abstract fun getSizeInBytes ()J + public abstract fun isMutable ()Z +} + +public final class sk/ainet/lang/tensor/storage/BufferHandle$Aliased : sk/ainet/lang/tensor/storage/BufferHandle { + public fun (Lsk/ainet/lang/tensor/storage/BufferHandle;JJ)V + public final fun getByteOffset ()J + public fun getOwnership ()Lsk/ainet/lang/tensor/storage/Ownership; + public final fun getParent 
()Lsk/ainet/lang/tensor/storage/BufferHandle; + public fun getSizeInBytes ()J + public fun isMutable ()Z +} + +public final class sk/ainet/lang/tensor/storage/BufferHandle$Borrowed : sk/ainet/lang/tensor/storage/BufferHandle { + public fun ([BIJZ)V + public synthetic fun ([BIJZILkotlin/jvm/internal/DefaultConstructorMarker;)V + public final fun getData ()[B + public final fun getOffset ()I + public fun getOwnership ()Lsk/ainet/lang/tensor/storage/Ownership; + public fun getSizeInBytes ()J + public fun isMutable ()Z +} + +public final class sk/ainet/lang/tensor/storage/BufferHandle$DeviceResident : sk/ainet/lang/tensor/storage/BufferHandle { + public fun (Ljava/lang/String;Ljava/lang/Object;JZ)V + public final fun getBackendHandle ()Ljava/lang/Object; + public final fun getDeviceId ()Ljava/lang/String; + public fun getOwnership ()Lsk/ainet/lang/tensor/storage/Ownership; + public fun getSizeInBytes ()J + public fun isMutable ()Z +} + +public final class sk/ainet/lang/tensor/storage/BufferHandle$FileBacked : sk/ainet/lang/tensor/storage/BufferHandle { + public fun (Ljava/lang/String;JJ)V + public final fun getFileOffset ()J + public fun getOwnership ()Lsk/ainet/lang/tensor/storage/Ownership; + public final fun getPath ()Ljava/lang/String; + public fun getSizeInBytes ()J + public fun isMutable ()Z +} + +public final class sk/ainet/lang/tensor/storage/BufferHandle$Owned : sk/ainet/lang/tensor/storage/BufferHandle { + public fun ([BIJ)V + public synthetic fun ([BIJILkotlin/jvm/internal/DefaultConstructorMarker;)V + public final fun getData ()[B + public final fun getOffset ()I + public fun getOwnership ()Lsk/ainet/lang/tensor/storage/Ownership; + public fun getSizeInBytes ()J + public fun isMutable ()Z +} + +public final class sk/ainet/lang/tensor/storage/BufferHandleFactory { + public static final field INSTANCE Lsk/ainet/lang/tensor/storage/BufferHandleFactory; + public final fun borrow ([BIIZ)Lsk/ainet/lang/tensor/storage/BufferHandle$Borrowed; + public final fun 
borrow ([BZ)Lsk/ainet/lang/tensor/storage/BufferHandle$Borrowed; + public static synthetic fun borrow$default (Lsk/ainet/lang/tensor/storage/BufferHandleFactory;[BIIZILjava/lang/Object;)Lsk/ainet/lang/tensor/storage/BufferHandle$Borrowed; + public static synthetic fun borrow$default (Lsk/ainet/lang/tensor/storage/BufferHandleFactory;[BZILjava/lang/Object;)Lsk/ainet/lang/tensor/storage/BufferHandle$Borrowed; + public final fun fileBacked (Ljava/lang/String;JJ)Lsk/ainet/lang/tensor/storage/BufferHandle$FileBacked; + public final fun owned ([B)Lsk/ainet/lang/tensor/storage/BufferHandle$Owned; + public final fun owned ([F)Lsk/ainet/lang/tensor/storage/BufferHandle$Owned; + public final fun owned ([I)Lsk/ainet/lang/tensor/storage/BufferHandle$Owned; + public final fun slice (Lsk/ainet/lang/tensor/storage/BufferHandle;JJ)Lsk/ainet/lang/tensor/storage/BufferHandle$Aliased; +} + +public abstract interface class sk/ainet/lang/tensor/storage/BufferResolver { + public abstract fun resolve (Lsk/ainet/lang/tensor/storage/BufferHandle;)Lsk/ainet/lang/tensor/storage/BufferAccessor; +} + +public final class sk/ainet/lang/tensor/storage/ByteArrayAccessor : sk/ainet/lang/tensor/storage/BufferAccessor { + public fun ([BIJ)V + public synthetic fun ([BIJILkotlin/jvm/internal/DefaultConstructorMarker;)V + public fun close ()V + public fun getSizeInBytes ()J + public fun readAllBytes ()[B + public fun readByte (J)B + public fun readBytes (JI)[B + public final fun sliced (JJ)Lsk/ainet/lang/tensor/storage/ByteArrayAccessor; +} + +public final class sk/ainet/lang/tensor/storage/CompressedKvAttention { + public fun (Lsk/ainet/lang/tensor/storage/KvCacheStore;Lsk/ainet/lang/tensor/storage/CompressedKvAttention$DequantStrategy;)V + public synthetic fun (Lsk/ainet/lang/tensor/storage/KvCacheStore;Lsk/ainet/lang/tensor/storage/CompressedKvAttention$DequantStrategy;ILkotlin/jvm/internal/DefaultConstructorMarker;)V + public final fun isKeyCompressed ()Z + public final fun isValueCompressed ()Z + 
public final fun loadKeyStorageRaw (III)Lsk/ainet/lang/tensor/storage/TensorStorage; + public static synthetic fun loadKeyStorageRaw$default (Lsk/ainet/lang/tensor/storage/CompressedKvAttention;IIIILjava/lang/Object;)Lsk/ainet/lang/tensor/storage/TensorStorage; + public final fun loadKeysForAttention (III)[F + public static synthetic fun loadKeysForAttention$default (Lsk/ainet/lang/tensor/storage/CompressedKvAttention;IIIILjava/lang/Object;)[F + public final fun loadValueStorageRaw (III)Lsk/ainet/lang/tensor/storage/TensorStorage; + public static synthetic fun loadValueStorageRaw$default (Lsk/ainet/lang/tensor/storage/CompressedKvAttention;IIIILjava/lang/Object;)Lsk/ainet/lang/tensor/storage/TensorStorage; + public final fun loadValuesForAttention (III)[F + public static synthetic fun loadValuesForAttention$default (Lsk/ainet/lang/tensor/storage/CompressedKvAttention;IIIILjava/lang/Object;)[F + public final fun storeKeyValue (I[F[F)V +} + +public final class sk/ainet/lang/tensor/storage/CompressedKvAttention$DequantStrategy : java/lang/Enum { + public static final field FULL_TILE Lsk/ainet/lang/tensor/storage/CompressedKvAttention$DequantStrategy; + public static final field RAW_STORAGE Lsk/ainet/lang/tensor/storage/CompressedKvAttention$DequantStrategy; + public static fun getEntries ()Lkotlin/enums/EnumEntries; + public static fun valueOf (Ljava/lang/String;)Lsk/ainet/lang/tensor/storage/CompressedKvAttention$DequantStrategy; + public static fun values ()[Lsk/ainet/lang/tensor/storage/CompressedKvAttention$DequantStrategy; +} + +public final class sk/ainet/lang/tensor/storage/DefaultBufferResolver : sk/ainet/lang/tensor/storage/BufferResolver { + public fun ()V + public fun (Lkotlin/jvm/functions/Function1;)V + public synthetic fun (Lkotlin/jvm/functions/Function1;ILkotlin/jvm/internal/DefaultConstructorMarker;)V + public fun resolve (Lsk/ainet/lang/tensor/storage/BufferHandle;)Lsk/ainet/lang/tensor/storage/BufferAccessor; +} + +public final class 
sk/ainet/lang/tensor/storage/DefaultKvCacheStore : sk/ainet/lang/tensor/storage/KvCacheStore { + public fun (Lsk/ainet/lang/tensor/storage/KvCacheConfig;)V + public fun appendToken (I[F[F)V + public fun clear ()V + public fun evict (I)V + public fun getCurrentSeqLen ()I + public fun getHeadDim ()I + public fun getKeyEncoding ()Lsk/ainet/lang/tensor/storage/TensorEncoding; + public fun getMaxSeqLen ()I + public fun getNumHeads ()I + public fun getNumLayers ()I + public fun getPlacement ()Lsk/ainet/lang/tensor/storage/Placement; + public fun getValueEncoding ()Lsk/ainet/lang/tensor/storage/TensorEncoding; + public fun memoryReport ()Lsk/ainet/lang/tensor/storage/KvCacheMemoryReport; + public fun readKeyStorage (III)Lsk/ainet/lang/tensor/storage/TensorStorage; + public fun readKeys (III)[F + public fun readValueStorage (III)Lsk/ainet/lang/tensor/storage/TensorStorage; + public fun readValues (III)[F +} + +public final class sk/ainet/lang/tensor/storage/DeviceKind : java/lang/Enum { + public static final field AUTO Lsk/ainet/lang/tensor/storage/DeviceKind; + public static final field CPU Lsk/ainet/lang/tensor/storage/DeviceKind; + public static final field GPU Lsk/ainet/lang/tensor/storage/DeviceKind; + public static final field NPU Lsk/ainet/lang/tensor/storage/DeviceKind; + public static fun getEntries ()Lkotlin/enums/EnumEntries; + public static fun valueOf (Ljava/lang/String;)Lsk/ainet/lang/tensor/storage/DeviceKind; + public static fun values ()[Lsk/ainet/lang/tensor/storage/DeviceKind; +} + +public abstract interface annotation class sk/ainet/lang/tensor/storage/KvCache : java/lang/annotation/Annotation { + public abstract fun device ()Lsk/ainet/lang/tensor/storage/DeviceKind; + public abstract fun keyBits ()I + public abstract fun maxSeqLen ()I + public abstract fun preset ()Ljava/lang/String; + public abstract fun useQjl ()Z + public abstract fun valueBits ()I +} + +public final class sk/ainet/lang/tensor/storage/KvCacheAnnotationResolver { + public static 
final field INSTANCE Lsk/ainet/lang/tensor/storage/KvCacheAnnotationResolver; + public final fun resolve (Ljava/lang/String;IIII)Lsk/ainet/lang/tensor/storage/KvCacheStore; + public final fun resolve (Lsk/ainet/lang/tensor/storage/KvCache;IIII)Lsk/ainet/lang/tensor/storage/KvCacheStore; +} + +public abstract interface annotation class sk/ainet/lang/tensor/storage/KvCacheBypass : java/lang/annotation/Annotation { +} + +public final class sk/ainet/lang/tensor/storage/KvCacheConfig { + public static final field Companion Lsk/ainet/lang/tensor/storage/KvCacheConfig$Companion; + public fun (IIIILsk/ainet/lang/tensor/storage/TensorEncoding;Lsk/ainet/lang/tensor/storage/TensorEncoding;Lsk/ainet/lang/tensor/storage/Placement;)V + public synthetic fun (IIIILsk/ainet/lang/tensor/storage/TensorEncoding;Lsk/ainet/lang/tensor/storage/TensorEncoding;Lsk/ainet/lang/tensor/storage/Placement;ILkotlin/jvm/internal/DefaultConstructorMarker;)V + public final fun component1 ()I + public final fun component2 ()I + public final fun component3 ()I + public final fun component4 ()I + public final fun component5 ()Lsk/ainet/lang/tensor/storage/TensorEncoding; + public final fun component6 ()Lsk/ainet/lang/tensor/storage/TensorEncoding; + public final fun component7 ()Lsk/ainet/lang/tensor/storage/Placement; + public final fun copy (IIIILsk/ainet/lang/tensor/storage/TensorEncoding;Lsk/ainet/lang/tensor/storage/TensorEncoding;Lsk/ainet/lang/tensor/storage/Placement;)Lsk/ainet/lang/tensor/storage/KvCacheConfig; + public static synthetic fun copy$default (Lsk/ainet/lang/tensor/storage/KvCacheConfig;IIIILsk/ainet/lang/tensor/storage/TensorEncoding;Lsk/ainet/lang/tensor/storage/TensorEncoding;Lsk/ainet/lang/tensor/storage/Placement;ILjava/lang/Object;)Lsk/ainet/lang/tensor/storage/KvCacheConfig; + public fun equals (Ljava/lang/Object;)Z + public final fun getHeadDim ()I + public final fun getKeyEncoding ()Lsk/ainet/lang/tensor/storage/TensorEncoding; + public final fun getMaxSeqLen ()I + public 
final fun getNumHeads ()I + public final fun getNumLayers ()I + public final fun getPlacement ()Lsk/ainet/lang/tensor/storage/Placement; + public final fun getValueEncoding ()Lsk/ainet/lang/tensor/storage/TensorEncoding; + public fun hashCode ()I + public fun toString ()Ljava/lang/String; +} + +public final class sk/ainet/lang/tensor/storage/KvCacheConfig$Companion { + public final fun dense (IIII)Lsk/ainet/lang/tensor/storage/KvCacheConfig; + public final fun q8 (IIII)Lsk/ainet/lang/tensor/storage/KvCacheConfig; +} + +public final class sk/ainet/lang/tensor/storage/KvCacheMemoryReport { + public fun (IIIIILsk/ainet/lang/tensor/storage/TensorEncoding;Lsk/ainet/lang/tensor/storage/TensorEncoding;Lsk/ainet/lang/tensor/storage/Placement;JJJJ)V + public final fun component1 ()I + public final fun component10 ()J + public final fun component11 ()J + public final fun component12 ()J + public final fun component2 ()I + public final fun component3 ()I + public final fun component4 ()I + public final fun component5 ()I + public final fun component6 ()Lsk/ainet/lang/tensor/storage/TensorEncoding; + public final fun component7 ()Lsk/ainet/lang/tensor/storage/TensorEncoding; + public final fun component8 ()Lsk/ainet/lang/tensor/storage/Placement; + public final fun component9 ()J + public final fun copy (IIIIILsk/ainet/lang/tensor/storage/TensorEncoding;Lsk/ainet/lang/tensor/storage/TensorEncoding;Lsk/ainet/lang/tensor/storage/Placement;JJJJ)Lsk/ainet/lang/tensor/storage/KvCacheMemoryReport; + public static synthetic fun copy$default (Lsk/ainet/lang/tensor/storage/KvCacheMemoryReport;IIIIILsk/ainet/lang/tensor/storage/TensorEncoding;Lsk/ainet/lang/tensor/storage/TensorEncoding;Lsk/ainet/lang/tensor/storage/Placement;JJJJILjava/lang/Object;)Lsk/ainet/lang/tensor/storage/KvCacheMemoryReport; + public fun equals (Ljava/lang/Object;)Z + public final fun getCompressionRatio ()D + public final fun getCurrentSeqLen ()I + public final fun getHeadDim ()I + public final fun 
getKeyEncoding ()Lsk/ainet/lang/tensor/storage/TensorEncoding; + public final fun getKeyLogicalBytes ()J + public final fun getKeyPhysicalBytes ()J + public final fun getMaxSeqLen ()I + public final fun getNumHeads ()I + public final fun getNumLayers ()I + public final fun getPlacement ()Lsk/ainet/lang/tensor/storage/Placement; + public final fun getTotalLogicalBytes ()J + public final fun getTotalPhysicalBytes ()J + public final fun getUtilizationRatio ()D + public final fun getValueEncoding ()Lsk/ainet/lang/tensor/storage/TensorEncoding; + public final fun getValueLogicalBytes ()J + public final fun getValuePhysicalBytes ()J + public fun hashCode ()I + public fun toString ()Ljava/lang/String; +} + +public abstract interface class sk/ainet/lang/tensor/storage/KvCacheStore { + public static final field Companion Lsk/ainet/lang/tensor/storage/KvCacheStore$Companion; + public abstract fun appendToken (I[F[F)V + public abstract fun clear ()V + public abstract fun evict (I)V + public abstract fun getCurrentSeqLen ()I + public abstract fun getHeadDim ()I + public abstract fun getKeyEncoding ()Lsk/ainet/lang/tensor/storage/TensorEncoding; + public abstract fun getMaxSeqLen ()I + public abstract fun getNumHeads ()I + public abstract fun getNumLayers ()I + public abstract fun getPlacement ()Lsk/ainet/lang/tensor/storage/Placement; + public abstract fun getValueEncoding ()Lsk/ainet/lang/tensor/storage/TensorEncoding; + public abstract fun memoryReport ()Lsk/ainet/lang/tensor/storage/KvCacheMemoryReport; + public abstract fun readKeyStorage (III)Lsk/ainet/lang/tensor/storage/TensorStorage; + public static synthetic fun readKeyStorage$default (Lsk/ainet/lang/tensor/storage/KvCacheStore;IIIILjava/lang/Object;)Lsk/ainet/lang/tensor/storage/TensorStorage; + public abstract fun readKeys (III)[F + public static synthetic fun readKeys$default (Lsk/ainet/lang/tensor/storage/KvCacheStore;IIIILjava/lang/Object;)[F + public abstract fun readValueStorage 
(III)Lsk/ainet/lang/tensor/storage/TensorStorage; + public static synthetic fun readValueStorage$default (Lsk/ainet/lang/tensor/storage/KvCacheStore;IIIILjava/lang/Object;)Lsk/ainet/lang/tensor/storage/TensorStorage; + public abstract fun readValues (III)[F + public static synthetic fun readValues$default (Lsk/ainet/lang/tensor/storage/KvCacheStore;IIIILjava/lang/Object;)[F +} + +public final class sk/ainet/lang/tensor/storage/KvCacheStore$Companion { + public final fun dense (IIII)Lsk/ainet/lang/tensor/storage/KvCacheStore; + public final fun fromPreset (Lsk/ainet/lang/tensor/ops/turboquant/TurboQuantPreset;)Lsk/ainet/lang/tensor/storage/KvCacheStore; + public final fun turboQuant (IIIIIIZ)Lsk/ainet/lang/tensor/storage/KvCacheStore; + public final fun turboQuant (Ljava/lang/String;IIII)Lsk/ainet/lang/tensor/storage/KvCacheStore; + public static synthetic fun turboQuant$default (Lsk/ainet/lang/tensor/storage/KvCacheStore$Companion;IIIIIIZILjava/lang/Object;)Lsk/ainet/lang/tensor/storage/KvCacheStore; +} + +public final class sk/ainet/lang/tensor/storage/KvCacheStore$DefaultImpls { + public static synthetic fun readKeyStorage$default (Lsk/ainet/lang/tensor/storage/KvCacheStore;IIIILjava/lang/Object;)Lsk/ainet/lang/tensor/storage/TensorStorage; + public static synthetic fun readKeys$default (Lsk/ainet/lang/tensor/storage/KvCacheStore;IIIILjava/lang/Object;)[F + public static synthetic fun readValueStorage$default (Lsk/ainet/lang/tensor/storage/KvCacheStore;IIIILjava/lang/Object;)Lsk/ainet/lang/tensor/storage/TensorStorage; + public static synthetic fun readValues$default (Lsk/ainet/lang/tensor/storage/KvCacheStore;IIIILjava/lang/Object;)[F +} + +public final class sk/ainet/lang/tensor/storage/LogicalDType : java/lang/Enum { + public static final field BFLOAT16 Lsk/ainet/lang/tensor/storage/LogicalDType; + public static final field Companion Lsk/ainet/lang/tensor/storage/LogicalDType$Companion; + public static final field FLOAT16 
Lsk/ainet/lang/tensor/storage/LogicalDType; + public static final field FLOAT32 Lsk/ainet/lang/tensor/storage/LogicalDType; + public static final field FLOAT64 Lsk/ainet/lang/tensor/storage/LogicalDType; + public static final field INT16 Lsk/ainet/lang/tensor/storage/LogicalDType; + public static final field INT32 Lsk/ainet/lang/tensor/storage/LogicalDType; + public static final field INT4 Lsk/ainet/lang/tensor/storage/LogicalDType; + public static final field INT64 Lsk/ainet/lang/tensor/storage/LogicalDType; + public static final field INT8 Lsk/ainet/lang/tensor/storage/LogicalDType; + public static final field TERNARY Lsk/ainet/lang/tensor/storage/LogicalDType; + public static final field UINT16 Lsk/ainet/lang/tensor/storage/LogicalDType; + public static final field UINT32 Lsk/ainet/lang/tensor/storage/LogicalDType; + public static final field UINT64 Lsk/ainet/lang/tensor/storage/LogicalDType; + public static final field UINT8 Lsk/ainet/lang/tensor/storage/LogicalDType; + public static fun getEntries ()Lkotlin/enums/EnumEntries; + public final fun getSizeInBits ()I + public final fun getSizeInBytes ()I + public final fun isFloatingPoint ()Z + public final fun isSigned ()Z + public static fun valueOf (Ljava/lang/String;)Lsk/ainet/lang/tensor/storage/LogicalDType; + public static fun values ()[Lsk/ainet/lang/tensor/storage/LogicalDType; +} + +public final class sk/ainet/lang/tensor/storage/LogicalDType$Companion { + public final fun fromDType (Lsk/ainet/lang/types/DType;)Lsk/ainet/lang/tensor/storage/LogicalDType; +} + +public final class sk/ainet/lang/tensor/storage/MemoryDomain : java/lang/Enum { + public static final field DEVICE_LOCAL Lsk/ainet/lang/tensor/storage/MemoryDomain; + public static final field HOST_HEAP Lsk/ainet/lang/tensor/storage/MemoryDomain; + public static final field HOST_PINNED Lsk/ainet/lang/tensor/storage/MemoryDomain; + public static final field MMAP_FILE Lsk/ainet/lang/tensor/storage/MemoryDomain; + public static final field UNIFIED 
Lsk/ainet/lang/tensor/storage/MemoryDomain; + public static fun getEntries ()Lkotlin/enums/EnumEntries; + public static fun valueOf (Ljava/lang/String;)Lsk/ainet/lang/tensor/storage/MemoryDomain; + public static fun values ()[Lsk/ainet/lang/tensor/storage/MemoryDomain; +} + +public final class sk/ainet/lang/tensor/storage/MemoryPlanner { + public fun ()V + public fun (Ljava/util/Set;)V + public synthetic fun (Ljava/util/Set;ILkotlin/jvm/internal/DefaultConstructorMarker;)V + public final fun resolve (Lsk/ainet/lang/tensor/storage/Placement;)Lsk/ainet/lang/tensor/storage/ResolvedPlacement; + public final fun suggestActivationPlacement ()Lsk/ainet/lang/tensor/storage/Placement; + public final fun suggestWeightPlacement (Z)Lsk/ainet/lang/tensor/storage/Placement; +} + +public final class sk/ainet/lang/tensor/storage/MemoryTracker { + public fun ()V + public final fun clear ()V + public final fun record (Ljava/lang/String;Lsk/ainet/lang/tensor/storage/TensorStorage;)V + public final fun recordCopy (Ljava/lang/String;J)V + public final fun report ()Lsk/ainet/lang/tensor/storage/AggregateMemoryReport; +} + +public final class sk/ainet/lang/tensor/storage/Ownership : java/lang/Enum { + public static final field ALIASED Lsk/ainet/lang/tensor/storage/Ownership; + public static final field BORROWED Lsk/ainet/lang/tensor/storage/Ownership; + public static final field DEVICE_RESIDENT Lsk/ainet/lang/tensor/storage/Ownership; + public static final field FILE_BACKED Lsk/ainet/lang/tensor/storage/Ownership; + public static final field OWNED Lsk/ainet/lang/tensor/storage/Ownership; + public static fun getEntries ()Lkotlin/enums/EnumEntries; + public static fun valueOf (Ljava/lang/String;)Lsk/ainet/lang/tensor/storage/Ownership; + public static fun values ()[Lsk/ainet/lang/tensor/storage/Ownership; +} + +public abstract interface class sk/ainet/lang/tensor/storage/PackedBlockStorage { + public abstract fun dequantizeBlock (I[FI)V + public static synthetic fun dequantizeBlock$default 
(Lsk/ainet/lang/tensor/storage/PackedBlockStorage;I[FIILjava/lang/Object;)V + public abstract fun getBlockCount ()I + public abstract fun getBlockSize ()I + public fun getElementCount ()J + public abstract fun getEncoding ()Lsk/ainet/lang/tensor/storage/TensorEncoding; + public abstract fun getPackedData ()[B + public fun getPhysicalBytes ()J + public abstract fun getShape ()Lsk/ainet/lang/tensor/Shape; + public fun toFloatArray ()[F + public fun toTensorStorage (Lsk/ainet/lang/tensor/storage/LogicalDType;Lsk/ainet/lang/tensor/storage/Placement;)Lsk/ainet/lang/tensor/storage/TensorStorage; + public static synthetic fun toTensorStorage$default (Lsk/ainet/lang/tensor/storage/PackedBlockStorage;Lsk/ainet/lang/tensor/storage/LogicalDType;Lsk/ainet/lang/tensor/storage/Placement;ILjava/lang/Object;)Lsk/ainet/lang/tensor/storage/TensorStorage; +} + +public final class sk/ainet/lang/tensor/storage/PackedBlockStorage$DefaultImpls { + public static synthetic fun dequantizeBlock$default (Lsk/ainet/lang/tensor/storage/PackedBlockStorage;I[FIILjava/lang/Object;)V + public static fun getElementCount (Lsk/ainet/lang/tensor/storage/PackedBlockStorage;)J + public static fun getPhysicalBytes (Lsk/ainet/lang/tensor/storage/PackedBlockStorage;)J + public static fun toFloatArray (Lsk/ainet/lang/tensor/storage/PackedBlockStorage;)[F + public static fun toTensorStorage (Lsk/ainet/lang/tensor/storage/PackedBlockStorage;Lsk/ainet/lang/tensor/storage/LogicalDType;Lsk/ainet/lang/tensor/storage/Placement;)Lsk/ainet/lang/tensor/storage/TensorStorage; + public static synthetic fun toTensorStorage$default (Lsk/ainet/lang/tensor/storage/PackedBlockStorage;Lsk/ainet/lang/tensor/storage/LogicalDType;Lsk/ainet/lang/tensor/storage/Placement;ILjava/lang/Object;)Lsk/ainet/lang/tensor/storage/TensorStorage; +} + +public abstract interface annotation class sk/ainet/lang/tensor/storage/Place : java/lang/annotation/Annotation { + public abstract fun device ()Lsk/ainet/lang/tensor/storage/DeviceKind; + 
public abstract fun memory ()Lsk/ainet/lang/tensor/storage/MemoryDomain; + public abstract fun requirement ()Lsk/ainet/lang/tensor/storage/Requirement; +} + +public final class sk/ainet/lang/tensor/storage/Placement { + public static final field Companion Lsk/ainet/lang/tensor/storage/Placement$Companion; + public fun ()V + public fun (Lsk/ainet/lang/tensor/storage/DeviceKind;Lsk/ainet/lang/tensor/storage/MemoryDomain;Lsk/ainet/lang/tensor/storage/Residency;Lsk/ainet/lang/tensor/storage/Requirement;Lsk/ainet/lang/tensor/storage/DeviceKind;)V + public synthetic fun (Lsk/ainet/lang/tensor/storage/DeviceKind;Lsk/ainet/lang/tensor/storage/MemoryDomain;Lsk/ainet/lang/tensor/storage/Residency;Lsk/ainet/lang/tensor/storage/Requirement;Lsk/ainet/lang/tensor/storage/DeviceKind;ILkotlin/jvm/internal/DefaultConstructorMarker;)V + public final fun component1 ()Lsk/ainet/lang/tensor/storage/DeviceKind; + public final fun component2 ()Lsk/ainet/lang/tensor/storage/MemoryDomain; + public final fun component3 ()Lsk/ainet/lang/tensor/storage/Residency; + public final fun component4 ()Lsk/ainet/lang/tensor/storage/Requirement; + public final fun component5 ()Lsk/ainet/lang/tensor/storage/DeviceKind; + public final fun copy (Lsk/ainet/lang/tensor/storage/DeviceKind;Lsk/ainet/lang/tensor/storage/MemoryDomain;Lsk/ainet/lang/tensor/storage/Residency;Lsk/ainet/lang/tensor/storage/Requirement;Lsk/ainet/lang/tensor/storage/DeviceKind;)Lsk/ainet/lang/tensor/storage/Placement; + public static synthetic fun copy$default (Lsk/ainet/lang/tensor/storage/Placement;Lsk/ainet/lang/tensor/storage/DeviceKind;Lsk/ainet/lang/tensor/storage/MemoryDomain;Lsk/ainet/lang/tensor/storage/Residency;Lsk/ainet/lang/tensor/storage/Requirement;Lsk/ainet/lang/tensor/storage/DeviceKind;ILjava/lang/Object;)Lsk/ainet/lang/tensor/storage/Placement; + public fun equals (Ljava/lang/Object;)Z + public final fun getDevice ()Lsk/ainet/lang/tensor/storage/DeviceKind; + public final fun getDomain 
()Lsk/ainet/lang/tensor/storage/MemoryDomain; + public final fun getFallback ()Lsk/ainet/lang/tensor/storage/DeviceKind; + public final fun getRequirement ()Lsk/ainet/lang/tensor/storage/Requirement; + public final fun getResidency ()Lsk/ainet/lang/tensor/storage/Residency; + public fun hashCode ()I + public fun toString ()Ljava/lang/String; +} + +public final class sk/ainet/lang/tensor/storage/Placement$Companion { + public final fun getCPU_HEAP ()Lsk/ainet/lang/tensor/storage/Placement; + public final fun getGPU_PREFERRED ()Lsk/ainet/lang/tensor/storage/Placement; + public final fun getMMAP_WEIGHTS ()Lsk/ainet/lang/tensor/storage/Placement; +} + +public final class sk/ainet/lang/tensor/storage/PlacementUnavailableException : java/lang/RuntimeException { + public fun (Ljava/lang/String;)V +} + +public final class sk/ainet/lang/tensor/storage/Requirement : java/lang/Enum { + public static final field PREFERRED Lsk/ainet/lang/tensor/storage/Requirement; + public static final field REQUIRED Lsk/ainet/lang/tensor/storage/Requirement; + public static fun getEntries ()Lkotlin/enums/EnumEntries; + public static fun valueOf (Ljava/lang/String;)Lsk/ainet/lang/tensor/storage/Requirement; + public static fun values ()[Lsk/ainet/lang/tensor/storage/Requirement; +} + +public final class sk/ainet/lang/tensor/storage/Residency : java/lang/Enum { + public static final field PERSISTENT Lsk/ainet/lang/tensor/storage/Residency; + public static final field TRANSIENT Lsk/ainet/lang/tensor/storage/Residency; + public static fun getEntries ()Lkotlin/enums/EnumEntries; + public static fun valueOf (Ljava/lang/String;)Lsk/ainet/lang/tensor/storage/Residency; + public static fun values ()[Lsk/ainet/lang/tensor/storage/Residency; +} + +public final class sk/ainet/lang/tensor/storage/ResolvedPlacement { + public fun (Lsk/ainet/lang/tensor/storage/Placement;Z)V + public final fun component1 ()Lsk/ainet/lang/tensor/storage/Placement; + public final fun component2 ()Z + public final fun copy 
(Lsk/ainet/lang/tensor/storage/Placement;Z)Lsk/ainet/lang/tensor/storage/ResolvedPlacement; + public static synthetic fun copy$default (Lsk/ainet/lang/tensor/storage/ResolvedPlacement;Lsk/ainet/lang/tensor/storage/Placement;ZILjava/lang/Object;)Lsk/ainet/lang/tensor/storage/ResolvedPlacement; + public fun equals (Ljava/lang/Object;)Z + public final fun getActual ()Lsk/ainet/lang/tensor/storage/Placement; + public final fun getUsedFallback ()Z + public fun hashCode ()I + public fun toString ()Ljava/lang/String; +} + +public final class sk/ainet/lang/tensor/storage/StorageMemoryReport { + public fun (Lsk/ainet/lang/tensor/Shape;Lsk/ainet/lang/tensor/storage/LogicalDType;Lsk/ainet/lang/tensor/storage/TensorEncoding;Lsk/ainet/lang/tensor/storage/Ownership;Lsk/ainet/lang/tensor/storage/Placement;JJZZZ)V + public final fun component1 ()Lsk/ainet/lang/tensor/Shape; + public final fun component10 ()Z + public final fun component2 ()Lsk/ainet/lang/tensor/storage/LogicalDType; + public final fun component3 ()Lsk/ainet/lang/tensor/storage/TensorEncoding; + public final fun component4 ()Lsk/ainet/lang/tensor/storage/Ownership; + public final fun component5 ()Lsk/ainet/lang/tensor/storage/Placement; + public final fun component6 ()J + public final fun component7 ()J + public final fun component8 ()Z + public final fun component9 ()Z + public final fun copy (Lsk/ainet/lang/tensor/Shape;Lsk/ainet/lang/tensor/storage/LogicalDType;Lsk/ainet/lang/tensor/storage/TensorEncoding;Lsk/ainet/lang/tensor/storage/Ownership;Lsk/ainet/lang/tensor/storage/Placement;JJZZZ)Lsk/ainet/lang/tensor/storage/StorageMemoryReport; + public static synthetic fun copy$default (Lsk/ainet/lang/tensor/storage/StorageMemoryReport;Lsk/ainet/lang/tensor/Shape;Lsk/ainet/lang/tensor/storage/LogicalDType;Lsk/ainet/lang/tensor/storage/TensorEncoding;Lsk/ainet/lang/tensor/storage/Ownership;Lsk/ainet/lang/tensor/storage/Placement;JJZZZILjava/lang/Object;)Lsk/ainet/lang/tensor/storage/StorageMemoryReport; + public fun 
equals (Ljava/lang/Object;)Z + public final fun getCompressionRatio ()D + public final fun getEncoding ()Lsk/ainet/lang/tensor/storage/TensorEncoding; + public final fun getLogicalBytes ()J + public final fun getLogicalType ()Lsk/ainet/lang/tensor/storage/LogicalDType; + public final fun getOwnership ()Lsk/ainet/lang/tensor/storage/Ownership; + public final fun getPhysicalBytes ()J + public final fun getPlacement ()Lsk/ainet/lang/tensor/storage/Placement; + public final fun getShape ()Lsk/ainet/lang/tensor/Shape; + public fun hashCode ()I + public final fun isAlias ()Z + public final fun isFileBacked ()Z + public final fun isMutable ()Z + public fun toString ()Ljava/lang/String; +} + +public final class sk/ainet/lang/tensor/storage/StorageSpec { + public static final field Companion Lsk/ainet/lang/tensor/storage/StorageSpec$Companion; + public fun (Lsk/ainet/lang/tensor/storage/LogicalDType;Lsk/ainet/lang/tensor/storage/TensorEncoding;Lsk/ainet/lang/tensor/storage/Ownership;Lsk/ainet/lang/tensor/storage/Placement;)V + public synthetic fun (Lsk/ainet/lang/tensor/storage/LogicalDType;Lsk/ainet/lang/tensor/storage/TensorEncoding;Lsk/ainet/lang/tensor/storage/Ownership;Lsk/ainet/lang/tensor/storage/Placement;ILkotlin/jvm/internal/DefaultConstructorMarker;)V + public final fun component1 ()Lsk/ainet/lang/tensor/storage/LogicalDType; + public final fun component2 ()Lsk/ainet/lang/tensor/storage/TensorEncoding; + public final fun component3 ()Lsk/ainet/lang/tensor/storage/Ownership; + public final fun component4 ()Lsk/ainet/lang/tensor/storage/Placement; + public final fun copy (Lsk/ainet/lang/tensor/storage/LogicalDType;Lsk/ainet/lang/tensor/storage/TensorEncoding;Lsk/ainet/lang/tensor/storage/Ownership;Lsk/ainet/lang/tensor/storage/Placement;)Lsk/ainet/lang/tensor/storage/StorageSpec; + public static synthetic fun copy$default 
(Lsk/ainet/lang/tensor/storage/StorageSpec;Lsk/ainet/lang/tensor/storage/LogicalDType;Lsk/ainet/lang/tensor/storage/TensorEncoding;Lsk/ainet/lang/tensor/storage/Ownership;Lsk/ainet/lang/tensor/storage/Placement;ILjava/lang/Object;)Lsk/ainet/lang/tensor/storage/StorageSpec; + public fun equals (Ljava/lang/Object;)Z + public final fun getEncoding ()Lsk/ainet/lang/tensor/storage/TensorEncoding; + public final fun getLogicalType ()Lsk/ainet/lang/tensor/storage/LogicalDType; + public final fun getOwnership ()Lsk/ainet/lang/tensor/storage/Ownership; + public final fun getPlacement ()Lsk/ainet/lang/tensor/storage/Placement; + public fun hashCode ()I + public fun toString ()Ljava/lang/String; +} + +public final class sk/ainet/lang/tensor/storage/StorageSpec$Companion { + public final fun borrowed (Lsk/ainet/lang/types/DType;)Lsk/ainet/lang/tensor/storage/StorageSpec; + public final fun fromDType (Lsk/ainet/lang/types/DType;)Lsk/ainet/lang/tensor/storage/StorageSpec; + public final fun mmapWeights (Lsk/ainet/lang/types/DType;)Lsk/ainet/lang/tensor/storage/StorageSpec; + public final fun q4k (Lsk/ainet/lang/tensor/storage/Placement;)Lsk/ainet/lang/tensor/storage/StorageSpec; + public static synthetic fun q4k$default (Lsk/ainet/lang/tensor/storage/StorageSpec$Companion;Lsk/ainet/lang/tensor/storage/Placement;ILjava/lang/Object;)Lsk/ainet/lang/tensor/storage/StorageSpec; + public final fun q80 (Lsk/ainet/lang/tensor/storage/Placement;)Lsk/ainet/lang/tensor/storage/StorageSpec; + public static synthetic fun q80$default (Lsk/ainet/lang/tensor/storage/StorageSpec$Companion;Lsk/ainet/lang/tensor/storage/Placement;ILjava/lang/Object;)Lsk/ainet/lang/tensor/storage/StorageSpec; +} + +public abstract interface class sk/ainet/lang/tensor/storage/TensorEncoding { + public abstract fun getName ()Ljava/lang/String; + public abstract fun physicalBytes (J)Ljava/lang/Long; +} + +public final class sk/ainet/lang/tensor/storage/TensorEncoding$Dense : sk/ainet/lang/tensor/storage/TensorEncoding 
{ + public fun (I)V + public final fun component1 ()I + public final fun copy (I)Lsk/ainet/lang/tensor/storage/TensorEncoding$Dense; + public static synthetic fun copy$default (Lsk/ainet/lang/tensor/storage/TensorEncoding$Dense;IILjava/lang/Object;)Lsk/ainet/lang/tensor/storage/TensorEncoding$Dense; + public fun equals (Ljava/lang/Object;)Z + public final fun getBytesPerElement ()I + public fun getName ()Ljava/lang/String; + public fun hashCode ()I + public fun physicalBytes (J)Ljava/lang/Long; + public fun toString ()Ljava/lang/String; +} + +public final class sk/ainet/lang/tensor/storage/TensorEncoding$Opaque : sk/ainet/lang/tensor/storage/TensorEncoding { + public fun (Ljava/lang/String;J)V + public final fun component1 ()Ljava/lang/String; + public final fun component2 ()J + public final fun copy (Ljava/lang/String;J)Lsk/ainet/lang/tensor/storage/TensorEncoding$Opaque; + public static synthetic fun copy$default (Lsk/ainet/lang/tensor/storage/TensorEncoding$Opaque;Ljava/lang/String;JILjava/lang/Object;)Lsk/ainet/lang/tensor/storage/TensorEncoding$Opaque; + public fun equals (Ljava/lang/Object;)Z + public fun getName ()Ljava/lang/String; + public final fun getRawBytes ()J + public fun hashCode ()I + public fun physicalBytes (J)Ljava/lang/Long; + public fun toString ()Ljava/lang/String; +} + +public final class sk/ainet/lang/tensor/storage/TensorEncoding$Q4_0 : sk/ainet/lang/tensor/storage/TensorEncoding { + public static final field BLOCK_SIZE I + public static final field BYTES_PER_BLOCK I + public static final field INSTANCE Lsk/ainet/lang/tensor/storage/TensorEncoding$Q4_0; + public fun equals (Ljava/lang/Object;)Z + public fun getName ()Ljava/lang/String; + public fun hashCode ()I + public fun physicalBytes (J)Ljava/lang/Long; + public fun toString ()Ljava/lang/String; +} + +public final class sk/ainet/lang/tensor/storage/TensorEncoding$Q4_K : sk/ainet/lang/tensor/storage/TensorEncoding { + public static final field BLOCK_SIZE I + public static final field 
BYTES_PER_BLOCK I + public static final field INSTANCE Lsk/ainet/lang/tensor/storage/TensorEncoding$Q4_K; + public fun equals (Ljava/lang/Object;)Z + public fun getName ()Ljava/lang/String; + public fun hashCode ()I + public fun physicalBytes (J)Ljava/lang/Long; + public fun toString ()Ljava/lang/String; +} + +public final class sk/ainet/lang/tensor/storage/TensorEncoding$Q6_K : sk/ainet/lang/tensor/storage/TensorEncoding { + public static final field BLOCK_SIZE I + public static final field BYTES_PER_BLOCK I + public static final field INSTANCE Lsk/ainet/lang/tensor/storage/TensorEncoding$Q6_K; + public fun equals (Ljava/lang/Object;)Z + public fun getName ()Ljava/lang/String; + public fun hashCode ()I + public fun physicalBytes (J)Ljava/lang/Long; + public fun toString ()Ljava/lang/String; +} + +public final class sk/ainet/lang/tensor/storage/TensorEncoding$Q8_0 : sk/ainet/lang/tensor/storage/TensorEncoding { + public static final field BLOCK_SIZE I + public static final field BYTES_PER_BLOCK I + public static final field INSTANCE Lsk/ainet/lang/tensor/storage/TensorEncoding$Q8_0; + public fun equals (Ljava/lang/Object;)Z + public fun getName ()Ljava/lang/String; + public fun hashCode ()I + public fun physicalBytes (J)Ljava/lang/Long; + public fun toString ()Ljava/lang/String; +} + +public final class sk/ainet/lang/tensor/storage/TensorEncoding$TernaryPacked : sk/ainet/lang/tensor/storage/TensorEncoding { + public static final field INSTANCE Lsk/ainet/lang/tensor/storage/TensorEncoding$TernaryPacked; + public fun equals (Ljava/lang/Object;)Z + public fun getName ()Ljava/lang/String; + public fun hashCode ()I + public fun physicalBytes (J)Ljava/lang/Long; + public fun toString ()Ljava/lang/String; +} + +public final class sk/ainet/lang/tensor/storage/TensorEncoding$TurboQuantPolar : sk/ainet/lang/tensor/storage/TensorEncoding { + public fun ()V + public fun (II)V + public synthetic fun (IIILkotlin/jvm/internal/DefaultConstructorMarker;)V + public final fun 
component1 ()I + public final fun component2 ()I + public final fun copy (II)Lsk/ainet/lang/tensor/storage/TensorEncoding$TurboQuantPolar; + public static synthetic fun copy$default (Lsk/ainet/lang/tensor/storage/TensorEncoding$TurboQuantPolar;IIILjava/lang/Object;)Lsk/ainet/lang/tensor/storage/TensorEncoding$TurboQuantPolar; + public fun equals (Ljava/lang/Object;)Z + public final fun getBitsPerElement ()I + public final fun getBlockSize ()I + public fun getName ()Ljava/lang/String; + public final fun getNumGroups ()I + public fun hashCode ()I + public fun physicalBytes (J)Ljava/lang/Long; + public fun toString ()Ljava/lang/String; +} + +public final class sk/ainet/lang/tensor/storage/TensorEncoding$TurboQuantPolarQjl : sk/ainet/lang/tensor/storage/TensorEncoding { + public fun ()V + public fun (III)V + public synthetic fun (IIIILkotlin/jvm/internal/DefaultConstructorMarker;)V + public final fun component1 ()I + public final fun component2 ()I + public final fun component3 ()I + public final fun copy (III)Lsk/ainet/lang/tensor/storage/TensorEncoding$TurboQuantPolarQjl; + public static synthetic fun copy$default (Lsk/ainet/lang/tensor/storage/TensorEncoding$TurboQuantPolarQjl;IIIILjava/lang/Object;)Lsk/ainet/lang/tensor/storage/TensorEncoding$TurboQuantPolarQjl; + public fun equals (Ljava/lang/Object;)Z + public final fun getBitsPerElement ()I + public final fun getBlockSize ()I + public fun getName ()Ljava/lang/String; + public final fun getNumGroups ()I + public final fun getResidualBits ()I + public fun hashCode ()I + public fun physicalBytes (J)Ljava/lang/Long; + public fun toString ()Ljava/lang/String; +} + +public final class sk/ainet/lang/tensor/storage/TensorStorage { + public fun (Lsk/ainet/lang/tensor/Shape;Lsk/ainet/lang/tensor/storage/LogicalDType;Lsk/ainet/lang/tensor/storage/TensorEncoding;Lsk/ainet/lang/tensor/storage/BufferHandle;Lsk/ainet/lang/tensor/storage/Placement;J[JZ)V + public synthetic fun 
(Lsk/ainet/lang/tensor/Shape;Lsk/ainet/lang/tensor/storage/LogicalDType;Lsk/ainet/lang/tensor/storage/TensorEncoding;Lsk/ainet/lang/tensor/storage/BufferHandle;Lsk/ainet/lang/tensor/storage/Placement;J[JZILkotlin/jvm/internal/DefaultConstructorMarker;)V + public final fun component1 ()Lsk/ainet/lang/tensor/Shape; + public final fun component2 ()Lsk/ainet/lang/tensor/storage/LogicalDType; + public final fun component3 ()Lsk/ainet/lang/tensor/storage/TensorEncoding; + public final fun component4 ()Lsk/ainet/lang/tensor/storage/BufferHandle; + public final fun component5 ()Lsk/ainet/lang/tensor/storage/Placement; + public final fun component6 ()J + public final fun component7 ()[J + public final fun component8 ()Z + public final fun copy (Lsk/ainet/lang/tensor/Shape;Lsk/ainet/lang/tensor/storage/LogicalDType;Lsk/ainet/lang/tensor/storage/TensorEncoding;Lsk/ainet/lang/tensor/storage/BufferHandle;Lsk/ainet/lang/tensor/storage/Placement;J[JZ)Lsk/ainet/lang/tensor/storage/TensorStorage; + public static synthetic fun copy$default (Lsk/ainet/lang/tensor/storage/TensorStorage;Lsk/ainet/lang/tensor/Shape;Lsk/ainet/lang/tensor/storage/LogicalDType;Lsk/ainet/lang/tensor/storage/TensorEncoding;Lsk/ainet/lang/tensor/storage/BufferHandle;Lsk/ainet/lang/tensor/storage/Placement;J[JZILjava/lang/Object;)Lsk/ainet/lang/tensor/storage/TensorStorage; + public final fun copyMaterialize ()Lsk/ainet/lang/tensor/storage/TensorStorage; + public final fun copyToDevice (Lsk/ainet/lang/tensor/storage/DeviceKind;)Lsk/ainet/lang/tensor/storage/TensorStorage; + public final fun copyToHost ()Lsk/ainet/lang/tensor/storage/TensorStorage; + public fun equals (Ljava/lang/Object;)Z + public final fun getBuffer ()Lsk/ainet/lang/tensor/storage/BufferHandle; + public final fun getByteOffset ()J + public final fun getElementCount ()J + public final fun getEncoding ()Lsk/ainet/lang/tensor/storage/TensorEncoding; + public final fun getLogicalBytes ()J + public final fun getLogicalType 
()Lsk/ainet/lang/tensor/storage/LogicalDType; + public final fun getOwnership ()Lsk/ainet/lang/tensor/storage/Ownership; + public final fun getPhysicalBytes ()J + public final fun getPlacement ()Lsk/ainet/lang/tensor/storage/Placement; + public final fun getShape ()Lsk/ainet/lang/tensor/Shape; + public final fun getStrides ()[J + public fun hashCode ()I + public final fun isAlias ()Z + public final fun isContiguous ()Z + public final fun isFileBacked ()Z + public final fun isMutable ()Z + public final fun memoryReport ()Lsk/ainet/lang/tensor/storage/StorageMemoryReport; + public final fun repackTo (Lsk/ainet/lang/tensor/storage/TensorEncoding;)Lsk/ainet/lang/tensor/storage/TensorStorage; + public fun toString ()Ljava/lang/String; +} + +public final class sk/ainet/lang/tensor/storage/TensorStorageFactory { + public static final field INSTANCE Lsk/ainet/lang/tensor/storage/TensorStorageFactory; + public final fun borrowFloatArray (Lsk/ainet/lang/tensor/Shape;[F)Lsk/ainet/lang/tensor/storage/TensorStorage; + public final fun fileBacked (Lsk/ainet/lang/tensor/Shape;Lsk/ainet/lang/tensor/storage/LogicalDType;Lsk/ainet/lang/tensor/storage/TensorEncoding;Ljava/lang/String;JJ)Lsk/ainet/lang/tensor/storage/TensorStorage; + public final fun fromFloatArray (Lsk/ainet/lang/tensor/Shape;[F)Lsk/ainet/lang/tensor/storage/TensorStorage; + public final fun fromIntArray (Lsk/ainet/lang/tensor/Shape;[I)Lsk/ainet/lang/tensor/storage/TensorStorage; + public final fun fromRawBytes (Lsk/ainet/lang/tensor/Shape;Lsk/ainet/lang/tensor/storage/LogicalDType;Lsk/ainet/lang/tensor/storage/TensorEncoding;[BLsk/ainet/lang/tensor/storage/Placement;)Lsk/ainet/lang/tensor/storage/TensorStorage; + public static synthetic fun fromRawBytes$default 
(Lsk/ainet/lang/tensor/storage/TensorStorageFactory;Lsk/ainet/lang/tensor/Shape;Lsk/ainet/lang/tensor/storage/LogicalDType;Lsk/ainet/lang/tensor/storage/TensorEncoding;[BLsk/ainet/lang/tensor/storage/Placement;ILjava/lang/Object;)Lsk/ainet/lang/tensor/storage/TensorStorage; + public final fun fromRawBytesOwned (Lsk/ainet/lang/tensor/Shape;Lsk/ainet/lang/tensor/storage/LogicalDType;Lsk/ainet/lang/tensor/storage/TensorEncoding;[BLsk/ainet/lang/tensor/storage/Placement;)Lsk/ainet/lang/tensor/storage/TensorStorage; + public static synthetic fun fromRawBytesOwned$default (Lsk/ainet/lang/tensor/storage/TensorStorageFactory;Lsk/ainet/lang/tensor/Shape;Lsk/ainet/lang/tensor/storage/LogicalDType;Lsk/ainet/lang/tensor/storage/TensorEncoding;[BLsk/ainet/lang/tensor/storage/Placement;ILjava/lang/Object;)Lsk/ainet/lang/tensor/storage/TensorStorage; + public final fun fromTensorData (Lsk/ainet/lang/tensor/data/TensorData;)Lsk/ainet/lang/tensor/storage/TensorStorage; + public final fun toTensorData (Lsk/ainet/lang/tensor/storage/TensorStorage;)Lsk/ainet/lang/tensor/data/TensorData; +} + +public final class sk/ainet/lang/tensor/storage/TrackedEntry { + public fun (Ljava/lang/String;Lsk/ainet/lang/tensor/storage/StorageMemoryReport;)V + public final fun component1 ()Ljava/lang/String; + public final fun component2 ()Lsk/ainet/lang/tensor/storage/StorageMemoryReport; + public final fun copy (Ljava/lang/String;Lsk/ainet/lang/tensor/storage/StorageMemoryReport;)Lsk/ainet/lang/tensor/storage/TrackedEntry; + public static synthetic fun copy$default (Lsk/ainet/lang/tensor/storage/TrackedEntry;Ljava/lang/String;Lsk/ainet/lang/tensor/storage/StorageMemoryReport;ILjava/lang/Object;)Lsk/ainet/lang/tensor/storage/TrackedEntry; + public fun equals (Ljava/lang/Object;)Z + public final fun getName ()Ljava/lang/String; + public final fun getReport ()Lsk/ainet/lang/tensor/storage/StorageMemoryReport; + public fun hashCode ()I + public fun toString ()Ljava/lang/String; +} + +public final class 
sk/ainet/lang/tensor/storage/TurboQuantKvCacheStore : sk/ainet/lang/tensor/storage/KvCacheStore { + public fun (Lsk/ainet/lang/tensor/storage/KvCacheConfig;Lsk/ainet/lang/tensor/ops/turboquant/TurboQuantConfig;Lsk/ainet/lang/tensor/ops/turboquant/TurboQuantConfig;)V + public fun appendToken (I[F[F)V + public fun clear ()V + public fun evict (I)V + public fun getCurrentSeqLen ()I + public fun getHeadDim ()I + public fun getKeyEncoding ()Lsk/ainet/lang/tensor/storage/TensorEncoding; + public fun getMaxSeqLen ()I + public fun getNumHeads ()I + public fun getNumLayers ()I + public fun getPlacement ()Lsk/ainet/lang/tensor/storage/Placement; + public fun getValueEncoding ()Lsk/ainet/lang/tensor/storage/TensorEncoding; + public fun memoryReport ()Lsk/ainet/lang/tensor/storage/KvCacheMemoryReport; + public fun readKeyStorage (III)Lsk/ainet/lang/tensor/storage/TensorStorage; + public fun readKeys (III)[F + public fun readValueStorage (III)Lsk/ainet/lang/tensor/storage/TensorStorage; + public fun readValues (III)[F +} + +public abstract interface annotation class sk/ainet/lang/tensor/storage/Weights : java/lang/annotation/Annotation { + public abstract fun memory ()Lsk/ainet/lang/tensor/storage/MemoryDomain; +} + +public final class sk/ainet/lang/trace/CompositeSink : sk/ainet/lang/trace/OpSink { + public fun (Ljava/util/List;)V + public fun onOpExecuted (Lsk/ainet/lang/trace/OpTrace;)V +} + +public final class sk/ainet/lang/trace/NoOpSink : sk/ainet/lang/trace/OpSink { + public static final field INSTANCE Lsk/ainet/lang/trace/NoOpSink; + public fun onOpExecuted (Lsk/ainet/lang/trace/OpTrace;)V +} + +public final class sk/ainet/lang/trace/OpAttributeFactory { + public static final field INSTANCE Lsk/ainet/lang/trace/OpAttributeFactory; + public final fun binary (Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;)Ljava/util/Map; + public final fun conv2d 
(Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;Lkotlin/Pair;Lkotlin/Pair;Lkotlin/Pair;I)Ljava/util/Map; + public final fun scalarOp (Lsk/ainet/lang/tensor/Tensor;Ljava/lang/Number;Lsk/ainet/lang/tensor/Tensor;Z)Ljava/util/Map; + public final fun shapesAndDTypes (Ljava/util/List;Ljava/util/List;)Ljava/util/Map; + public final fun unary (Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;)Ljava/util/Map; } public abstract interface class sk/ainet/lang/trace/OpSink { @@ -4025,6 +5492,66 @@ public final class sk/ainet/lang/types/DTypeExtensionsKt { public static final fun kotlinClass (Lsk/ainet/lang/types/DType;)Lkotlin/reflect/KClass; } +public abstract interface class sk/ainet/lang/types/DTypePolicy { + public static final field Companion Lsk/ainet/lang/types/DTypePolicy$Companion; + public static fun any ()Lsk/ainet/lang/types/DTypePolicy; + public abstract fun isSatisfiedBy (Lsk/ainet/lang/types/DType;)Z + public static fun oneOf ([Lsk/ainet/lang/types/DType;)Lsk/ainet/lang/types/DTypePolicy; + public static fun prefer (Lsk/ainet/lang/types/DType;)Lsk/ainet/lang/types/DTypePolicy; + public static fun require (Lsk/ainet/lang/types/DType;)Lsk/ainet/lang/types/DTypePolicy; +} + +public final class sk/ainet/lang/types/DTypePolicy$Any : sk/ainet/lang/types/DTypePolicy { + public static final field INSTANCE Lsk/ainet/lang/types/DTypePolicy$Any; + public fun equals (Ljava/lang/Object;)Z + public fun hashCode ()I + public fun isSatisfiedBy (Lsk/ainet/lang/types/DType;)Z + public fun toString ()Ljava/lang/String; +} + +public final class sk/ainet/lang/types/DTypePolicy$Companion { + public final fun any ()Lsk/ainet/lang/types/DTypePolicy; + public final fun oneOf ([Lsk/ainet/lang/types/DType;)Lsk/ainet/lang/types/DTypePolicy; + public final fun prefer (Lsk/ainet/lang/types/DType;)Lsk/ainet/lang/types/DTypePolicy; + public final fun require 
(Lsk/ainet/lang/types/DType;)Lsk/ainet/lang/types/DTypePolicy; +} + +public final class sk/ainet/lang/types/DTypePolicy$OneOf : sk/ainet/lang/types/DTypePolicy { + public fun (Ljava/util/Set;)V + public final fun component1 ()Ljava/util/Set; + public final fun copy (Ljava/util/Set;)Lsk/ainet/lang/types/DTypePolicy$OneOf; + public static synthetic fun copy$default (Lsk/ainet/lang/types/DTypePolicy$OneOf;Ljava/util/Set;ILjava/lang/Object;)Lsk/ainet/lang/types/DTypePolicy$OneOf; + public fun equals (Ljava/lang/Object;)Z + public final fun getAllowed ()Ljava/util/Set; + public fun hashCode ()I + public fun isSatisfiedBy (Lsk/ainet/lang/types/DType;)Z + public fun toString ()Ljava/lang/String; +} + +public final class sk/ainet/lang/types/DTypePolicy$Prefer : sk/ainet/lang/types/DTypePolicy { + public fun (Lsk/ainet/lang/types/DType;)V + public final fun component1 ()Lsk/ainet/lang/types/DType; + public final fun copy (Lsk/ainet/lang/types/DType;)Lsk/ainet/lang/types/DTypePolicy$Prefer; + public static synthetic fun copy$default (Lsk/ainet/lang/types/DTypePolicy$Prefer;Lsk/ainet/lang/types/DType;ILjava/lang/Object;)Lsk/ainet/lang/types/DTypePolicy$Prefer; + public fun equals (Ljava/lang/Object;)Z + public final fun getTarget ()Lsk/ainet/lang/types/DType; + public fun hashCode ()I + public fun isSatisfiedBy (Lsk/ainet/lang/types/DType;)Z + public fun toString ()Ljava/lang/String; +} + +public final class sk/ainet/lang/types/DTypePolicy$Require : sk/ainet/lang/types/DTypePolicy { + public fun (Lsk/ainet/lang/types/DType;)V + public final fun component1 ()Lsk/ainet/lang/types/DType; + public final fun copy (Lsk/ainet/lang/types/DType;)Lsk/ainet/lang/types/DTypePolicy$Require; + public static synthetic fun copy$default (Lsk/ainet/lang/types/DTypePolicy$Require;Lsk/ainet/lang/types/DType;ILjava/lang/Object;)Lsk/ainet/lang/types/DTypePolicy$Require; + public fun equals (Ljava/lang/Object;)Z + public final fun getTarget ()Lsk/ainet/lang/types/DType; + public fun hashCode ()I + 
public fun isSatisfiedBy (Lsk/ainet/lang/types/DType;)Z + public fun toString ()Ljava/lang/String; +} + public final class sk/ainet/lang/types/FP16 : sk/ainet/lang/types/DType { public static final field INSTANCE Lsk/ainet/lang/types/FP16; public fun getName ()Ljava/lang/String; diff --git a/skainet-lang/skainet-lang-core/src/commonMain/kotlin/sk/ainet/lang/tensor/data/Q4_0TensorData.kt b/skainet-lang/skainet-lang-core/src/commonMain/kotlin/sk/ainet/lang/tensor/data/Q4_0TensorData.kt new file mode 100644 index 00000000..c1923721 --- /dev/null +++ b/skainet-lang/skainet-lang-core/src/commonMain/kotlin/sk/ainet/lang/tensor/data/Q4_0TensorData.kt @@ -0,0 +1,202 @@ +package sk.ainet.lang.tensor.data + +import sk.ainet.lang.tensor.Shape +import sk.ainet.lang.tensor.storage.PackedBlockStorage +import sk.ainet.lang.tensor.storage.TensorEncoding +import sk.ainet.lang.types.DType + +/** + * Tensor data interface for the Q4_0 quantized format (older GGML 4-bit). + * + * Q4_0 block format (32 elements per block, 18 bytes per block): + * - 2 bytes: f16 scale (`d`) + * - 16 bytes: 32 packed 4-bit codes (2 nibbles per byte) + * + * Canonical ggml nibble layout (the *split* layout, matching + * `sk.ainet.io.gguf.dequant.DequantOps.dequantQ4_0FromBytes`): for the + * 16 code bytes `qs[0..15]`, the low nibbles decode elements `0..15` and + * the high nibbles decode elements `16..31`: + * + * element[j] = ((qs[j] & 0x0F) - 8) * d for j ∈ [0, 16) + * element[j + 16] = ((qs[j] >>> 4) - 8) * d + * + * The `- 8` bias makes the 4-bit code symmetric around zero. This is the + * layout real GGUF Q4_0 weights are stored in. + * + * This interface enables direct quantized matmul without full + * dequantization, mirroring [Q8_0TensorData]. + */ +public interface Q4_0TensorData : TensorData { + /** Number of Q4_0 blocks in the tensor. */ + public val blockCount: Int + + /** Raw packed data containing all blocks. 
*/ + public val packedData: ByteArray + + /** Get the scale factor (`d`) for a specific block. */ + public fun getBlockScale(blockIdx: Int): Float + + /** + * Get the raw unsigned 4-bit code (0..15) for [elementIdx] (0..31) + * within a block. The dequantized value is `(code - 8) * scale`. + */ + public fun getCode(blockIdx: Int, elementIdx: Int): Byte + + public companion object { + /** Elements per Q4_0 block. */ + public const val BLOCK_SIZE: Int = 32 + + /** Bytes per Q4_0 block (2 bytes scale + 16 bytes packed nibbles). */ + public const val BYTES_PER_BLOCK: Int = 18 + } +} + +/** + * Implementation of [Q4_0TensorData] backed by a packed byte array. + * + * Memory layout per block (18 bytes): + * - bytes [0..1] : f16 scale (little-endian) + * - bytes [2..17] : 16 bytes packing 32 4-bit codes (split layout, see + * [Q4_0TensorData] kdoc) + * + * @param initialShape the logical shape of the tensor (in elements, not blocks) + * @param data the raw packed block data + */ +public class Q4_0BlockTensorData( + initialShape: Shape, + private val data: ByteArray +) : Q4_0TensorData, PackedBlockStorage { + + override val shape: Shape = Shape(initialShape.dimensions.copyOf()) + private val strides: IntArray = shape.computeStrides() + override val packedData: ByteArray get() = data + + override val blockCount: Int = (shape.volume + Q4_0TensorData.BLOCK_SIZE - 1) / Q4_0TensorData.BLOCK_SIZE + + // PackedBlockStorage implementation + override val encoding: TensorEncoding get() = TensorEncoding.Q4_0 + override val blockSize: Int get() = Q4_0TensorData.BLOCK_SIZE + + override fun dequantizeBlock(blockIdx: Int, output: FloatArray, outputOffset: Int) { + require(blockIdx in 0 until blockCount) { "Block index $blockIdx out of bounds (0..$blockCount)" } + val scale = getBlockScale(blockIdx) + val elemsInBlock = minOf(Q4_0TensorData.BLOCK_SIZE, shape.volume - blockIdx * Q4_0TensorData.BLOCK_SIZE) + val codesBase = blockIdx * Q4_0TensorData.BYTES_PER_BLOCK + 2 + for (j in 0 until 
16) { + val b = data[codesBase + j].toInt() and 0xFF + val lo = (b and 0x0F) - 8 + val hi = (b ushr 4) - 8 + val o0 = outputOffset + j + if (j < elemsInBlock && o0 < output.size) output[o0] = lo.toFloat() * scale + val o1 = outputOffset + 16 + j + if (16 + j < elemsInBlock && o1 < output.size) output[o1] = hi.toFloat() * scale + } + } + + init { + val requiredBytes = blockCount * Q4_0TensorData.BYTES_PER_BLOCK + require(data.size >= requiredBytes) { + "Data size ${data.size} is less than required $requiredBytes bytes for $blockCount blocks" + } + } + + override fun getBlockScale(blockIdx: Int): Float { + require(blockIdx in 0 until blockCount) { "Block index $blockIdx out of bounds (0..$blockCount)" } + val offset = blockIdx * Q4_0TensorData.BYTES_PER_BLOCK + val b0 = data[offset].toInt() and 0xFF + val b1 = data[offset + 1].toInt() and 0xFF + return halfToFloat((b1 shl 8) or b0) + } + + override fun getCode(blockIdx: Int, elementIdx: Int): Byte { + require(blockIdx in 0 until blockCount) { "Block index $blockIdx out of bounds" } + require(elementIdx in 0 until Q4_0TensorData.BLOCK_SIZE) { "Element index $elementIdx out of bounds (0..31)" } + val byteInBlock = if (elementIdx < 16) elementIdx else elementIdx - 16 + val b = data[blockIdx * Q4_0TensorData.BYTES_PER_BLOCK + 2 + byteInBlock].toInt() and 0xFF + val nibble = if (elementIdx < 16) (b and 0x0F) else (b ushr 4) + return nibble.toByte() + } + + override fun get(vararg indices: Int): Byte { + val flatIndex = calcFlatIndex(indices) + val blockIdx = flatIndex / Q4_0TensorData.BLOCK_SIZE + val elementIdx = flatIndex % Q4_0TensorData.BLOCK_SIZE + return getCode(blockIdx, elementIdx) + } + + override fun set(vararg indices: Int, value: Byte) { + val flatIndex = calcFlatIndex(indices) + val blockIdx = flatIndex / Q4_0TensorData.BLOCK_SIZE + val elementIdx = flatIndex % Q4_0TensorData.BLOCK_SIZE + val byteInBlock = if (elementIdx < 16) elementIdx else elementIdx - 16 + val offset = blockIdx * 
Q4_0TensorData.BYTES_PER_BLOCK + 2 + byteInBlock + val nib = value.toInt() and 0x0F + val cur = data[offset].toInt() and 0xFF + data[offset] = if (elementIdx < 16) ((cur and 0xF0) or nib).toByte() + else ((cur and 0x0F) or (nib shl 4)).toByte() + } + + private fun calcFlatIndex(indices: IntArray): Int { + require(indices.size == shape.dimensions.size) { + "Number of indices (${indices.size}) must match tensor dimensions (${shape.dimensions.size})" + } + var flatIndex = 0 + for (i in indices.indices) { + val idx = indices[i] + require(idx >= 0 && idx < shape.dimensions[i]) { + "Index $idx out of bounds for dimension $i with size ${shape.dimensions[i]}" + } + flatIndex += idx * strides[i] + } + return flatIndex + } + + public companion object { + /** Create [Q4_0BlockTensorData] from raw packed Q4_0 bytes. */ + public fun fromRawBytes(shape: Shape, bytes: ByteArray): Q4_0BlockTensorData { + return Q4_0BlockTensorData(shape, bytes) + } + + /** Convert f16 bits to float32. */ + internal fun halfToFloat(hbits: Int): Float { + val sign = (hbits and 0x8000) shl 16 + val exp = (hbits and 0x7C00) shr 10 + val mant = hbits and 0x03FF + return when (exp) { + 0 -> { + if (mant == 0) { + Float.fromBits(sign) + } else { + var m = mant + var e = -14 + while ((m and 0x400) == 0) { + m = m shl 1 + e-- + } + m = m and 0x3FF + Float.fromBits(sign or ((e + 127) shl 23) or (m shl 13)) + } + } + 31 -> Float.fromBits(sign or (0xFF shl 23) or (mant shl 13)) + else -> Float.fromBits(sign or ((exp - 15 + 127) shl 23) or (mant shl 13)) + } + } + } +} + +/** + * Dequantize Q4_0 tensor data to a FloatArray. + * `element[j] = (code[j] - 8) * scale` in the canonical split layout. 
+ */ +public fun Q4_0TensorData.toFloatArray(): FloatArray { + val result = FloatArray(shape.volume) + for (blockIdx in 0 until blockCount) { + val scale = getBlockScale(blockIdx) + val base = blockIdx * Q4_0TensorData.BLOCK_SIZE + val elemsInBlock = minOf(Q4_0TensorData.BLOCK_SIZE, shape.volume - base) + for (i in 0 until elemsInBlock) { + result[base + i] = (getCode(blockIdx, i).toInt() - 8).toFloat() * scale + } + } + return result +} diff --git a/skainet-lang/skainet-lang-core/src/commonMain/kotlin/sk/ainet/lang/tensor/storage/TensorEncoding.kt b/skainet-lang/skainet-lang-core/src/commonMain/kotlin/sk/ainet/lang/tensor/storage/TensorEncoding.kt index 4a9f745f..bd781a4f 100644 --- a/skainet-lang/skainet-lang-core/src/commonMain/kotlin/sk/ainet/lang/tensor/storage/TensorEncoding.kt +++ b/skainet-lang/skainet-lang-core/src/commonMain/kotlin/sk/ainet/lang/tensor/storage/TensorEncoding.kt @@ -52,6 +52,18 @@ public sealed interface TensorEncoding { } } + /** GGML Q4_0 block quantization: 32 elements per 18-byte block. */ + public data object Q4_0 : TensorEncoding { + public const val BLOCK_SIZE: Int = 32 + public const val BYTES_PER_BLOCK: Int = 18 + + override val name: String get() = "Q4_0" + override fun physicalBytes(elementCount: Long): Long { + val blocks = (elementCount + BLOCK_SIZE - 1) / BLOCK_SIZE + return blocks * BYTES_PER_BLOCK + } + } + /** GGML Q8_0 block quantization: 32 elements per 34-byte block. 
*/ public data object Q8_0 : TensorEncoding { public const val BLOCK_SIZE: Int = 32 diff --git a/skainet-lang/skainet-lang-core/src/commonTest/kotlin/sk/ainet/lang/tensor/data/Q4_0TensorDataTest.kt b/skainet-lang/skainet-lang-core/src/commonTest/kotlin/sk/ainet/lang/tensor/data/Q4_0TensorDataTest.kt new file mode 100644 index 00000000..a2cc0c33 --- /dev/null +++ b/skainet-lang/skainet-lang-core/src/commonTest/kotlin/sk/ainet/lang/tensor/data/Q4_0TensorDataTest.kt @@ -0,0 +1,95 @@ +package sk.ainet.lang.tensor.data + +import sk.ainet.lang.tensor.Shape +import kotlin.test.Test +import kotlin.test.assertContentEquals +import kotlin.test.assertEquals + +class Q4_0TensorDataTest { + + /** Pack 32 unsigned 4-bit codes (0..15) into the canonical split layout. */ + private fun packCodes(codes: IntArray): ByteArray { + require(codes.size == 32) + val out = ByteArray(16) + for (j in 0 until 16) { + out[j] = ((codes[j] and 0x0F) or ((codes[j + 16] and 0x0F) shl 4)).toByte() + } + return out + } + + private fun block(scaleLo: Int, scaleHi: Int, codes: IntArray): ByteArray = + byteArrayOf(scaleLo.toByte(), scaleHi.toByte()) + packCodes(codes) + + @Test + fun `constants are correct`() { + assertEquals(32, Q4_0TensorData.BLOCK_SIZE) + assertEquals(18, Q4_0TensorData.BYTES_PER_BLOCK) + } + + @Test + fun `reads scale from block`() { + // scale = 1.0 (f16 0x3C00 little-endian) + val data = block(0x00, 0x3C, IntArray(32) { 8 }) + val tensor = Q4_0BlockTensorData.fromRawBytes(Shape(32), data) + assertEquals(1.0f, tensor.getBlockScale(0), 0.001f) + } + + @Test + fun `split layout decodes low nibbles to first half and high nibbles to second half`() { + // codes[j]=j%16 → low nibble j∈0..15 ; codes[j+16]=15-(j%16) → high nibble + val codes = IntArray(32) { i -> if (i < 16) i else 15 - (i - 16) } + val data = block(0x00, 0x3C, codes) // scale 1.0 + val tensor = Q4_0BlockTensorData.fromRawBytes(Shape(32), data) + for (i in 0 until 32) { + assertEquals(codes[i].toByte(), tensor.getCode(0, 
i), "code mismatch at $i") + } + } + + @Test + fun `toFloatArray applies minus-eight bias and scale`() { + // scale = 0.5 (f16 0x3800). codes: elem0=10 → (10-8)*0.5=1.0 ; elem16=6 → (6-8)*0.5=-1.0 + val codes = IntArray(32) { 8 } + codes[0] = 10 // low nibble of byte 0 → element 0 + codes[16] = 6 // high nibble of byte 0 → element 16 + val data = block(0x00, 0x38, codes) + val tensor = Q4_0BlockTensorData.fromRawBytes(Shape(32), data) + val floats = tensor.toFloatArray() + assertEquals(1.0f, floats[0], 0.01f) + assertEquals(-1.0f, floats[16], 0.01f) + assertEquals(0.0f, floats[1], 0.01f) // code 8 → (8-8)*scale = 0 + } + + @Test + fun `matches canonical ggml dequant for a known block`() { + // Mirror DequantOps.dequantQ4_0FromBytes: out[j]=(lo-8)*d, out[j+16]=(hi-8)*d. + val codes = IntArray(32) { i -> (i * 7 + 3) and 0x0F } // arbitrary 0..15 pattern + val data = block(0x00, 0x3C, codes) // scale 1.0 + val tensor = Q4_0BlockTensorData.fromRawBytes(Shape(32), data) + val floats = tensor.toFloatArray() + for (i in 0 until 32) { + assertEquals((codes[i] - 8).toFloat(), floats[i], 0.001f, "dequant mismatch at $i") + } + } + + @Test + fun `set round-trips through nibble packing`() { + val data = block(0x00, 0x3C, IntArray(32) { 8 }) + val tensor = Q4_0BlockTensorData.fromRawBytes(Shape(32), data) + tensor[3] = 5 // low nibble of byte 3 + tensor[19] = 12 // high nibble of byte 3 (19-16=3) + assertEquals(5.toByte(), tensor[3]) + assertEquals(12.toByte(), tensor[19]) + } + + @Test + fun `handles multiple blocks and 2D shape`() { + val b0 = block(0x00, 0x3C, IntArray(32) { 8 }) // scale 1.0 + val b1 = block(0x00, 0x40, IntArray(32) { 9 }) // scale 2.0, code 9 + val tensor = Q4_0BlockTensorData.fromRawBytes(Shape(8, 8), b0 + b1) + assertEquals(2, tensor.blockCount) + assertContentEquals(intArrayOf(8, 8), tensor.shape.dimensions) + assertEquals(1.0f, tensor.getBlockScale(0), 0.001f) + assertEquals(2.0f, tensor.getBlockScale(1), 0.001f) + assertEquals(9.toByte(), 
tensor.getCode(1, 0)) + } +} diff --git a/skainet-lang/skainet-lang-core/src/jvmMain/kotlin/sk/ainet/lang/tensor/data/Q4MemorySegmentTensorData.kt b/skainet-lang/skainet-lang-core/src/jvmMain/kotlin/sk/ainet/lang/tensor/data/Q4MemorySegmentTensorData.kt index c8617307..e2125903 100644 --- a/skainet-lang/skainet-lang-core/src/jvmMain/kotlin/sk/ainet/lang/tensor/data/Q4MemorySegmentTensorData.kt +++ b/skainet-lang/skainet-lang-core/src/jvmMain/kotlin/sk/ainet/lang/tensor/data/Q4MemorySegmentTensorData.kt @@ -27,9 +27,11 @@ public interface Q4MemorySegmentMarker : MemorySegmentBackedData { * * Q4_0 block layout (18 bytes per 32 elements): * - 2 bytes: f16 scale (little-endian) - * - 16 bytes: packed 4-bit codes (32 values, 2 per byte) + * - 16 bytes: packed 4-bit codes (32 values) in the canonical ggml + * *split* layout — low nibbles decode elements 0..15, high nibbles + * decode elements 16..31. * - * Dequantization: output[i] = (nibble[i] - 8) * scale + * Dequantization: output[j] = (nibble[j] - 8) * scale * * The segment is arena-managed and 64-byte aligned for SIMD access. */ @@ -52,9 +54,12 @@ public class Q4MemorySegmentTensorData( val flatIndex = calcFlatIndex(indices) val blockIdx = flatIndex / blockSize val elemIdx = flatIndex % blockSize - val codesByteOffset = segmentByteOffset + blockIdx.toLong() * bytesPerBlock + 2 + (elemIdx / 2).toLong() + // Split layout: elements 0..15 are low nibbles of bytes 0..15, + // elements 16..31 are the high nibbles of the same bytes. 
+ val byteInBlock = if (elemIdx < 16) elemIdx else elemIdx - 16 + val codesByteOffset = segmentByteOffset + blockIdx.toLong() * bytesPerBlock + 2 + byteInBlock.toLong() val packedByte = segment.get(JAVA_BYTE, codesByteOffset).toInt() and 0xFF - val code = if (elemIdx % 2 == 0) packedByte and 0x0F else packedByte ushr 4 + val code = if (elemIdx < 16) packedByte and 0x0F else packedByte ushr 4 return code.toByte() } @@ -62,10 +67,11 @@ public class Q4MemorySegmentTensorData( val flatIndex = calcFlatIndex(indices) val blockIdx = flatIndex / blockSize val elemIdx = flatIndex % blockSize - val codesByteOffset = segmentByteOffset + blockIdx.toLong() * bytesPerBlock + 2 + (elemIdx / 2).toLong() + val byteInBlock = if (elemIdx < 16) elemIdx else elemIdx - 16 + val codesByteOffset = segmentByteOffset + blockIdx.toLong() * bytesPerBlock + 2 + byteInBlock.toLong() val currentByte = segment.get(JAVA_BYTE, codesByteOffset).toInt() and 0xFF val newNibble = value.toInt() and 0x0F - val updated = if (elemIdx % 2 == 0) { + val updated = if (elemIdx < 16) { (currentByte and 0xF0) or newNibble } else { (currentByte and 0x0F) or (newNibble shl 4) @@ -83,11 +89,14 @@ public class Q4MemorySegmentTensorData( val scale = halfToFloat((b1 shl 8) or b0) val elemsInBlock = minOf(blockSize, shape.volume - outIdx) for (i in 0 until elemsInBlock) { - val codeOff = blockOff + 2 + (i / 2).toLong() + // Split layout: i<16 → low nibble of byte i; i>=16 → high nibble of byte i-16. 
+ val byteInBlock = if (i < 16) i else i - 16 + val codeOff = blockOff + 2 + byteInBlock.toLong() val packedByte = segment.get(JAVA_BYTE, codeOff).toInt() and 0xFF - val code = if (i % 2 == 0) packedByte and 0x0F else packedByte ushr 4 - result[outIdx++] = (code - 8).toFloat() * scale + val code = if (i < 16) packedByte and 0x0F else packedByte ushr 4 + result[outIdx + i] = (code - 8).toFloat() * scale } + outIdx += elemsInBlock } return result } diff --git a/skainet-lang/skainet-lang-dag/api/jvm/skainet-lang-dag.api b/skainet-lang/skainet-lang-dag/api/jvm/skainet-lang-dag.api index 5fb46009..09092260 100644 --- a/skainet-lang/skainet-lang-dag/api/jvm/skainet-lang-dag.api +++ b/skainet-lang/skainet-lang-dag/api/jvm/skainet-lang-dag.api @@ -17,6 +17,13 @@ public abstract class sk/ainet/lang/dag/DagModule { public abstract fun apply (Lsk/ainet/lang/dag/DagBuilder;Ljava/util/List;)Ljava/util/List; } +public final class sk/ainet/lang/dag/DtypePolicyDslKt { + public static final field DTYPE_POLICY_ATTRIBUTE_KEY Ljava/lang/String; + public static final fun dtypePolicy (Lsk/ainet/lang/dag/GraphNodeDefinition;)Lsk/ainet/lang/types/DTypePolicy; + public static final fun op (Lsk/ainet/lang/dag/DagBuilder;Lsk/ainet/lang/tensor/ops/Operation;Ljava/util/List;Lsk/ainet/lang/types/DTypePolicy;Ljava/lang/String;Ljava/util/Map;)Ljava/util/List; + public static synthetic fun op$default (Lsk/ainet/lang/dag/DagBuilder;Lsk/ainet/lang/tensor/ops/Operation;Ljava/util/List;Lsk/ainet/lang/types/DTypePolicy;Ljava/lang/String;Ljava/util/Map;ILjava/lang/Object;)Ljava/util/List; +} + public final class sk/ainet/lang/dag/GraphDslKt { public static final fun dag (Lkotlin/jvm/functions/Function1;)Lsk/ainet/lang/dag/GraphProgram; public static final fun dagModule (Lkotlin/jvm/functions/Function2;)Lsk/ainet/lang/dag/DagModule; @@ -27,6 +34,12 @@ public final class sk/ainet/lang/dag/GraphDslKt { public abstract interface class sk/ainet/lang/dag/GraphDslOps : sk/ainet/lang/tensor/ops/TensorOps { } 
+public final class sk/ainet/lang/dag/GraphDslOps$DefaultImpls { + public static fun convTranspose1d (Lsk/ainet/lang/dag/GraphDslOps;Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;Lsk/ainet/lang/tensor/Tensor;IIIII)Lsk/ainet/lang/tensor/Tensor; + public static fun cos (Lsk/ainet/lang/dag/GraphDslOps;Lsk/ainet/lang/tensor/Tensor;)Lsk/ainet/lang/tensor/Tensor; + public static fun sin (Lsk/ainet/lang/dag/GraphDslOps;Lsk/ainet/lang/tensor/Tensor;)Lsk/ainet/lang/tensor/Tensor; +} + public final class sk/ainet/lang/dag/GraphDslOpsGraphDslKt { public static final fun abs (Lsk/ainet/lang/dag/DagBuilder;Lsk/ainet/lang/dag/GraphValue;Ljava/lang/String;)Lsk/ainet/lang/dag/GraphValue; public static synthetic fun abs$default (Lsk/ainet/lang/dag/DagBuilder;Lsk/ainet/lang/dag/GraphValue;Ljava/lang/String;ILjava/lang/Object;)Lsk/ainet/lang/dag/GraphValue; @@ -68,6 +81,12 @@ public final class sk/ainet/lang/dag/GraphDslOpsGraphDslKt { public static synthetic fun indexSelect$default (Lsk/ainet/lang/dag/DagBuilder;Lsk/ainet/lang/dag/GraphValue;Lsk/ainet/lang/dag/GraphValue;ILjava/lang/String;ILjava/lang/Object;)Lsk/ainet/lang/dag/GraphValue; public static final fun leakyRelu (Lsk/ainet/lang/dag/DagBuilder;Lsk/ainet/lang/dag/GraphValue;FLjava/lang/String;)Lsk/ainet/lang/dag/GraphValue; public static synthetic fun leakyRelu$default (Lsk/ainet/lang/dag/DagBuilder;Lsk/ainet/lang/dag/GraphValue;FLjava/lang/String;ILjava/lang/Object;)Lsk/ainet/lang/dag/GraphValue; + public static final fun log (Lsk/ainet/lang/dag/DagBuilder;Lsk/ainet/lang/dag/GraphValue;Ljava/lang/String;)Lsk/ainet/lang/dag/GraphValue; + public static synthetic fun log$default (Lsk/ainet/lang/dag/DagBuilder;Lsk/ainet/lang/dag/GraphValue;Ljava/lang/String;ILjava/lang/Object;)Lsk/ainet/lang/dag/GraphValue; + public static final fun log10 (Lsk/ainet/lang/dag/DagBuilder;Lsk/ainet/lang/dag/GraphValue;Ljava/lang/String;)Lsk/ainet/lang/dag/GraphValue; + public static synthetic fun log10$default 
(Lsk/ainet/lang/dag/DagBuilder;Lsk/ainet/lang/dag/GraphValue;Ljava/lang/String;ILjava/lang/Object;)Lsk/ainet/lang/dag/GraphValue; + public static final fun log2 (Lsk/ainet/lang/dag/DagBuilder;Lsk/ainet/lang/dag/GraphValue;Ljava/lang/String;)Lsk/ainet/lang/dag/GraphValue; + public static synthetic fun log2$default (Lsk/ainet/lang/dag/DagBuilder;Lsk/ainet/lang/dag/GraphValue;Ljava/lang/String;ILjava/lang/Object;)Lsk/ainet/lang/dag/GraphValue; public static final fun logSoftmax (Lsk/ainet/lang/dag/DagBuilder;Lsk/ainet/lang/dag/GraphValue;ILjava/lang/String;)Lsk/ainet/lang/dag/GraphValue; public static synthetic fun logSoftmax$default (Lsk/ainet/lang/dag/DagBuilder;Lsk/ainet/lang/dag/GraphValue;ILjava/lang/String;ILjava/lang/Object;)Lsk/ainet/lang/dag/GraphValue; public static final fun lt (Lsk/ainet/lang/dag/DagBuilder;Lsk/ainet/lang/dag/GraphValue;FLjava/lang/String;)Lsk/ainet/lang/dag/GraphValue; @@ -86,6 +105,12 @@ public final class sk/ainet/lang/dag/GraphDslOpsGraphDslKt { public static synthetic fun narrow$default (Lsk/ainet/lang/dag/DagBuilder;Lsk/ainet/lang/dag/GraphValue;IIILjava/lang/String;ILjava/lang/Object;)Lsk/ainet/lang/dag/GraphValue; public static final fun pad2d (Lsk/ainet/lang/dag/DagBuilder;Lsk/ainet/lang/dag/GraphValue;IIIILjava/lang/String;)Lsk/ainet/lang/dag/GraphValue; public static synthetic fun pad2d$default (Lsk/ainet/lang/dag/DagBuilder;Lsk/ainet/lang/dag/GraphValue;IIIILjava/lang/String;ILjava/lang/Object;)Lsk/ainet/lang/dag/GraphValue; + public static final fun permute (Lsk/ainet/lang/dag/DagBuilder;Lsk/ainet/lang/dag/GraphValue;[ILjava/lang/String;)Lsk/ainet/lang/dag/GraphValue; + public static synthetic fun permute$default (Lsk/ainet/lang/dag/DagBuilder;Lsk/ainet/lang/dag/GraphValue;[ILjava/lang/String;ILjava/lang/Object;)Lsk/ainet/lang/dag/GraphValue; + public static final fun pow (Lsk/ainet/lang/dag/DagBuilder;Lsk/ainet/lang/dag/GraphValue;Lsk/ainet/lang/dag/GraphValue;Ljava/lang/String;)Lsk/ainet/lang/dag/GraphValue; + public static 
synthetic fun pow$default (Lsk/ainet/lang/dag/DagBuilder;Lsk/ainet/lang/dag/GraphValue;Lsk/ainet/lang/dag/GraphValue;Ljava/lang/String;ILjava/lang/Object;)Lsk/ainet/lang/dag/GraphValue; + public static final fun powScalar (Lsk/ainet/lang/dag/DagBuilder;Lsk/ainet/lang/dag/GraphValue;Ljava/lang/Number;Ljava/lang/String;)Lsk/ainet/lang/dag/GraphValue; + public static synthetic fun powScalar$default (Lsk/ainet/lang/dag/DagBuilder;Lsk/ainet/lang/dag/GraphValue;Ljava/lang/Number;Ljava/lang/String;ILjava/lang/Object;)Lsk/ainet/lang/dag/GraphValue; public static final fun rdivScalar (Lsk/ainet/lang/dag/DagBuilder;Ljava/lang/Number;Lsk/ainet/lang/dag/GraphValue;Ljava/lang/String;)Lsk/ainet/lang/dag/GraphValue; public static synthetic fun rdivScalar$default (Lsk/ainet/lang/dag/DagBuilder;Ljava/lang/Number;Lsk/ainet/lang/dag/GraphValue;Ljava/lang/String;ILjava/lang/Object;)Lsk/ainet/lang/dag/GraphValue; public static final fun relu (Lsk/ainet/lang/dag/DagBuilder;Lsk/ainet/lang/dag/GraphValue;Ljava/lang/String;)Lsk/ainet/lang/dag/GraphValue; @@ -114,6 +139,8 @@ public final class sk/ainet/lang/dag/GraphDslOpsGraphDslKt { public static synthetic fun subtract$default (Lsk/ainet/lang/dag/DagBuilder;Lsk/ainet/lang/dag/GraphValue;Lsk/ainet/lang/dag/GraphValue;Ljava/lang/String;ILjava/lang/Object;)Lsk/ainet/lang/dag/GraphValue; public static final fun sum (Lsk/ainet/lang/dag/DagBuilder;Lsk/ainet/lang/dag/GraphValue;ILjava/lang/String;)Lsk/ainet/lang/dag/GraphValue; public static synthetic fun sum$default (Lsk/ainet/lang/dag/DagBuilder;Lsk/ainet/lang/dag/GraphValue;ILjava/lang/String;ILjava/lang/Object;)Lsk/ainet/lang/dag/GraphValue; + public static final fun tanh (Lsk/ainet/lang/dag/DagBuilder;Lsk/ainet/lang/dag/GraphValue;Ljava/lang/String;)Lsk/ainet/lang/dag/GraphValue; + public static synthetic fun tanh$default (Lsk/ainet/lang/dag/DagBuilder;Lsk/ainet/lang/dag/GraphValue;Ljava/lang/String;ILjava/lang/Object;)Lsk/ainet/lang/dag/GraphValue; public static final fun transpose 
(Lsk/ainet/lang/dag/DagBuilder;Lsk/ainet/lang/dag/GraphValue;Ljava/lang/String;)Lsk/ainet/lang/dag/GraphValue; public static synthetic fun transpose$default (Lsk/ainet/lang/dag/DagBuilder;Lsk/ainet/lang/dag/GraphValue;Ljava/lang/String;ILjava/lang/Object;)Lsk/ainet/lang/dag/GraphValue; public static final fun tril (Lsk/ainet/lang/dag/DagBuilder;Lsk/ainet/lang/dag/GraphValue;ILjava/lang/String;)Lsk/ainet/lang/dag/GraphValue;