From 2b50fdd45c58b39b57b0835ccdf1d06932368282 Mon Sep 17 00:00:00 2001 From: Vladislav Dzhidzhoev Date: Sun, 28 Jun 2026 02:11:43 +0200 Subject: [PATCH] Add imad, imul, msad instructions Examples: dxsa.imad r<1>, r<1>, r<2>, r<3> dxsa.imul r<5, >, r<0, >, -r<1, >, r<1, > dxsa.msad r<0>, r<0>, r<1>, r<2> --- .../mlir/Dialect/DXSA/IR/DXSAFPArithOps.td | 13 --- .../mlir/Dialect/DXSA/IR/DXSAIntArithOps.td | 74 ++++++++++++ .../mlir/Dialect/DXSA/IR/DXSAOpBase.td | 13 +++ mlir/lib/Target/DXSA/BinaryParser.cpp | 6 + mlir/test/Target/DXSA/int_arith_ops.test | 106 ++++++++++++++++++ 5 files changed, 199 insertions(+), 13 deletions(-) diff --git a/mlir/include/mlir/Dialect/DXSA/IR/DXSAFPArithOps.td b/mlir/include/mlir/Dialect/DXSA/IR/DXSAFPArithOps.td index 2a0d3c18c54b..976995c16baf 100644 --- a/mlir/include/mlir/Dialect/DXSA/IR/DXSAFPArithOps.td +++ b/mlir/include/mlir/Dialect/DXSA/IR/DXSAFPArithOps.td @@ -353,19 +353,6 @@ def DXSA_LogSat : DXSA_UnaryOp<"log_sat"> { // dxsa.mad //===----------------------------------------------------------------------===// -// Shared base for the multiply-add family: `$dst = $lhs * $rhs + $acc`. -class DXSA_MultiplyAddOp : DXSA_Op { - let arguments = (ins - DXSA_DstOperandAttr:$dst, - DXSA_SrcOperandAttr:$lhs, - DXSA_SrcOperandAttr:$rhs, - DXSA_SrcOperandAttr:$acc, - OptionalAttr:$precise); - let results = (outs); - let assemblyFormat = - "(`precise` $precise^)? 
$dst `,` $lhs `,` $rhs `,` $acc attr-dict"; -} - def DXSA_Mad : DXSA_MultiplyAddOp<"mad"> { let summary = "component-wise floating-point multiply-add"; let description = [{ diff --git a/mlir/include/mlir/Dialect/DXSA/IR/DXSAIntArithOps.td b/mlir/include/mlir/Dialect/DXSA/IR/DXSAIntArithOps.td index 667e80b9e508..85c50da92690 100644 --- a/mlir/include/mlir/Dialect/DXSA/IR/DXSAIntArithOps.td +++ b/mlir/include/mlir/Dialect/DXSA/IR/DXSAIntArithOps.td @@ -134,4 +134,78 @@ def DXSA_UMin : DXSA_BinaryOp<"umin"> { }]; } +//===----------------------------------------------------------------------===// +// dxsa.imad +//===----------------------------------------------------------------------===// + +def DXSA_Imad : DXSA_MultiplyAddOp<"imad"> { + let summary = "component-wise integer multiply-add"; + let description = [{ + The `dxsa.imad` operation computes the component-wise integer + multiply-add `$dst = $lhs * $rhs + $acc`. No carry or borrow beyond the + 32-bit value of each component is performed, so the result is not + sensitive to the signedness of its operands. + + Example: + + ```mlir + dxsa.imad r<0>, r<1>, r<2>, r<3> + dxsa.imad r<0>, -r<1>, r<2>, r<3> + dxsa.imad r<0, >, r<1, >, r<2, >, r<3, > + ``` + }]; +} + +//===----------------------------------------------------------------------===// +// dxsa.imul +//===----------------------------------------------------------------------===// + +def DXSA_Imul : DXSA_Op<"imul"> { + let summary = "component-wise signed integer multiply"; + let description = [{ + The `dxsa.imul` operation computes the component-wise product + `$lhs * $rhs` as a signed 64-bit integer and writes the high 32 bits to + `$dstHi` and the low 32 bits to `$dstLo`. Either destination may be + `null` when that half of the result is not needed. 
+ + Example: + + ```mlir + dxsa.imul r<0>, r<1>, r<2>, r<3> + dxsa.imul null, r<0, >, r<0, >, -r<0, > + dxsa.imul r<7, >, r<3, >, r<3, >, r<4, > + ``` + }]; + let arguments = (ins + DXSA_DstOperandAttr:$dstHi, + DXSA_DstOperandAttr:$dstLo, + DXSA_SrcOperandAttr:$lhs, + DXSA_SrcOperandAttr:$rhs, + OptionalAttr:$precise); + let results = (outs); + let assemblyFormat = + "(`precise` $precise^)? $dstHi `,` $dstLo `,` $lhs `,` $rhs attr-dict"; +} + +//===----------------------------------------------------------------------===// +// dxsa.msad +//===----------------------------------------------------------------------===// + +def DXSA_Msad : DXSA_TernaryOp<"msad"> { + let summary = "component-wise masked sum of absolute differences"; + let description = [{ + The `dxsa.msad` operation computes the component-wise masked sum of + absolute differences. For each 32-bit component, `$src0` and `$src1` + each hold four packed 8-bit unsigned integers, and `$src2` supplies a + 32-bit unsigned accumulation value. The result is written to `$dst`. + + Example: + + ```mlir + dxsa.msad r<0>, r<0>, r<1>, r<2> + dxsa.msad r<1, >, r<1, >, r<1, >, r<1, > + ``` + }]; +} + #endif // MLIR_DIALECT_DXSA_IR_DXSAINTARITHOPS diff --git a/mlir/include/mlir/Dialect/DXSA/IR/DXSAOpBase.td b/mlir/include/mlir/Dialect/DXSA/IR/DXSAOpBase.td index 6f67ca4c3329..a18c375c5494 100644 --- a/mlir/include/mlir/Dialect/DXSA/IR/DXSAOpBase.td +++ b/mlir/include/mlir/Dialect/DXSA/IR/DXSAOpBase.td @@ -61,4 +61,17 @@ class DXSA_TernaryOp : DXSA_Op { "(`precise` $precise^)? $dst `,` $src0 `,` $src1 `,` $src2 attr-dict"; } +// Shared base for the multiply-add family: `$dst = $lhs * $rhs + $acc`. +class DXSA_MultiplyAddOp : DXSA_Op { + let arguments = (ins + DXSA_DstOperandAttr:$dst, + DXSA_SrcOperandAttr:$lhs, + DXSA_SrcOperandAttr:$rhs, + DXSA_SrcOperandAttr:$acc, + OptionalAttr:$precise); + let results = (outs); + let assemblyFormat = + "(`precise` $precise^)? 
$dst `,` $lhs `,` $rhs `,` $acc attr-dict"; +} + #endif // MLIR_DIALECT_DXSA_IR_DXSAOPBASE diff --git a/mlir/lib/Target/DXSA/BinaryParser.cpp b/mlir/lib/Target/DXSA/BinaryParser.cpp index 976322f05f8c..fc66bcef128e 100644 --- a/mlir/lib/Target/DXSA/BinaryParser.cpp +++ b/mlir/lib/Target/DXSA/BinaryParser.cpp @@ -2405,6 +2405,12 @@ class Parser { return PLAIN_OP(UMax, 1, 2, HasPreciseAttr::Yes); case D3D10_SB_OPCODE_UMIN: return PLAIN_OP(UMin, 1, 2, HasPreciseAttr::Yes); + case D3D10_SB_OPCODE_IMAD: + return PLAIN_OP(Imad, 1, 3, HasPreciseAttr::Yes); + case D3D10_SB_OPCODE_IMUL: + return PLAIN_OP(Imul, 2, 2, HasPreciseAttr::Yes); + case D3D11_1_SB_OPCODE_MSAD: + return PLAIN_OP(Msad, 1, 3, HasPreciseAttr::Yes); // Bitwise instructions case D3D10_SB_OPCODE_AND: return PLAIN_OP(And, 1, 2, HasPreciseAttr::Yes); diff --git a/mlir/test/Target/DXSA/int_arith_ops.test b/mlir/test/Target/DXSA/int_arith_ops.test index 69081ad56bf8..e5b9f3a8b310 100644 --- a/mlir/test/Target/DXSA/int_arith_ops.test +++ b/mlir/test/Target/DXSA/int_arith_ops.test @@ -103,3 +103,109 @@ // CHECK-NEXT: dxsa.umin r<0, >, r<1, >, r<2, > // CHECK-NEXT: } 0x07000054, 0x00100012, 0x00000000, 0x00100796, 0x00000001, 0x00100006, 0x00000002 + +// ----- + +// CHECK-LABEL: dxsa.module { +// CHECK-NEXT: dxsa.imad precise r<0>, r<0>, r<1>, r<2> +// CHECK-NEXT: } +0x09780023, 0x001000F2, 0x00000000, 0x00100E46, 0x00000000, 0x00100E46, 0x00000001, 0x00100E46, 0x00000002 + +// ----- + +// CHECK-LABEL: dxsa.module { +// CHECK-NEXT: dxsa.imad r<1>, r<1>, r<2>, r<3> +// CHECK-NEXT: } +0x09000023, 0x001000F2, 0x00000001, 0x00100E46, 0x00000001, 0x00100E46, 0x00000002, 0x00100E46, 0x00000003 + +// ----- + +// CHECK-LABEL: dxsa.module { +// CHECK-NEXT: dxsa.imad r<1>, -r<1>, r<2>, r<3> +// CHECK-NEXT: } +0x0A000023, 0x001000F2, 0x00000001, 0x80100E46, 0x00000041, 0x00000001, 0x00100E46, 0x00000002, 0x00100E46, 0x00000003 + +// ----- + +// CHECK-LABEL: dxsa.module { +// CHECK-NEXT: dxsa.imad r<1>, -r<1>, r<2>, -r<3> 
+// CHECK-NEXT: } +0x0B000023, 0x001000F2, 0x00000001, 0x80100E46, 0x00000041, 0x00000001, 0x00100E46, 0x00000002, 0x80100E46, 0x00000041, 0x00000003 + +// ----- + +// CHECK-LABEL: dxsa.module { +// CHECK-NEXT: dxsa.imad r<1>, r<1, >, r<2>, r<3, > +// CHECK-NEXT: } +0x09000023, 0x001000F2, 0x00000001, 0x00100E16, 0x00000001, 0x00100E46, 0x00000002, 0x00100D26, 0x00000003 + +// ----- + +// CHECK-LABEL: dxsa.module { +// CHECK-NEXT: dxsa.imad r<1, >, r<1, >, r<1, >, r<1, > +// CHECK-NEXT: } +0x09000023, 0x00100012, 0x00000001, 0x0010000A, 0x00000001, 0x0010001A, 0x00000001, 0x0010002A, 0x00000001 + +// ----- + +// CHECK-LABEL: dxsa.module { +// CHECK-NEXT: dxsa.imul precise r<7, >, r<3, >, r<3, >, r<4, > +// CHECK-NEXT: } +0x09380026, 0x00100072, 0x00000007, 0x00100072, 0x00000003, 0x00100246, 0x00000003, 0x00100246, 0x00000004 + +// ----- + +// CHECK-LABEL: dxsa.module { +// CHECK-NEXT: dxsa.imul r<6, >, r<2, >, r<2, >, r<3, > +// CHECK-NEXT: } +0x09000026, 0x001000d2, 0x00000006, 0x001000d2, 0x00000002, 0x00100256, 0x00000002, 0x00100256, 0x00000003 + +// ----- + +// CHECK-LABEL: dxsa.module { +// CHECK-NEXT: dxsa.imul r<5, >, r<1, >, r<1, >, r<1, > +// CHECK-NEXT: } +0x09000026, 0x00100012, 0x00000005, 0x00100012, 0x00000001, 0x0010001a, 0x00000001, 0x0010000a, 0x00000001 + +// ----- + +// CHECK-LABEL: dxsa.module { +// CHECK-NEXT: dxsa.imul r<5, >, r<0, >, -r<1, >, r<1, > +// CHECK-NEXT: } +0x0a000026, 0x00100082, 0x00000005, 0x00100082, 0x00000000, 0x8010001a, 0x00000041, 0x00000001, 0x0010000a, 0x00000001 + +// ----- + +// Typically, FXC produces imul with null for destHI operand. 
+// CHECK-LABEL: dxsa.module { +// CHECK-NEXT: dxsa.imul null, r<0, >, r<0, >, -r<0, > +// CHECK-NEXT: } +0x09000026, 0x0000d000, 0x00100042, 0x00000000, 0x0010001a, 0x00000000, 0x8010000a, 0x00000041, 0x00000000 + +// ----- + +// CHECK-LABEL: dxsa.module { +// CHECK-NEXT: dxsa.msad r<0>, r<0>, r<1>, r<2> +// CHECK-NEXT: } +0x090000D5, 0x001000F2, 0x00000000, 0x001000F2, 0x00000000, 0x001000F2, 0x00000001, 0x001000F2, 0x00000002 + +// ----- + +// CHECK-LABEL: dxsa.module { +// CHECK-NEXT: dxsa.msad r<1, >, r<1, >, r<2, >, r<1, > +// CHECK-NEXT: } +0x090000D5, 0x00100032, 0x00000001, 0x00100006, 0x00000001, 0x00100596, 0x00000002, 0x00100596, 0x00000001 + +// ----- + +// CHECK-LABEL: dxsa.module { +// CHECK-NEXT: dxsa.msad precise r<2>, r<2, >, r<4>, r<3, > +// CHECK-NEXT: } +0x097800D5, 0x001000F2, 0x00000002, 0x00100006, 0x00000002, 0x00100E46, 0x00000004, 0x001004E6, 0x00000003 + +// ----- + +// CHECK-LABEL: dxsa.module { +// CHECK-NEXT: dxsa.msad r<1, >, r<1, >, r<1, >, r<1, > +// CHECK-NEXT: } +0x090000D5, 0x00100012, 0x00000001, 0x0010000A, 0x00000001, 0x0010001A, 0x00000001, 0x0010002A, 0x00000001