Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 49 additions & 1 deletion mlir/include/mlir/Dialect/DXSA/IR/DXSADoubleArithOps.td
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
#ifndef MLIR_DIALECT_DXSA_IR_DXSADOUBLEARITHOPS
#define MLIR_DIALECT_DXSA_IR_DXSADOUBLEARITHOPS

include "mlir/Dialect/DXSA/IR/DXSAOpBase.td"
include "mlir/Dialect/DXSA/IR/DXSAFPArithOps.td"

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe we should just move DXSA_MultiplyAddOp to DXSAOpBase.td? That seems like a better solution than including the FP ops in the double ops file.

@dzhidzhoev dzhidzhoev Jun 28, 2026

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you, that makes sense. I did that in #210. I will update it here after #210 and #190 get merged.


//===----------------------------------------------------------------------===//
// dxsa.dadd
Expand Down Expand Up @@ -305,4 +305,52 @@ def DXSA_DrcpSat : DXSA_UnaryOp<"drcp_sat"> {
}];
}

//===----------------------------------------------------------------------===//
// dxsa.dfma
//===----------------------------------------------------------------------===//

// Declares the `dxsa.dfma` op by instantiating DXSA_MultiplyAddOp with the
// mnemonic "dfma". This def only sets the summary and description; everything
// else is whatever the base class provides.
// NOTE(review): DXSA_MultiplyAddOp is not defined in this file; it appears to
// come from the DXSAFPArithOps.td include — confirm where it should live.
// NOTE(review): Direct3D documents `dfma` as a fused multiply-add; the
// description below does not state the rounding behaviour — confirm wording.
def DXSA_Dfma : DXSA_MultiplyAddOp<"dfma"> {
let summary = "component-wise double-precision multiply-add";
// The operand names used below ($dst, $lhs, $rhs, $acc) are not declared in
// this def; presumably they are the names DXSA_MultiplyAddOp uses — confirm.
let description = [{
The `dxsa.dfma` operation computes the component-wise double-precision
multiply-add `$dst = $lhs * $rhs + $acc`. Each operand holds a vector of
doubles, one double per `xy` and `zw` component pair.

Because each double spans a component pair, the destination write mask must
be `<x, y>`, `<z, w>`, or `<x, y, z, w>`, and each source swizzle must be
one of `<x, y, z, w>`, `<x, y, x, y>`, `<z, w, x, y>`, or `<z, w, z, w>`.

Example:

```mlir
dxsa.dfma r<0>, r<1>, r<2>, r<3>
dxsa.dfma r<1, <x, y>>, r<1, <x, y, x, y>>, r<1, <z, w, z, w>>, r<2, <x, y, x, y>>
```
}];
}

//===----------------------------------------------------------------------===//
// dxsa.dfma_sat
//===----------------------------------------------------------------------===//

// Declares the `dxsa.dfma_sat` op, the saturating counterpart of `dxsa.dfma`,
// by instantiating DXSA_MultiplyAddOp with the mnemonic "dfma_sat". This def
// only sets the summary and description.
// NOTE(review): DXSA_MultiplyAddOp is not defined in this file; it appears to
// come from the DXSAFPArithOps.td include — confirm where it should live.
// NOTE(review): presumably the binary parser selects this op instead of
// `dxsa.dfma` when the instruction's saturate flag is set — confirm against
// the SATURABLE_OP handling in BinaryParser.cpp.
def DXSA_DfmaSat : DXSA_MultiplyAddOp<"dfma_sat"> {
let summary = "component-wise double-precision multiply-add, saturated to [0, 1]";
// The operand names used below ($dst, $lhs, $rhs, $acc) are not declared in
// this def; presumably they are the names DXSA_MultiplyAddOp uses — confirm.
let description = [{
The `dxsa.dfma_sat` operation computes the component-wise double-precision
multiply-add of `$lhs`, `$rhs` and `$acc`, clamps each result component to
`[0.0, 1.0]`, and writes it to `$dst`.

Because each double spans a component pair, the destination write mask must
be `<x, y>`, `<z, w>`, or `<x, y, z, w>`, and each source swizzle must be
one of `<x, y, z, w>`, `<x, y, x, y>`, `<z, w, x, y>`, or `<z, w, z, w>`.

Example:

```mlir
dxsa.dfma_sat r<0>, r<1>, r<2>, r<3>
dxsa.dfma_sat r<1, <x, y>>, r<1, <x, y, x, y>>, r<1, <z, w, z, w>>, r<2, <x, y, x, y>>
```
}];
}

#endif // MLIR_DIALECT_DXSA_IR_DXSADOUBLEARITHOPS
2 changes: 2 additions & 0 deletions mlir/lib/Target/DXSA/BinaryParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2437,6 +2437,8 @@ class Parser {
return SATURABLE_OP(Dmul, 1, 2, HasPreciseAttr::Yes);
case D3D11_1_SB_OPCODE_DDIV:
return SATURABLE_OP(Ddiv, 1, 2, HasPreciseAttr::Yes);
case D3D11_1_SB_OPCODE_DFMA:
return SATURABLE_OP(Dfma, 1, 3, HasPreciseAttr::Yes);
case D3D11_1_SB_OPCODE_DRCP:
return SATURABLE_OP(Drcp, 1, 1, HasPreciseAttr::Yes);
}
Expand Down
84 changes: 84 additions & 0 deletions mlir/test/Target/DXSA/double_arith_ops.test
Original file line number Diff line number Diff line change
Expand Up @@ -82,3 +82,87 @@
// CHECK-NEXT: dxsa.drcp_sat r<0>, r<1>
// CHECK-NEXT: }
0x050020d4, 0x001000f2, 0x00000000, 0x00100e46, 0x00000001

// -----

// CHECK-LABEL: dxsa.module {
// CHECK-NEXT: dxsa.dfma precise <x, y, z, w> r<0>, r<0>, r<1>, r<2>
// CHECK-NEXT: }
0x097800D3, 0x001000F2, 0x00000000, 0x00100E46, 0x00000000, 0x00100E46, 0x00000001, 0x00100E46, 0x00000002

// -----

// CHECK-LABEL: dxsa.module {
// CHECK-NEXT: dxsa.dfma_sat precise <x, y, z, w> r<1>, r<1>, r<2>, r<3>
// CHECK-NEXT: }
0x097820D3, 0x001000F2, 0x00000001, 0x00100E46, 0x00000001, 0x00100E46, 0x00000002, 0x00100E46, 0x00000003

// -----

// CHECK-LABEL: dxsa.module {
// CHECK-NEXT: dxsa.dfma r<1>, r<1>, r<2>, r<3>
// CHECK-NEXT: }
0x090000D3, 0x001000F2, 0x00000001, 0x00100E46, 0x00000001, 0x00100E46, 0x00000002, 0x00100E46, 0x00000003

// -----

// CHECK-LABEL: dxsa.module {
// CHECK-NEXT: dxsa.dfma_sat r<1>, r<1>, r<2>, r<3>
// CHECK-NEXT: }
0x090020D3, 0x001000F2, 0x00000001, 0x00100E46, 0x00000001, 0x00100E46, 0x00000002, 0x00100E46, 0x00000003

// -----

// CHECK-LABEL: dxsa.module {
// CHECK-NEXT: dxsa.dfma r<1>, -r<1>, r<2>, r<3>
// CHECK-NEXT: }
0x0A0000D3, 0x001000F2, 0x00000001, 0x80100E46, 0x00000041, 0x00000001, 0x00100E46, 0x00000002, 0x00100E46, 0x00000003

// -----

// CHECK-LABEL: dxsa.module {
// CHECK-NEXT: dxsa.dfma_sat r<1>, -r<1>, r<2>, r<3>
// CHECK-NEXT: }
0x0A0020D3, 0x001000F2, 0x00000001, 0x80100E46, 0x00000041, 0x00000001, 0x00100E46, 0x00000002, 0x00100E46, 0x00000003

// -----

// CHECK-LABEL: dxsa.module {
// CHECK-NEXT: dxsa.dfma r<1>, -r<1>, r<2>, -r<3>
// CHECK-NEXT: }
0x0B0000D3, 0x001000F2, 0x00000001, 0x80100E46, 0x00000041, 0x00000001, 0x00100E46, 0x00000002, 0x80100E46, 0x00000041, 0x00000003

// -----

// CHECK-LABEL: dxsa.module {
// CHECK-NEXT: dxsa.dfma_sat r<1>, -r<1>, r<2>, -r<3>
// CHECK-NEXT: }
0x0B0020D3, 0x001000F2, 0x00000001, 0x80100E46, 0x00000041, 0x00000001, 0x00100E46, 0x00000002, 0x80100E46, 0x00000041, 0x00000003

// -----

// CHECK-LABEL: dxsa.module {
// CHECK-NEXT: dxsa.dfma r<1>, r<1, <z, w, x, y>>, r<2>, r<3>
// CHECK-NEXT: }
0x090000D3, 0x001000F2, 0x00000001, 0x001004E6, 0x00000001, 0x00100E46, 0x00000002, 0x00100E46, 0x00000003

// -----

// CHECK-LABEL: dxsa.module {
// CHECK-NEXT: dxsa.dfma_sat r<1>, r<1, <z, w, x, y>>, r<2>, r<3>
// CHECK-NEXT: }
0x090020D3, 0x001000F2, 0x00000001, 0x001004E6, 0x00000001, 0x00100E46, 0x00000002, 0x00100E46, 0x00000003

// -----

// CHECK-LABEL: dxsa.module {
// CHECK-NEXT: dxsa.dfma r<1, <x, y>>, r<1, <x, y, x, y>>, r<1, <z, w, z, w>>, r<2, <x, y, x, y>>
// CHECK-NEXT: }
0x090000D3, 0x00100032, 0x00000001, 0x00100446, 0x00000001, 0x00100EE6, 0x00000001, 0x00100446, 0x00000002

// -----

// CHECK-LABEL: dxsa.module {
// CHECK-NEXT: dxsa.dfma_sat r<1, <x, y>>, r<1, <x, y, x, y>>, r<1, <z, w, z, w>>, r<2, <x, y, x, y>>
// CHECK-NEXT: }
0x090020D3, 0x00100032, 0x00000001, 0x00100446, 0x00000001, 0x00100EE6, 0x00000001, 0x00100446, 0x00000002