From 6e875808c22cdb835d908f2f117adfdba2fad8c4 Mon Sep 17 00:00:00 2001 From: Andrew Savonichev Date: Mon, 15 Jun 2026 23:50:19 +0900 Subject: [PATCH 1/2] [mlir][dxsa] Add sample instruction Sample instruction takes an address, a resource (texture), a sampler, and writes texture data to the destination register. There are several optional parts that can be present: - Offset is encoded as an extended opcode. - LOD clamp and feedback for sample_cl_s. The spec mentions clamp and feedback as optional, but DXC decodes them both. It is possible that they are always present as operands, but can be null. Other extended instructions are added for other resource instructions, and are not enabled for the sample instruction. --- mlir/include/mlir/Dialect/DXSA/IR/DXSAOps.td | 1 + .../mlir/Dialect/DXSA/IR/DXSAResourceOps.td | 273 +++++++++++++++++ mlir/lib/Dialect/DXSA/IR/DXSAOperand.cpp | 14 + mlir/lib/Target/DXSA/BinaryParser.cpp | 284 ++++++++++++++++++ mlir/test/Target/DXSA/inputs/sample.bin | Bin 0 -> 44 bytes .../DXSA/inputs/sample_clamp_feedback.bin | Bin 0 -> 60 bytes .../test/Target/DXSA/inputs/sample_offset.bin | Bin 0 -> 48 bytes mlir/test/Target/DXSA/sample.mlir | 6 + mlir/test/Target/DXSA/sample_b.mlir | 8 + .../Target/DXSA/sample_b_clamp_feedback.mlir | 10 + mlir/test/Target/DXSA/sample_c.mlir | 8 + .../Target/DXSA/sample_c_clamp_feedback.mlir | 10 + mlir/test/Target/DXSA/sample_c_lz.mlir | 8 + .../DXSA/sample_c_lz_clamp_feedback.mlir | 10 + .../Target/DXSA/sample_clamp_feedback.mlir | 8 + mlir/test/Target/DXSA/sample_d.mlir | 8 + .../Target/DXSA/sample_d_clamp_feedback.mlir | 10 + mlir/test/Target/DXSA/sample_l.mlir | 8 + .../Target/DXSA/sample_l_clamp_feedback.mlir | 10 + mlir/test/Target/DXSA/sample_offset.mlir | 7 + .../Target/DXSA/sample_offset_invalid.mlir | 29 ++ 21 files changed, 712 insertions(+) create mode 100644 mlir/include/mlir/Dialect/DXSA/IR/DXSAResourceOps.td create mode 100644 mlir/test/Target/DXSA/inputs/sample.bin create mode 100644 
mlir/test/Target/DXSA/inputs/sample_clamp_feedback.bin create mode 100644 mlir/test/Target/DXSA/inputs/sample_offset.bin create mode 100644 mlir/test/Target/DXSA/sample.mlir create mode 100644 mlir/test/Target/DXSA/sample_b.mlir create mode 100644 mlir/test/Target/DXSA/sample_b_clamp_feedback.mlir create mode 100644 mlir/test/Target/DXSA/sample_c.mlir create mode 100644 mlir/test/Target/DXSA/sample_c_clamp_feedback.mlir create mode 100644 mlir/test/Target/DXSA/sample_c_lz.mlir create mode 100644 mlir/test/Target/DXSA/sample_c_lz_clamp_feedback.mlir create mode 100644 mlir/test/Target/DXSA/sample_clamp_feedback.mlir create mode 100644 mlir/test/Target/DXSA/sample_d.mlir create mode 100644 mlir/test/Target/DXSA/sample_d_clamp_feedback.mlir create mode 100644 mlir/test/Target/DXSA/sample_l.mlir create mode 100644 mlir/test/Target/DXSA/sample_l_clamp_feedback.mlir create mode 100644 mlir/test/Target/DXSA/sample_offset.mlir create mode 100644 mlir/test/Target/DXSA/sample_offset_invalid.mlir diff --git a/mlir/include/mlir/Dialect/DXSA/IR/DXSAOps.td b/mlir/include/mlir/Dialect/DXSA/IR/DXSAOps.td index f895c699c710..7ddeca9e13ee 100644 --- a/mlir/include/mlir/Dialect/DXSA/IR/DXSAOps.td +++ b/mlir/include/mlir/Dialect/DXSA/IR/DXSAOps.td @@ -12,6 +12,7 @@ include "mlir/Dialect/DXSA/IR/DXSAOpBase.td" include "mlir/Dialect/DXSA/IR/DXSATypes.td" include "mlir/Dialect/DXSA/IR/DXSAFPArithOps.td" +include "mlir/Dialect/DXSA/IR/DXSAResourceOps.td" include "mlir/IR/AttrTypeBase.td" include "mlir/IR/BuiltinAttributeInterfaces.td" include "mlir/IR/EnumAttr.td" diff --git a/mlir/include/mlir/Dialect/DXSA/IR/DXSAResourceOps.td b/mlir/include/mlir/Dialect/DXSA/IR/DXSAResourceOps.td new file mode 100644 index 000000000000..e0f9316ee114 --- /dev/null +++ b/mlir/include/mlir/Dialect/DXSA/IR/DXSAResourceOps.td @@ -0,0 +1,273 @@ +//===---- DXSAResourceOps.td - sampler and resource ops ----*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Sampler and Resource instructions of the DXSA dialect. +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_DIALECT_DXSA_IR_DXSARESOURCEOPS +#define MLIR_DIALECT_DXSA_IR_DXSARESOURCEOPS + +include "mlir/Dialect/DXSA/IR/DXSAOpBase.td" + +def DXSA_SampleOffsetAttr : AttrDef { + let mnemonic = "sample_offset"; + let summary = "immediate offset for the texture coordinates for the sample"; + let description = [{ + The dxsa.sample_offset attribute indicates that the texture coordinates for + the sample are to be offset by a set of provided immediate texel space + integer constant values. The literal values are a set of 4 bit 2's + complement numbers, having integer range [-8,7]. + }]; + let parameters = (ins "int32_t":$u, "int32_t":$v, "int32_t":$w); + let assemblyFormat = "`<` struct(params) `>`"; + let genVerifyDecl = 1; +} + +def DXSA_SampleClampFeedbackAttr : AttrDef { + let mnemonic = "sample_clamp_feedback"; + let summary = "optional LOD clamp and Tiled Resources shader feedback status output value"; + let description = [{ + The dxsa.sample_clamp_feedback attribute is an optional pair of operands for + the dxsa.sample family of instructions. + + `lod_clamp` is an additional 32 bit scalar LOD clamp operand. + + `feedback` is a shader feedback status output value. The contents + of the return value are opaque - direct reading by the shader + program is disallowed. 
+ }]; + + let parameters = (ins + AttrParameter<"SrcOperandAttr", "lod_clamp">:$lod_clamp, + AttrParameter<"DstOperandAttr", "feedback">:$feedback); + + let assemblyFormat = "`<` $lod_clamp `,` $feedback `>`"; +} + +//===----------------------------------------------------------------------===// +// dxsa.sample +//===----------------------------------------------------------------------===// + +def DXSA_Sample : DXSA_Op<"sample"> { + let summary = "sample data from the specified texture using the filtering mode identified by the given sampler"; + let description = [{ + The dxsa.sample operation uses provided address, sample data from the + specified Element/texture using the filtering mode identified by the given + sampler. The source data may come from any Resource Type (5), other than + Buffers. + + `src_address` provides the set of texture coordinates needed to perform the + sample, as floating point values referencing normalized space in the texture. + + `src_resource` is a texture register (t). This is simply a + placeholder for a texture, including the return data type of the + resource being sampled. + + `src_sampler` is a sampler register (s). This is simply a + placeholder for a collection of filtering controls (such as point + vs. linear, mipmapping and address wrapping controls). + + The optional `offset` operand suffix (address offset by immediate + integer) indicates that the texture coordinates for the sample are + to be offset by a set of provided immediate texel space integer + constant values. The literal values are a set of 4 bit 2's + complement numbers, having integer range [-8,7]. + + The optional `clamp_feedback` operand appends an additional 32 bit + scalar LOD clamp operand and an additional 32 bit scalar Tiled + Resources shader feedback status output value. 
+ }]; + + let arguments = (ins + DXSA_DstOperandAttr:$dst, + DXSA_SrcOperandAttr:$src_address, + DXSA_SrcOperandAttr:$src_resource, + DXSA_SrcOperandAttr:$src_sampler, + OptionalAttr:$offset, + OptionalAttr:$clamp_feedback); + let results = (outs); + + let assemblyFormat = [{ + $dst `,` $src_address `,` $src_resource `,` $src_sampler + (`,` $offset^)? (`,` $clamp_feedback^)? + attr-dict + }]; +} + +//===----------------------------------------------------------------------===// +// dxsa.sample_b +//===----------------------------------------------------------------------===// + +def DXSA_SampleB : DXSA_Op<"sample_b"> { + let summary = "sample data from the specified texture with an additional bias applied to the LOD"; + let description = [{ + The `dxsa.sample_b` operation uses provided address, sample data from the + specified Element/texture using the filtering mode identified by the given + sampler. The source data may come from any Resource Type(5), other than + Buffers. An additional bias is applied to the level of detail computed as + part of the instruction execution. + + `src_address`, `src_resource`, `src_sampler`, `offset`, and `clamp_feedback` + operands are the same as in `dxsa.sample` instruction. + + The `src_lod_bias` value is added to the computed LOD on a per-pixel basis, + along with the sampler MipLODBias value, prior to the clamp to MinLOD and + MaxLOD. + + }]; + + let arguments = (ins + DXSA_DstOperandAttr:$dst, + DXSA_SrcOperandAttr:$src_address, + DXSA_SrcOperandAttr:$src_resource, + DXSA_SrcOperandAttr:$src_sampler, + DXSA_SrcOperandAttr:$src_lod_bias, + OptionalAttr:$offset, + OptionalAttr:$clamp_feedback); + let results = (outs); + + let assemblyFormat = [{ + $dst `,` $src_address `,` $src_resource `,` $src_sampler `,` $src_lod_bias + (`,` $offset^)? (`,` $clamp_feedback^)? 
+ attr-dict + }]; +} + +//===----------------------------------------------------------------------===// +// dxsa.sample_d +//===----------------------------------------------------------------------===// + +def DXSA_SampleD : DXSA_Op<"sample_d"> { + let summary = "sample data from the specified texture with derivatives for X and Y"; + let description = [{ + The `dxsa.sample_d` operation uses provided address, sample data from the + specified Element/texture using the filtering mode identified by the given + sampler. The source data may come from any Resource Type(5), other than + Buffers. + + `src_address`, `src_resource`, `src_sampler`, `offset`, and `clamp_feedback` + operands are the same as in `dxsa.sample` instruction. + + Derivatives for the source address in the x direction and the y direction are + provided by extra parameters, `src_x_derivatives` and `src_y_derivatives`, + respectively. These derivatives are in normalized texture coordinate space. + }]; + + let arguments = (ins + DXSA_DstOperandAttr:$dst, + DXSA_SrcOperandAttr:$src_address, + DXSA_SrcOperandAttr:$src_resource, + DXSA_SrcOperandAttr:$src_sampler, + DXSA_SrcOperandAttr:$src_x_derivatives, + DXSA_SrcOperandAttr:$src_y_derivatives, + OptionalAttr:$offset, + OptionalAttr:$clamp_feedback); + let results = (outs); + + let assemblyFormat = [{ + $dst `,` $src_address `,` $src_resource `,` $src_sampler + `,` $src_x_derivatives `,` $src_y_derivatives + (`,` $offset^)? (`,` $clamp_feedback^)? + attr-dict + }]; +} + +//===----------------------------------------------------------------------===// +// dxsa.sample_l +//===----------------------------------------------------------------------===// + +def DXSA_SampleL : DXSA_Op<"sample_l"> { + let summary = "sample data from the specified texture with specific LOD"; + let description = [{ + `dxsa.sample_l` is identical to `dxsa.sample`, except that LOD is provided + directly by the application as a scalar value, representing no anisotropy. 
+ + `src_address`, `src_resource`, `src_sampler`, `offset`, and `clamp_feedback` + operands are the same as in `dxsa.sample` instruction. + + `src_lod` is the LOD value. If the LOD value is <= 0, the zero'th (biggest + map) is chosen, with the magnify filter applied (if applicable based on the + filter mode). Since `src_lod` is a floating point value, the fractional + value is used to interpolate (if the minify filter is LINEAR or with + anisotropic filtering) between two mip levels. + }]; + + let arguments = (ins + DXSA_DstOperandAttr:$dst, + DXSA_SrcOperandAttr:$src_address, + DXSA_SrcOperandAttr:$src_resource, + DXSA_SrcOperandAttr:$src_sampler, + DXSA_SrcOperandAttr:$src_lod, + OptionalAttr:$offset, + OptionalAttr:$clamp_feedback); + let results = (outs); + + let assemblyFormat = [{ + $dst `,` $src_address `,` $src_resource `,` $src_sampler + `,` $src_lod + (`,` $offset^)? (`,` $clamp_feedback^)? + attr-dict + }]; +} + +//===----------------------------------------------------------------------===// +// dxsa.sample_c +//===----------------------------------------------------------------------===// + +class DXSA_SampleC_Base : DXSA_Op { + let arguments = (ins + DXSA_DstOperandAttr:$dst, + DXSA_SrcOperandAttr:$src_address, + DXSA_SrcOperandAttr:$src_resource, + DXSA_SrcOperandAttr:$src_sampler, + DXSA_SrcOperandAttr:$src_reference_value, + OptionalAttr:$offset, + OptionalAttr:$clamp_feedback); + let results = (outs); + + let assemblyFormat = [{ + $dst `,` $src_address `,` $src_resource `,` $src_sampler + `,` $src_reference_value + (`,` $offset^)? (`,` $clamp_feedback^)? + attr-dict + }]; +} + +def DXSA_SampleC : DXSA_SampleC_Base<"sample_c"> { + let summary = "perform a comparison filter"; + let description = [{ + `dxsa.sample_c` is to provide a building-block for Percentage-Closer Depth + filtering. The 'c' in sample_c stands for Comparison. 
+ + The operands to `dxsa.sample_c` are identical to `dxsa.sample`, except that + there is an additional float32 source operand, `src_reference_value`, which + must be a register with single-component selected, or a scalar literal. + }]; +} + +//===----------------------------------------------------------------------===// +// dxsa.sample_c_lz +//===----------------------------------------------------------------------===// + +def DXSA_SampleCLZ : DXSA_SampleC_Base<"sample_c_lz"> { + let summary = "perform a comparison filter with zero LOD"; + let description = [{ + Same as `dxsa.sample_c`, except LOD is 0, and derivatives are ignored (as if + they are 0). The 'lz' stands for level-zero. Because derivatives are + ignored, this instruction is available in shaders other than the Pixel + Shader. + + `dxsa.sample_c` is to provide a building-block for Percentage-Closer Depth + filtering. The 'c' in sample_c stands for Comparison. + + The operands to `dxsa.sample_c_lz` are identical to `dxsa.sample_c`. 
+ }]; +} + +#endif // MLIR_DIALECT_DXSA_IR_DXSARESOURCEOPS diff --git a/mlir/lib/Dialect/DXSA/IR/DXSAOperand.cpp b/mlir/lib/Dialect/DXSA/IR/DXSAOperand.cpp index 9208efaa6cf4..773cab25a773 100644 --- a/mlir/lib/Dialect/DXSA/IR/DXSAOperand.cpp +++ b/mlir/lib/Dialect/DXSA/IR/DXSAOperand.cpp @@ -991,3 +991,17 @@ void SrcOperandAttr::print(AsmPrinter &printer) const { printNegAndAbsModifier(printer, getModifier(), [&] { printSrcOperandBody(printer, *this); }); } + +LogicalResult +SampleOffsetAttr::verify(function_ref emitError, + int32_t u, int32_t v, int32_t w) { + int32_t values[] = {u, v, w}; + for (int32_t value : values) { + if (value < -8 || value > 7) { + return emitError() + << "sample offsets must be 4 bit 2's complement numbers, " + "having integer range [-8,7]"; + } + } + return success(); +} diff --git a/mlir/lib/Target/DXSA/BinaryParser.cpp b/mlir/lib/Target/DXSA/BinaryParser.cpp index fe7d81302f32..bbce68f1674e 100644 --- a/mlir/lib/Target/DXSA/BinaryParser.cpp +++ b/mlir/lib/Target/DXSA/BinaryParser.cpp @@ -385,6 +385,30 @@ struct InstructionModifier { uint32_t saturate{0}; }; +struct ExtendedInstructionSampleOffset { + int32_t u; + int32_t v; + int32_t w; +}; + +struct ExtendedInstructionResourceDim { + uint32_t dim; + std::optional stride; +}; + +struct ExtendedInstructionResourceReturnType { + uint32_t x; + uint32_t y; + uint32_t z; + uint32_t w; +}; + +struct ExtendedInstruction { + std::optional sampleOffset; + std::optional resourceDim; + std::optional resourceReturnType; +}; + struct OperandModifier { uint32_t modifier{0}; uint32_t minPrecision{0}; @@ -798,6 +822,78 @@ class DXBuilder { return OpT::create(builder, loc, dst, lhs, rhs, preciseAttr); } + dxsa::SampleOffsetAttr + buildSampleOffsetAttr(const ExtendedInstructionSampleOffset &sampleOffset) { + return dxsa::SampleOffsetAttr::get(context, sampleOffset.u, sampleOffset.v, + sampleOffset.w); + } + + dxsa::SampleClampFeedbackAttr + buildSampleClampFeedbackAttr(dxsa::SrcOperandAttr clamp, + 
dxsa::DstOperandAttr feedback) { + return dxsa::SampleClampFeedbackAttr::get(context, clamp, feedback); + } + + Instruction + buildSampleOp(dxsa::DstOperandAttr dst, dxsa::SrcOperandAttr srcAddress, + dxsa::SrcOperandAttr srcResource, + dxsa::SrcOperandAttr srcSampler, dxsa::SampleOffsetAttr offset, + dxsa::SampleClampFeedbackAttr clampFeedback, Location loc) { + return dxsa::Sample::create(builder, loc, dst, srcAddress, srcResource, + srcSampler, offset, clampFeedback); + } + + Instruction + buildSampleBOp(dxsa::DstOperandAttr dst, dxsa::SrcOperandAttr srcAddress, + dxsa::SrcOperandAttr srcResource, + dxsa::SrcOperandAttr srcSampler, + dxsa::SrcOperandAttr srcLodBias, dxsa::SampleOffsetAttr offset, + dxsa::SampleClampFeedbackAttr clampFeedback, Location loc) { + return dxsa::SampleB::create(builder, loc, dst, srcAddress, srcResource, + srcSampler, srcLodBias, offset, clampFeedback); + } + + Instruction buildSampleDOp( + dxsa::DstOperandAttr dst, dxsa::SrcOperandAttr srcAddress, + dxsa::SrcOperandAttr srcResource, dxsa::SrcOperandAttr srcSampler, + dxsa::SrcOperandAttr srcXDerivatives, + dxsa::SrcOperandAttr srcYDerivatives, dxsa::SampleOffsetAttr offset, + dxsa::SampleClampFeedbackAttr clampFeedback, Location loc) { + return dxsa::SampleD::create(builder, loc, dst, srcAddress, srcResource, + srcSampler, srcXDerivatives, srcYDerivatives, + offset, clampFeedback); + } + + Instruction + buildSampleLOp(dxsa::DstOperandAttr dst, dxsa::SrcOperandAttr srcAddress, + dxsa::SrcOperandAttr srcResource, + dxsa::SrcOperandAttr srcSampler, dxsa::SrcOperandAttr srcLod, + dxsa::SampleOffsetAttr offset, + dxsa::SampleClampFeedbackAttr clampFeedback, Location loc) { + return dxsa::SampleL::create(builder, loc, dst, srcAddress, srcResource, + srcSampler, srcLod, offset, clampFeedback); + } + + Instruction buildSampleCOp( + dxsa::DstOperandAttr dst, dxsa::SrcOperandAttr srcAddress, + dxsa::SrcOperandAttr srcResource, dxsa::SrcOperandAttr srcSampler, + dxsa::SrcOperandAttr 
srcReferenceValue, dxsa::SampleOffsetAttr offset, + dxsa::SampleClampFeedbackAttr clampFeedback, Location loc) { + return dxsa::SampleC::create(builder, loc, dst, srcAddress, srcResource, + srcSampler, srcReferenceValue, offset, + clampFeedback); + } + + Instruction buildSampleCLZOp( + dxsa::DstOperandAttr dst, dxsa::SrcOperandAttr srcAddress, + dxsa::SrcOperandAttr srcResource, dxsa::SrcOperandAttr srcSampler, + dxsa::SrcOperandAttr srcReferenceValue, dxsa::SampleOffsetAttr offset, + dxsa::SampleClampFeedbackAttr clampFeedback, Location loc) { + return dxsa::SampleCLZ::create(builder, loc, dst, srcAddress, srcResource, + srcSampler, srcReferenceValue, offset, + clampFeedback); + } + Instruction buildDclInput(dxsa::InlineOperandAttr operand, Location loc) { return dxsa::DclInput::create(builder, loc, operand); } @@ -1649,6 +1745,53 @@ class Parser { fields->values, fields->values64); } + void parseExtendedInstruction(uint32_t extendedToken, + ExtendedInstruction &ext) { + switch (DECODE_D3D10_SB_EXTENDED_OPCODE_TYPE(extendedToken)) { + case D3D10_SB_EXTENDED_OPCODE_EMPTY: + return; + case D3D10_SB_EXTENDED_OPCODE_SAMPLE_CONTROLS: { + auto token = static_cast(extendedToken); + int32_t offsets[3] = { + DECODE_IMMEDIATE_D3D10_SB_ADDRESS_OFFSET( + D3D10_SB_IMMEDIATE_ADDRESS_OFFSET_U, token), + DECODE_IMMEDIATE_D3D10_SB_ADDRESS_OFFSET( + D3D10_SB_IMMEDIATE_ADDRESS_OFFSET_V, token), + DECODE_IMMEDIATE_D3D10_SB_ADDRESS_OFFSET( + D3D10_SB_IMMEDIATE_ADDRESS_OFFSET_W, token), + }; + for (int32_t &offset : offsets) { + // Sign extend from 4 bits to 32. + if (offset & 0x8) { + offset |= 0xfffffff0; + } + } + ext.sampleOffset = {offsets[0], offsets[1], offsets[2]}; + return; + } + case D3D11_SB_EXTENDED_OPCODE_RESOURCE_DIM: { + auto dim = DECODE_D3D11_SB_EXTENDED_RESOURCE_DIMENSION(extendedToken); + auto stride = + (dim == D3D11_SB_RESOURCE_DIMENSION_STRUCTURED_BUFFER) + ? 
std::optional( + DECODE_D3D11_SB_EXTENDED_RESOURCE_DIMENSION_STRUCTURE_STRIDE( + extendedToken)) + : std::nullopt; + ext.resourceDim = {dim, stride}; + return; + } + case D3D11_SB_EXTENDED_OPCODE_RESOURCE_RETURN_TYPE: { + ext.resourceReturnType = { + DECODE_D3D11_SB_EXTENDED_RESOURCE_RETURN_TYPE(extendedToken, 0), + DECODE_D3D11_SB_EXTENDED_RESOURCE_RETURN_TYPE(extendedToken, 1), + DECODE_D3D11_SB_EXTENDED_RESOURCE_RETURN_TYPE(extendedToken, 2), + DECODE_D3D11_SB_EXTENDED_RESOURCE_RETURN_TYPE(extendedToken, 3), + }; + return; + } + } + } + template FailureOr decodeSaturableBinaryOp(size_t beginOffset, uint32_t length, bool saturate, @@ -1667,6 +1810,121 @@ class Parser { return builder.buildBinaryOp(*dst, *lhs, *rhs, preciseMask, loc); } + FailureOr parseSampleOp(uint32_t opcode, + ExtendedInstruction &ext, + size_t beginOffset, uint32_t length, + Location loc) { + bool hasFeedback = false; + switch (opcode) { + case D3DWDDM1_3_SB_OPCODE_SAMPLE_L_FEEDBACK: + case D3DWDDM1_3_SB_OPCODE_SAMPLE_C_LZ_FEEDBACK: + case D3DWDDM1_3_SB_OPCODE_SAMPLE_CLAMP_FEEDBACK: + case D3DWDDM1_3_SB_OPCODE_SAMPLE_B_CLAMP_FEEDBACK: + case D3DWDDM1_3_SB_OPCODE_SAMPLE_D_CLAMP_FEEDBACK: + case D3DWDDM1_3_SB_OPCODE_SAMPLE_C_CLAMP_FEEDBACK: + hasFeedback = true; + break; + } + + auto dst = parseDstOperand(); + FAILURE_IF_FAILED(dst); + + dxsa::DstOperandAttr feedback; + if (hasFeedback) { + // For Clamp/Feedback variant, feedback operand is the second + // dst register. Clamp immediate is the last src operand. 
+ auto op = parseDstOperand(); + FAILURE_IF_FAILED(op); + feedback = *op; + } + + auto srcAddress = parseSrcOperand(); + FAILURE_IF_FAILED(srcAddress); + auto srcResource = parseSrcOperand(); + FAILURE_IF_FAILED(srcResource); + auto srcSampler = parseSrcOperand(); + FAILURE_IF_FAILED(srcSampler); + + SmallVector extraOperands; + switch (opcode) { + case D3D10_SB_OPCODE_SAMPLE_D: + case D3DWDDM1_3_SB_OPCODE_SAMPLE_D_CLAMP_FEEDBACK: { + // 2 extra operands: the first is parsed here, the second by the fallthrough. + auto extraOp = parseSrcOperand(); + FAILURE_IF_FAILED(extraOp); + extraOperands.push_back(*extraOp); + [[fallthrough]]; + } + case D3D10_SB_OPCODE_SAMPLE_B: + case D3DWDDM1_3_SB_OPCODE_SAMPLE_B_CLAMP_FEEDBACK: + case D3D10_SB_OPCODE_SAMPLE_L: + case D3DWDDM1_3_SB_OPCODE_SAMPLE_L_FEEDBACK: + case D3D10_SB_OPCODE_SAMPLE_C: + case D3DWDDM1_3_SB_OPCODE_SAMPLE_C_CLAMP_FEEDBACK: + case D3D10_SB_OPCODE_SAMPLE_C_LZ: + case D3DWDDM1_3_SB_OPCODE_SAMPLE_C_LZ_FEEDBACK: { + // 1 extra operand; it must be checked before it is dereferenced below. + auto extraOp = parseSrcOperand(); + FAILURE_IF_FAILED(extraOp); + extraOperands.push_back(*extraOp); + break; + } + } + + dxsa::SampleClampFeedbackAttr clampFeedback; + if (hasFeedback) { + auto clamp = parseSrcOperand(); + FAILURE_IF_FAILED(clamp); + clampFeedback = builder.buildSampleClampFeedbackAttr(*clamp, feedback); + } + + dxsa::SampleOffsetAttr offset; + if (ext.sampleOffset) { + offset = builder.buildSampleOffsetAttr(*ext.sampleOffset); + } + + FAILURE_IF_FAILED(verifyInstructionLength(beginOffset, length)); + + switch (opcode) { + case D3D10_SB_OPCODE_SAMPLE: + case D3DWDDM1_3_SB_OPCODE_SAMPLE_CLAMP_FEEDBACK: { + return builder.buildSampleOp(*dst, *srcAddress, *srcResource, *srcSampler, + offset, clampFeedback, loc); + } + case D3D10_SB_OPCODE_SAMPLE_D: + case D3DWDDM1_3_SB_OPCODE_SAMPLE_D_CLAMP_FEEDBACK: { + return builder.buildSampleDOp( + *dst, *srcAddress, *srcResource, *srcSampler, extraOperands[0], + extraOperands[1], offset, clampFeedback, loc); + } + case D3D10_SB_OPCODE_SAMPLE_B: + case 
D3DWDDM1_3_SB_OPCODE_SAMPLE_B_CLAMP_FEEDBACK: { + return builder.buildSampleBOp(*dst, *srcAddress, *srcResource, + *srcSampler, extraOperands[0], offset, + clampFeedback, loc); + } + case D3D10_SB_OPCODE_SAMPLE_L: + case D3DWDDM1_3_SB_OPCODE_SAMPLE_L_FEEDBACK: { + return builder.buildSampleLOp(*dst, *srcAddress, *srcResource, + *srcSampler, extraOperands[0], offset, + clampFeedback, loc); + } + case D3D10_SB_OPCODE_SAMPLE_C: + case D3DWDDM1_3_SB_OPCODE_SAMPLE_C_CLAMP_FEEDBACK: { + return builder.buildSampleCOp(*dst, *srcAddress, *srcResource, + *srcSampler, extraOperands[0], offset, + clampFeedback, loc); + } + case D3D10_SB_OPCODE_SAMPLE_C_LZ: + case D3DWDDM1_3_SB_OPCODE_SAMPLE_C_LZ_FEEDBACK: { + return builder.buildSampleCLZOp(*dst, *srcAddress, *srcResource, + *srcSampler, extraOperands[0], offset, + clampFeedback, loc); + } + } + report_fatal_error("unhandled instructions"); + } + FailureOr parseDclInput(Location loc) { auto operand = parseInlineOperand(); FAILURE_IF_FAILED(operand); @@ -2128,6 +2386,18 @@ class Parser { modifier.saturate = DECODE_IS_D3D10_SB_INSTRUCTION_SATURATE_ENABLED(*opcodeToken0); + ExtendedInstruction extendedInst; + if (DECODE_IS_D3D10_SB_OPCODE_EXTENDED(*opcodeToken0)) { + // opcodeToken0 is followed by zero or more opcodeToken1 that describe + // sampler or resource parameters. 
+ Token opcodeToken1; + do { + opcodeToken1 = parseToken(); + FAILURE_IF_FAILED(opcodeToken1); + parseExtendedInstruction(*opcodeToken1, extendedInst); + } while (DECODE_IS_D3D10_SB_OPCODE_EXTENDED(*opcodeToken1)); + } + // TODO: extended instructions: // BOOL b51PlusShader = // BOOL bExtended = DECODE_IS_D3D10_SB_OPCODE_EXTENDED(Token) @@ -2166,6 +2436,20 @@ class Parser { return SATURABLE_BINARY_OP(Dp3); case D3D10_SB_OPCODE_DP4: return SATURABLE_BINARY_OP(Dp4); + case D3D10_SB_OPCODE_SAMPLE: + case D3DWDDM1_3_SB_OPCODE_SAMPLE_CLAMP_FEEDBACK: + case D3D10_SB_OPCODE_SAMPLE_D: + case D3DWDDM1_3_SB_OPCODE_SAMPLE_D_CLAMP_FEEDBACK: + case D3D10_SB_OPCODE_SAMPLE_B: + case D3DWDDM1_3_SB_OPCODE_SAMPLE_B_CLAMP_FEEDBACK: + case D3D10_SB_OPCODE_SAMPLE_L: + case D3DWDDM1_3_SB_OPCODE_SAMPLE_L_FEEDBACK: + case D3D10_SB_OPCODE_SAMPLE_C: + case D3DWDDM1_3_SB_OPCODE_SAMPLE_C_CLAMP_FEEDBACK: + case D3D10_SB_OPCODE_SAMPLE_C_LZ: + case D3DWDDM1_3_SB_OPCODE_SAMPLE_C_LZ_FEEDBACK: + return parseSampleOp(opcode, extendedInst, beginOffset, + instructionLengthInTokens, getLocation()); } #undef SATURABLE_BINARY_OP diff --git a/mlir/test/Target/DXSA/inputs/sample.bin b/mlir/test/Target/DXSA/inputs/sample.bin new file mode 100644 index 0000000000000000000000000000000000000000..6cab7f3687f2f73b290f4d55516def522ea98ebe GIT binary patch literal 44 ocmZ=|VCX)?z|i0vD$4MQL4W}S+ytO>od5$fNFYIgffYyr0HjII4{=L5u_e238=&=m6p{ G*aHAgL, v<0, >, t<3, vector>, s<5> +0x8b000045, 0x800000c2, 0x00155543, 0x001000f2, +0x00000000, 0x00101046, 0x00000000, 0x00107e46, +0x00000003, 0x00106000, 0x00000005 diff --git a/mlir/test/Target/DXSA/sample_b.mlir b/mlir/test/Target/DXSA/sample_b.mlir new file mode 100644 index 000000000000..b168dde13002 --- /dev/null +++ b/mlir/test/Target/DXSA/sample_b.mlir @@ -0,0 +1,8 @@ +// RUN: mlir-translate --import-dxsa-hex %s | FileCheck %s + +// CHECK: dxsa.sample_b r<0>, v<0, >, t<3, vector>, s<5>, v<0, > + +0x8d00004a, 0x800000c2, 0x00155543, 0x001000f2, +0x00000000, 0x00101046, 
0x00000000, 0x00107e46, +0x00000003, 0x00106000, 0x00000005, 0x0010101a, +0x00000000 diff --git a/mlir/test/Target/DXSA/sample_b_clamp_feedback.mlir b/mlir/test/Target/DXSA/sample_b_clamp_feedback.mlir new file mode 100644 index 000000000000..4955caeea74f --- /dev/null +++ b/mlir/test/Target/DXSA/sample_b_clamp_feedback.mlir @@ -0,0 +1,10 @@ +// RUN: mlir-translate --import-dxsa-hex %s | FileCheck %s +// RUN: mlir-translate --import-dxsa-hex %s | mlir-opt --verify-roundtrip + +// CHECK: dxsa.sample_b r<1>, v<0, >, t<3, vector>, s<5>, v<0, >, , + +0x910000e7, 0x80003801, 0x800000c2, 0x00155543, +0x001000f2, 0x00000001, 0x0000d000, 0x00101046, +0x00000000, 0x00107e46, 0x00000003, 0x00106000, +0x00000005, 0x0010101a, 0x00000000, 0x00004001, +0x3f000000 diff --git a/mlir/test/Target/DXSA/sample_c.mlir b/mlir/test/Target/DXSA/sample_c.mlir new file mode 100644 index 000000000000..5b94aa69bbe7 --- /dev/null +++ b/mlir/test/Target/DXSA/sample_c.mlir @@ -0,0 +1,8 @@ +// RUN: mlir-translate --import-dxsa-hex %s | FileCheck %s + +// CHECK: dxsa.sample_c r<0, >, v<0, >, t<3, vector, >, s<5>, r<0, > + +0x8d000046, 0x800000c2, 0x00155543, 0x00100022, +0x00000000, 0x00101046, 0x00000000, 0x00107006, +0x00000003, 0x00106000, 0x00000005, 0x0010000a, +0x00000000 diff --git a/mlir/test/Target/DXSA/sample_c_clamp_feedback.mlir b/mlir/test/Target/DXSA/sample_c_clamp_feedback.mlir new file mode 100644 index 000000000000..4cecff4fface --- /dev/null +++ b/mlir/test/Target/DXSA/sample_c_clamp_feedback.mlir @@ -0,0 +1,10 @@ +// RUN: mlir-translate --import-dxsa-hex %s | FileCheck %s +// RUN: mlir-translate --import-dxsa-hex %s | mlir-opt --verify-roundtrip + +// CHECK: dxsa.sample_c r<1, >, v<0, >, t<3, vector, >, s<5>, r<0, >, , + +0x910000e9, 0x80003801, 0x800000c2, 0x00155543, +0x00100012, 0x00000001, 0x0000d000, 0x00101046, +0x00000000, 0x00107006, 0x00000003, 0x00106000, +0x00000005, 0x0010000a, 0x00000000, 0x00004001, +0x3f000000 diff --git a/mlir/test/Target/DXSA/sample_c_lz.mlir 
b/mlir/test/Target/DXSA/sample_c_lz.mlir new file mode 100644 index 000000000000..873ec3fcb5fe --- /dev/null +++ b/mlir/test/Target/DXSA/sample_c_lz.mlir @@ -0,0 +1,8 @@ +// RUN: mlir-translate --import-dxsa-hex %s | FileCheck %s + +// CHECK: dxsa.sample_c_lz r<0, >, v<0, >, t<3, vector, >, s<5>, r<0, > + +0x8d000047, 0x800000c2, 0x00155543, 0x00100022, +0x00000000, 0x00101046, 0x00000000, 0x00107006, +0x00000003, 0x00106000, 0x00000005, 0x0010000a, +0x00000000 diff --git a/mlir/test/Target/DXSA/sample_c_lz_clamp_feedback.mlir b/mlir/test/Target/DXSA/sample_c_lz_clamp_feedback.mlir new file mode 100644 index 000000000000..c5da6069462d --- /dev/null +++ b/mlir/test/Target/DXSA/sample_c_lz_clamp_feedback.mlir @@ -0,0 +1,10 @@ +// RUN: mlir-translate --import-dxsa-hex %s | FileCheck %s +// RUN: mlir-translate --import-dxsa-hex %s | mlir-opt --verify-roundtrip + +// CHECK: dxsa.sample_c_lz r<1, >, v<0, >, t<3, vector, >, s<5>, r<0, >, , + +0x910000e5, 0x80003801, 0x800000c2, 0x00155543, +0x00100012, 0x00000001, 0x0000d000, 0x00101046, +0x00000000, 0x00107006, 0x00000003, 0x00106000, +0x00000005, 0x0010000a, 0x00000000, 0x00004001, +0x3f000000 diff --git a/mlir/test/Target/DXSA/sample_clamp_feedback.mlir b/mlir/test/Target/DXSA/sample_clamp_feedback.mlir new file mode 100644 index 000000000000..b54304f09786 --- /dev/null +++ b/mlir/test/Target/DXSA/sample_clamp_feedback.mlir @@ -0,0 +1,8 @@ +// RUN: mlir-translate --import-dxsa-hex %s | FileCheck %s +// RUN: mlir-translate --import-dxsa-hex %s | mlir-opt --verify-roundtrip + +// CHECK: dxsa.sample r<1>, v<0, >, t<3, vector>, s<5>, , +0x8f0000e6, 0x80003801, 0x800000c2, 0x00155543, +0x001000f2, 0x00000001, 0x0000d000, 0x00101046, +0x00000000, 0x00107e46, 0x00000003, 0x00106000, +0x00000005, 0x00004001, 0x3f000000 diff --git a/mlir/test/Target/DXSA/sample_d.mlir b/mlir/test/Target/DXSA/sample_d.mlir new file mode 100644 index 000000000000..6cfd3bf59749 --- /dev/null +++ b/mlir/test/Target/DXSA/sample_d.mlir @@ -0,0 +1,8 
@@ +// RUN: mlir-translate --import-dxsa-hex %s | FileCheck %s + +// CHECK: dxsa.sample_d r<0>, v<0, >, t<3, vector>, s<5> + +0x8f000049, 0x800000c2, 0x00155543, 0x001000f2, +0x00000000, 0x00101046, 0x00000000, 0x00107e46, +0x00000003, 0x00106000, 0x00000005, 0x00101556, +0x00000000, 0x00101516, 0x00000000 diff --git a/mlir/test/Target/DXSA/sample_d_clamp_feedback.mlir b/mlir/test/Target/DXSA/sample_d_clamp_feedback.mlir new file mode 100644 index 000000000000..192f24a982d3 --- /dev/null +++ b/mlir/test/Target/DXSA/sample_d_clamp_feedback.mlir @@ -0,0 +1,10 @@ +// RUN: mlir-translate --import-dxsa-hex %s | FileCheck %s +// RUN: mlir-translate --import-dxsa-hex %s | mlir-opt --verify-roundtrip + +// CHECK: dxsa.sample_d r<1>, v<0, >, t<3, vector>, s<5>, v<0, >, v<0, >, , + +0x930000e8, 0x80003801, 0x800000c2, 0x00155543, +0x001000f2, 0x00000001, 0x0000d000, 0x00101046, +0x00000000, 0x00107e46, 0x00000003, 0x00106000, +0x00000005, 0x00101556, 0x00000000, 0x00101516, +0x00000000, 0x00004001, 0x3f000000 diff --git a/mlir/test/Target/DXSA/sample_l.mlir b/mlir/test/Target/DXSA/sample_l.mlir new file mode 100644 index 000000000000..7eae666bf0a2 --- /dev/null +++ b/mlir/test/Target/DXSA/sample_l.mlir @@ -0,0 +1,8 @@ +// RUN: mlir-translate --import-dxsa-hex %s | FileCheck %s + +// CHECK: dxsa.sample_l r<0>, v<0, >, t<3, vector>, s<5> + +0x8d000048, 0x800000c2, 0x00155543, 0x001000f2, +0x00000000, 0x00101046, 0x00000000, 0x00107e46, +0x00000003, 0x00106000, 0x00000005, 0x0010101a, +0x00000000 diff --git a/mlir/test/Target/DXSA/sample_l_clamp_feedback.mlir b/mlir/test/Target/DXSA/sample_l_clamp_feedback.mlir new file mode 100644 index 000000000000..1cfd464d6f8f --- /dev/null +++ b/mlir/test/Target/DXSA/sample_l_clamp_feedback.mlir @@ -0,0 +1,10 @@ +// RUN: mlir-translate --import-dxsa-hex %s | FileCheck %s +// RUN: mlir-translate --import-dxsa-hex %s | mlir-opt --verify-roundtrip + +// CHECK: dxsa.sample_l r<1, >, v<0, >, t<3, vector, >, s<5>, r<0, >, , + +0x910000e4, 
0x80003801, 0x800000c2, 0x00155543, +0x00100012, 0x00000001, 0x0000d000, 0x00101046, +0x00000000, 0x00107006, 0x00000003, 0x00106000, +0x00000005, 0x0010000a, 0x00000000, 0x00004001, +0x3f000000 diff --git a/mlir/test/Target/DXSA/sample_offset.mlir b/mlir/test/Target/DXSA/sample_offset.mlir new file mode 100644 index 000000000000..1a69fe4a494d --- /dev/null +++ b/mlir/test/Target/DXSA/sample_offset.mlir @@ -0,0 +1,7 @@ +// RUN: mlir-translate --import-dxsa-hex %s | FileCheck %s +// RUN: mlir-translate --import-dxsa-hex %s | mlir-opt --verify-roundtrip + +// CHECK: dxsa.sample r<1>, v<0, >, t<3, vector>, s<5>, +0x8c000045, 0x8000f601, 0x800000c2, 0x00155543, +0x001000f2, 0x00000001, 0x00101046, 0x00000000, +0x00107e46, 0x00000003, 0x00106000, 0x00000005 diff --git a/mlir/test/Target/DXSA/sample_offset_invalid.mlir b/mlir/test/Target/DXSA/sample_offset_invalid.mlir new file mode 100644 index 000000000000..e9a1cba1fb00 --- /dev/null +++ b/mlir/test/Target/DXSA/sample_offset_invalid.mlir @@ -0,0 +1,29 @@ +// RUN: mlir-opt %s -split-input-file -verify-diagnostics + +// expected-error@+1 {{sample offsets must be 4 bit 2's complement numbers, having integer range [-8,7]}} +dxsa.sample r<1>, v<0, >, t<3, vector>, s<5>, + +// ----- + +// expected-error@+1 {{sample offsets must be 4 bit 2's complement numbers, having integer range [-8,7]}} +dxsa.sample r<1>, v<0, >, t<3, vector>, s<5>, + +// ----- + +// expected-error@+1 {{sample offsets must be 4 bit 2's complement numbers, having integer range [-8,7]}} +dxsa.sample r<1>, v<0, >, t<3, vector>, s<5>, + +// ----- + +// expected-error@+1 {{sample offsets must be 4 bit 2's complement numbers, having integer range [-8,7]}} +dxsa.sample r<1>, v<0, >, t<3, vector>, s<5>, + +// ----- + +// expected-error@+1 {{sample offsets must be 4 bit 2's complement numbers, having integer range [-8,7]}} +dxsa.sample r<1>, v<0, >, t<3, vector>, s<5>, + +// ----- + +// expected-error@+1 {{sample offsets must be 4 bit 2's complement numbers, 
having integer range [-8,7]}} +dxsa.sample r<1>, v<0, >, t<3, vector>, s<5>, From 0a8f47778af62dd3cc2753d1245896dc953c9a0a Mon Sep 17 00:00:00 2001 From: Andrew Savonichev Date: Thu, 25 Jun 2026 17:52:46 +0900 Subject: [PATCH 2/2] [mlir][dxsa] Add gather4 variants --- .../mlir/Dialect/DXSA/IR/DXSAResourceOps.td | 194 ++++++++++++++++++ mlir/lib/Target/DXSA/BinaryParser.cpp | 142 +++++++++++++ mlir/test/Target/DXSA/gather4.test | 21 ++ mlir/test/Target/DXSA/gather4_c.test | 23 +++ mlir/test/Target/DXSA/gather4_po.test | 16 ++ mlir/test/Target/DXSA/gather4_po_c.test | 17 ++ 6 files changed, 413 insertions(+) create mode 100644 mlir/test/Target/DXSA/gather4.test create mode 100644 mlir/test/Target/DXSA/gather4_c.test create mode 100644 mlir/test/Target/DXSA/gather4_po.test create mode 100644 mlir/test/Target/DXSA/gather4_po_c.test diff --git a/mlir/include/mlir/Dialect/DXSA/IR/DXSAResourceOps.td b/mlir/include/mlir/Dialect/DXSA/IR/DXSAResourceOps.td index e0f9316ee114..d7db2e9b45c3 100644 --- a/mlir/include/mlir/Dialect/DXSA/IR/DXSAResourceOps.td +++ b/mlir/include/mlir/Dialect/DXSA/IR/DXSAResourceOps.td @@ -270,4 +270,198 @@ def DXSA_SampleCLZ : DXSA_SampleC_Base<"sample_c_lz"> { }]; } +//===----------------------------------------------------------------------===// +// dxsa.gather4 +//===----------------------------------------------------------------------===// + +def DXSA_Gather4 : DXSA_Op<"gather4"> { + let summary = "gathers four texels and packs them into a single register"; + let description = [{ + The `dxsa.gather4` operation gathers the four texels that would be used in a + bi-linear filtering operation and packs them into a single register. Only + works with 2D or CubeMap textures (incl arrays). Only the addressing modes + of the sampler are used and the top level of any mip pyramid is used. + + `dxsa.gather4` behaves like the `dxsa.sample` instruction, but a filtered + sample is not generated. 
The four samples that would contribute to + filtering are placed into xyzw in counter clockwise order starting with the + sample to the lower left of the queried location. This is the same as point + sampling with (u,v) texture coordinate deltas at the following locations: + (-,+),(+,+),(+,-),(-,-), where the magnitude of the deltas is always half a + texel. + + `src_address` provides the set of texture coordinates needed to perform the + sample, as floating point values referencing normalized space in the + texture. + + `src_resource` is a texture register (t). This is simply a placeholder for a + texture, including the return data type of the resource being sampled. + + `src_sampler` is a sampler register (s). This is simply a placeholder for a + collection of filtering controls (such as point vs. linear, mipmapping and + address wrapping controls). + + The optional `offset` operand suffix (address offset by immediate integer) + indicates that the texture coordinates for the sample are to be offset by a + set of provided immediate texel space integer constant values. The literal + values are a set of 4 bit 2's complement numbers, having integer range + [-8,7]. + + The optional `feedback` operand appends an additional 32 bit scalar Tiled + Resources shader feedback status output value. Can be NULL (or not present) + if not used. See Tiled Resources Texture Sampling Features(5.9.4.5) for + details. + + Example: + + ```mlir + dxsa.gather4 r<0>, v<0, >, t<0, vector>, s<0, vector, > + dxsa.gather4 r<1>, v<0, >, t<0, vector>, s<0, vector, >, + dxsa.gather4 r<1>, v<0, >, t<0, vector>, s<0, vector, >, , r<2, > + ``` + }]; + + let arguments = (ins + DXSA_DstOperandAttr:$dst, + DXSA_SrcOperandAttr:$src_address, + DXSA_SrcOperandAttr:$src_resource, + DXSA_SrcOperandAttr:$src_sampler, + OptionalAttr:$offset, + OptionalAttr:$feedback); + let results = (outs); + + let assemblyFormat = [{ + $dst `,` $src_address `,` $src_resource `,` $src_sampler + (`,` $offset^)?
(`,` $feedback^)? + attr-dict + }]; +} + +//===----------------------------------------------------------------------===// +// dxsa.gather4_c +//===----------------------------------------------------------------------===// + +def DXSA_Gather4C : DXSA_Op<"gather4_c"> { + let summary = "same as `dxsa.gather4`, except performs comparison on texels, similar to `dxsa.sample_c`"; + let description = [{ + The operands to `dxsa.gather4_c` are identical to `dxsa.gather4`, except + that there is an additional float32 source operand, `src_reference_value`, + which must be a register with single-component selected, or a scalar + literal. + + See existing `dxsa.sample_c` for how `src_reference_value` gets compared + against each fetched texel. Unlike `dxsa.sample_c`, `dxsa.gather4_c` simply + returns each comparison result, rather than filtering them. + + Example: + + ```mlir + dxsa.gather4_c r<0>, v<0, >, t<0, vector>, s<0, vector, >, v<0, > + dxsa.gather4_c r<1>, v<0, >, t<0, vector>, s<0, vector, >, v<0, >, + dxsa.gather4_c r<1>, v<0, >, t<0, vector>, s<0, vector, >, v<0, >, , r<2, > + ``` + }]; + + let arguments = (ins + DXSA_DstOperandAttr:$dst, + DXSA_SrcOperandAttr:$src_address, + DXSA_SrcOperandAttr:$src_resource, + DXSA_SrcOperandAttr:$src_sampler, + DXSA_SrcOperandAttr:$src_reference_value, + OptionalAttr:$offset, + OptionalAttr:$feedback); + let results = (outs); + + let assemblyFormat = [{ + $dst `,` $src_address `,` $src_resource `,` $src_sampler `,` $src_reference_value + (`,` $offset^)? (`,` $feedback^)? 
+ attr-dict + }]; +} + +//===----------------------------------------------------------------------===// +// dxsa.gather4_po +//===----------------------------------------------------------------------===// + +def DXSA_Gather4PO : DXSA_Op<"gather4_po"> { + let summary = "variant of `dxsa.gather4`, where the offset comes as a parameter to the instruction"; + let description = [{ + Variant of `dxsa.gather4`, where instead of supporting an immediate offset + [-8..7], the offset comes as a `src_offset` parameter to the instruction, + and also has a larger range of [-32..31]. + + The first 2 components of the 4-vector offset parameter supply 32-bit + integer offsets. The other components of this parameter are ignored. + + The 6 least significant bits of each offset value are honored as a signed + value, yielding [-32..31] range. + + `dxsa.gather4_po` only works with 2D textures (unlike gather4, which also + works with TextureCubes). + + Example: + + ```mlir + dxsa.gather4_po r<1, >, v<0, >, r<0, >, t<0, vector>, s<0, vector, > + dxsa.gather4_po r<3, >, v<0, >, r<0, >, t<0, vector>, s<0, vector, >, r<4, > + ``` + }]; + + let arguments = (ins + DXSA_DstOperandAttr:$dst, + DXSA_SrcOperandAttr:$src_address, + DXSA_SrcOperandAttr:$src_offset, + DXSA_SrcOperandAttr:$src_resource, + DXSA_SrcOperandAttr:$src_sampler, + OptionalAttr:$feedback); + let results = (outs); + + let assemblyFormat = [{ + $dst `,` $src_address `,` $src_offset `,` $src_resource `,` $src_sampler + (`,` $feedback^)?
+ attr-dict + }]; +} + +//===----------------------------------------------------------------------===// +// dxsa.gather4_po_c +//===----------------------------------------------------------------------===// + +def DXSA_Gather4POC : DXSA_Op<"gather4_po_c"> { + let summary = "Same as `dxsa.gather4_po`, except performs comparison on texels, similar to `dxsa.sample_c`"; + let description = [{ + The operands to `dxsa.gather4_po_c` are identical to `dxsa.gather4_po`, + except that there is an additional float32 source operand, + `src_reference_value`, which must be a register with single-component + selected, or a scalar literal. + + See existing `dxsa.sample_c` for how `src_reference_value` gets compared + against each fetched texel. Unlike `dxsa.sample_c`, `dxsa.gather4_po_c` + simply returns each comparison result, rather than filtering them. + + Example: + + ```mlir + dxsa.gather4_po_c r<1, >, v<0, >, r<0, >, t<0, vector>, s<0, vector, >, v<0, > + dxsa.gather4_po_c r<3, >, v<0, >, r<0, >, t<0, vector>, s<0, vector, >, v<0, >, r<4, > + ``` + }]; + + let arguments = (ins + DXSA_DstOperandAttr:$dst, + DXSA_SrcOperandAttr:$src_address, + DXSA_SrcOperandAttr:$src_offset, + DXSA_SrcOperandAttr:$src_resource, + DXSA_SrcOperandAttr:$src_sampler, + DXSA_SrcOperandAttr:$src_reference_value, + OptionalAttr:$feedback); + let results = (outs); + + let assemblyFormat = [{ + $dst `,` $src_address `,` $src_offset `,` $src_resource `,` $src_sampler `,` $src_reference_value + (`,` $feedback^)? 
+ attr-dict + }]; +} + #endif // MLIR_DIALECT_DXSA_IR_DXSARESOURCEOPS diff --git a/mlir/lib/Target/DXSA/BinaryParser.cpp b/mlir/lib/Target/DXSA/BinaryParser.cpp index bbce68f1674e..377dad3a1b3e 100644 --- a/mlir/lib/Target/DXSA/BinaryParser.cpp +++ b/mlir/lib/Target/DXSA/BinaryParser.cpp @@ -894,6 +894,50 @@ class DXBuilder { clampFeedback); } + Instruction buildGather4(dxsa::DstOperandAttr dst, + dxsa::SrcOperandAttr srcAddress, + dxsa::SrcOperandAttr srcResource, + dxsa::SrcOperandAttr srcSampler, + dxsa::SampleOffsetAttr offset, + dxsa::DstOperandAttr feedback, Location loc) { + return dxsa::Gather4::create(builder, loc, dst, srcAddress, srcResource, + srcSampler, offset, feedback); + } + + Instruction buildGather4C(dxsa::DstOperandAttr dst, + dxsa::SrcOperandAttr srcAddress, + dxsa::SrcOperandAttr srcResource, + dxsa::SrcOperandAttr srcSampler, + dxsa::SrcOperandAttr srcReferenceValue, + dxsa::SampleOffsetAttr offset, + dxsa::DstOperandAttr feedback, Location loc) { + return dxsa::Gather4C::create(builder, loc, dst, srcAddress, srcResource, + srcSampler, srcReferenceValue, offset, + feedback); + } + + Instruction buildGather4PO(dxsa::DstOperandAttr dst, + dxsa::SrcOperandAttr srcAddress, + dxsa::SrcOperandAttr srcOffset, + dxsa::SrcOperandAttr srcResource, + dxsa::SrcOperandAttr srcSampler, + dxsa::DstOperandAttr feedback, Location loc) { + return dxsa::Gather4PO::create(builder, loc, dst, srcAddress, srcOffset, + srcResource, srcSampler, feedback); + } + + Instruction buildGather4POC(dxsa::DstOperandAttr dst, + dxsa::SrcOperandAttr srcAddress, + dxsa::SrcOperandAttr srcOffset, + dxsa::SrcOperandAttr srcResource, + dxsa::SrcOperandAttr srcSampler, + dxsa::SrcOperandAttr srcReferenceValue, + dxsa::DstOperandAttr feedback, Location loc) { + return dxsa::Gather4POC::create(builder, loc, dst, srcAddress, srcOffset, + srcResource, srcSampler, srcReferenceValue, + feedback); + } + Instruction buildDclInput(dxsa::InlineOperandAttr operand, Location loc) { return 
dxsa::DclInput::create(builder, loc, operand); } @@ -1925,6 +1969,94 @@ class Parser { report_fatal_error("unhandled instructions"); } + FailureOr parseGather4(uint32_t opcode, ExtendedInstruction &ext, + size_t beginOffset, uint32_t length, + Location loc) { + bool hasFeedback = false; + switch (opcode) { + case D3DWDDM1_3_SB_OPCODE_GATHER4_FEEDBACK: + case D3DWDDM1_3_SB_OPCODE_GATHER4_C_FEEDBACK: + case D3DWDDM1_3_SB_OPCODE_GATHER4_PO_FEEDBACK: + case D3DWDDM1_3_SB_OPCODE_GATHER4_PO_C_FEEDBACK: + hasFeedback = true; + break; + } + + auto dst = parseDstOperand(); + FAILURE_IF_FAILED(dst); + + dxsa::DstOperandAttr feedback; + if (hasFeedback) { + // For Feedback variant, feedback operand is the second + // dst register. + auto op = parseDstOperand(); + FAILURE_IF_FAILED(op); + feedback = *op; + } + + auto srcAddress = parseSrcOperand(); + FAILURE_IF_FAILED(srcAddress); + + dxsa::SrcOperandAttr srcOffset; + switch (opcode) { + case D3D11_SB_OPCODE_GATHER4_PO: + case D3DWDDM1_3_SB_OPCODE_GATHER4_PO_FEEDBACK: + case D3D11_SB_OPCODE_GATHER4_PO_C: + case D3DWDDM1_3_SB_OPCODE_GATHER4_PO_C_FEEDBACK: { + auto offset = parseSrcOperand(); + FAILURE_IF_FAILED(offset); + srcOffset = *offset; + } + } + + auto srcResource = parseSrcOperand(); + FAILURE_IF_FAILED(srcResource); + auto srcSampler = parseSrcOperand(); + FAILURE_IF_FAILED(srcSampler); + + dxsa::SrcOperandAttr extraOperand; + switch (opcode) { + case D3D11_SB_OPCODE_GATHER4_C: + case D3DWDDM1_3_SB_OPCODE_GATHER4_C_FEEDBACK: + case D3D11_SB_OPCODE_GATHER4_PO_C: + case D3DWDDM1_3_SB_OPCODE_GATHER4_PO_C_FEEDBACK: { + // Comparison variants take one extra operand: the reference value. + auto extraOp = parseSrcOperand(); + FAILURE_IF_FAILED(extraOp); + extraOperand = *extraOp; + break; + } + } + + dxsa::SampleOffsetAttr offset; + if (ext.sampleOffset) { + offset = builder.buildSampleOffsetAttr(*ext.sampleOffset); + } + + FAILURE_IF_FAILED(verifyInstructionLength(beginOffset, length)); + + switch (opcode) { + case D3D10_1_SB_OPCODE_GATHER4: + case
D3DWDDM1_3_SB_OPCODE_GATHER4_FEEDBACK: + return builder.buildGather4(*dst, *srcAddress, *srcResource, *srcSampler, + offset, feedback, loc); + case D3DWDDM1_3_SB_OPCODE_GATHER4_C_FEEDBACK: + case D3D11_SB_OPCODE_GATHER4_C: + return builder.buildGather4C(*dst, *srcAddress, *srcResource, *srcSampler, + extraOperand, offset, feedback, loc); + case D3D11_SB_OPCODE_GATHER4_PO: + case D3DWDDM1_3_SB_OPCODE_GATHER4_PO_FEEDBACK: + return builder.buildGather4PO(*dst, *srcAddress, srcOffset, *srcResource, + *srcSampler, feedback, loc); + case D3D11_SB_OPCODE_GATHER4_PO_C: + case D3DWDDM1_3_SB_OPCODE_GATHER4_PO_C_FEEDBACK: + return builder.buildGather4POC(*dst, *srcAddress, srcOffset, *srcResource, + *srcSampler, extraOperand, feedback, + loc); + } + report_fatal_error("unhandled instructions"); + } + FailureOr parseDclInput(Location loc) { auto operand = parseInlineOperand(); FAILURE_IF_FAILED(operand); @@ -2450,6 +2582,16 @@ class Parser { case D3DWDDM1_3_SB_OPCODE_SAMPLE_C_LZ_FEEDBACK: return parseSampleOp(opcode, extendedInst, beginOffset, instructionLengthInTokens, getLocation()); + case D3D10_1_SB_OPCODE_GATHER4: + case D3DWDDM1_3_SB_OPCODE_GATHER4_FEEDBACK: + case D3D11_SB_OPCODE_GATHER4_C: + case D3DWDDM1_3_SB_OPCODE_GATHER4_C_FEEDBACK: + case D3D11_SB_OPCODE_GATHER4_PO: + case D3DWDDM1_3_SB_OPCODE_GATHER4_PO_FEEDBACK: + case D3D11_SB_OPCODE_GATHER4_PO_C: + case D3DWDDM1_3_SB_OPCODE_GATHER4_PO_C_FEEDBACK: + return parseGather4(opcode, extendedInst, beginOffset, + instructionLengthInTokens, getLocation()); } #undef SATURABLE_BINARY_OP diff --git a/mlir/test/Target/DXSA/gather4.test b/mlir/test/Target/DXSA/gather4.test new file mode 100644 index 000000000000..423c1edea2c4 --- /dev/null +++ b/mlir/test/Target/DXSA/gather4.test @@ -0,0 +1,21 @@ +// RUN: mlir-translate --import-dxsa-hex %s | FileCheck %s +// RUN: mlir-translate --import-dxsa-hex %s | mlir-opt --verify-roundtrip + +// CHECK: dxsa.gather4 r<0>, v<0, >, t<0, vector>, s<0, vector, > + +0x8b00006d, 0x800000c2,
0x00155543, 0x001000f2, +0x00000000, 0x00101046, 0x00000000, 0x00107e46, +0x00000000, 0x0010600a, 0x00000000 + +// CHECK: dxsa.gather4 r<1>, v<0, >, t<0, vector>, s<0, vector, >, + +0x8c00006d, 0x8000f601, 0x800000c2, 0x00155543, +0x001000f2, 0x00000001, 0x00101046, 0x00000000, +0x00107e46, 0x00000000, 0x0010601a, 0x00000000 + +// CHECK: dxsa.gather4 r<1>, v<0, >, t<0, vector>, s<0, vector, >, , r<2, > + +0x8e0000db, 0x80005a01, 0x800000c2, 0x00155543, +0x001000f2, 0x00000001, 0x00100012, 0x00000002, +0x00101046, 0x00000000, 0x00107e46, 0x00000000, +0x0010603a, 0x00000000 diff --git a/mlir/test/Target/DXSA/gather4_c.test b/mlir/test/Target/DXSA/gather4_c.test new file mode 100644 index 000000000000..8c74005f4eaf --- /dev/null +++ b/mlir/test/Target/DXSA/gather4_c.test @@ -0,0 +1,23 @@ +// RUN: mlir-translate --import-dxsa-hex %s | FileCheck %s +// RUN: mlir-translate --import-dxsa-hex %s | mlir-opt --verify-roundtrip + +// CHECK: dxsa.gather4_c r<0>, v<0, >, t<0, vector>, s<0, vector, >, v<0, > + +0x8d00007e, 0x800000c2, 0x00155543, 0x001000f2, +0x00000000, 0x00101046, 0x00000000, 0x00107e46, +0x00000000, 0x0010600a, 0x00000000, 0x0010101a, +0x00000000 + +// CHECK: dxsa.gather4_c r<1>, v<0, >, t<0, vector>, s<0, vector, >, v<0, >, + +0x8e00007e, 0x8000f601, 0x800000c2, 0x00155543, +0x001000f2, 0x00000001, 0x00101046, 0x00000000, +0x00107e46, 0x00000000, 0x0010601a, 0x00000000, +0x0010101a, 0x00000000 + +// CHECK: dxsa.gather4_c r<1>, v<0, >, t<0, vector>, s<0, vector, >, v<0, >, , r<2, > + +0x900000dc, 0x80005a01, 0x800000c2, 0x00155543, +0x001000f2, 0x00000001, 0x00100012, 0x00000002, +0x00101046, 0x00000000, 0x00107e46, 0x00000000, +0x0010603a, 0x00000000, 0x0010101a, 0x00000000 diff --git a/mlir/test/Target/DXSA/gather4_po.test b/mlir/test/Target/DXSA/gather4_po.test new file mode 100644 index 000000000000..e1676f7331d4 --- /dev/null +++ b/mlir/test/Target/DXSA/gather4_po.test @@ -0,0 +1,16 @@ +// RUN: mlir-translate --import-dxsa-hex %s | FileCheck %s +// RUN: 
mlir-translate --import-dxsa-hex %s | mlir-opt --verify-roundtrip + +// CHECK: dxsa.gather4_po r<1, >, v<0, >, r<0, >, t<0, vector>, s<0, vector, > + +0x8d00007f, 0x800000c2, 0x00155543, 0x00100012, +0x00000001, 0x00101046, 0x00000000, 0x00100046, +0x00000000, 0x00107e46, 0x00000000, 0x0010600a, +0x00000000 + +// CHECK: dxsa.gather4_po r<3, >, v<0, >, r<0, >, t<0, vector>, s<0, vector, >, r<4, > + +0x8f0000dd, 0x800000c2, 0x00155543, 0x00100012, +0x00000003, 0x00100012, 0x00000004, 0x00101046, +0x00000000, 0x00100046, 0x00000000, 0x00107e46, +0x00000000, 0x0010603a, 0x00000000 diff --git a/mlir/test/Target/DXSA/gather4_po_c.test b/mlir/test/Target/DXSA/gather4_po_c.test new file mode 100644 index 000000000000..e54668ae7f69 --- /dev/null +++ b/mlir/test/Target/DXSA/gather4_po_c.test @@ -0,0 +1,17 @@ +// RUN: mlir-translate --import-dxsa-hex %s | FileCheck %s +// RUN: mlir-translate --import-dxsa-hex %s | mlir-opt --verify-roundtrip + +// CHECK: dxsa.gather4_po_c r<1, >, v<0, >, r<0, >, t<0, vector>, s<0, vector, >, v<0, > + +0x8f000080, 0x800000c2, 0x00155543, 0x00100012, +0x00000001, 0x00101046, 0x00000000, 0x00100046, +0x00000000, 0x00107e46, 0x00000000, 0x0010600a, +0x00000000, 0x0010101a, 0x00000000 + +// CHECK: dxsa.gather4_po_c r<3, >, v<0, >, r<0, >, t<0, vector>, s<0, vector, >, v<0, >, r<4, > + +0x910000de, 0x800000c2, 0x00155543, 0x00100012, +0x00000003, 0x00100012, 0x00000004, 0x00101046, +0x00000000, 0x00100046, 0x00000000, 0x00107e46, +0x00000000, 0x0010603a, 0x00000000, 0x0010101a, +0x00000000