1//===- AMDGPUDisassembler.cpp - Disassembler for AMDGPU ISA ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9//===----------------------------------------------------------------------===//
10//
/// \file
///
/// This file contains the definition of the AMDGPU ISA disassembler.
//
15//===----------------------------------------------------------------------===//
16
17// ToDo: What to do with instruction suffixes (v_mov_b32 vs v_mov_b32_e32)?
18
19#include "Disassembler/AMDGPUDisassembler.h"
20#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
21#include "SIDefines.h"
22#include "SIRegisterInfo.h"
23#include "TargetInfo/AMDGPUTargetInfo.h"
24#include "Utils/AMDGPUBaseInfo.h"
25#include "llvm-c/DisassemblerTypes.h"
26#include "llvm/BinaryFormat/ELF.h"
27#include "llvm/MC/MCAsmInfo.h"
28#include "llvm/MC/MCContext.h"
29#include "llvm/MC/MCDecoderOps.h"
30#include "llvm/MC/MCExpr.h"
31#include "llvm/MC/MCInstrDesc.h"
32#include "llvm/MC/MCRegisterInfo.h"
33#include "llvm/MC/MCSubtargetInfo.h"
34#include "llvm/MC/TargetRegistry.h"
35#include "llvm/Support/AMDHSAKernelDescriptor.h"
36
37using namespace llvm;
38
39#define DEBUG_TYPE "amdgpu-disassembler"
40
41#define SGPR_MAX \
42 (isGFX10Plus() ? AMDGPU::EncValues::SGPR_MAX_GFX10 \
43 : AMDGPU::EncValues::SGPR_MAX_SI)
44
45using DecodeStatus = llvm::MCDisassembler::DecodeStatus;
46
47AMDGPUDisassembler::AMDGPUDisassembler(const MCSubtargetInfo &STI,
48 MCContext &Ctx, MCInstrInfo const *MCII)
    : MCDisassembler(STI, Ctx), MCII(MCII), MRI(*Ctx.getRegisterInfo()),
      MAI(*Ctx.getAsmInfo()), TargetMaxInstBytes(MAI.getMaxInstLength(&STI)),
      CodeObjectVersion(AMDGPU::getDefaultAMDHSACodeObjectVersion()) {
  // ToDo: AMDGPUDisassembler supports only VI ISA.
  if (!STI.hasFeature(AMDGPU::FeatureGCN3Encoding) && !isGFX10Plus())
    report_fatal_error("Disassembly not yet supported for subtarget");
55}
56
57void AMDGPUDisassembler::setABIVersion(unsigned Version) {
  CodeObjectVersion = AMDGPU::getAMDHSACodeObjectVersion(Version);
59}
60
61inline static MCDisassembler::DecodeStatus
addOperand(MCInst &Inst, const MCOperand& Opnd) {
  Inst.addOperand(Opnd);
  return Opnd.isValid() ?
    MCDisassembler::Success :
    MCDisassembler::Fail;
67}
68
69static int insertNamedMCOperand(MCInst &MI, const MCOperand &Op,
70 uint16_t NameIdx) {
  int OpIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), NameIdx);
  if (OpIdx != -1) {
    auto I = MI.begin();
    std::advance(I, OpIdx);
    MI.insert(I, Op);
76 }
77 return OpIdx;
78}
79
80static DecodeStatus decodeSOPPBrTarget(MCInst &Inst, unsigned Imm,
81 uint64_t Addr,
82 const MCDisassembler *Decoder) {
83 auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
84
85 // Our branches take a simm16, but we need two extra bits to account for the
86 // factor of 4.
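  // For example, an encoded Imm of 0xFFFC is -4 instructions: SignedOffset is
  // -16 bytes and the computed target is Addr + 4 - 16.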
87 APInt SignedOffset(18, Imm * 4, true);
  int64_t Offset = (SignedOffset.sext(64) + 4 + Addr).getSExtValue();

  if (DAsm->tryAddingSymbolicOperand(Inst, Offset, Addr, true, 2, 2, 0))
    return MCDisassembler::Success;
  return addOperand(Inst, MCOperand::createImm(Imm));
93}
94
95static DecodeStatus decodeSMEMOffset(MCInst &Inst, unsigned Imm, uint64_t Addr,
96 const MCDisassembler *Decoder) {
97 auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
98 int64_t Offset;
  if (DAsm->isGFX12Plus()) { // GFX12 supports 24-bit signed offsets.
    Offset = SignExtend64<24>(Imm);
  } else if (DAsm->isVI()) { // VI supports 20-bit unsigned offsets.
    Offset = Imm & 0xFFFFF;
  } else { // GFX9+ supports 21-bit signed offsets.
    Offset = SignExtend64<21>(Imm);
  }
  return addOperand(Inst, MCOperand::createImm(Offset));
107}
108
109static DecodeStatus decodeBoolReg(MCInst &Inst, unsigned Val, uint64_t Addr,
110 const MCDisassembler *Decoder) {
111 auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
  return addOperand(Inst, DAsm->decodeBoolReg(Val));
113}
114
115static DecodeStatus decodeSplitBarrier(MCInst &Inst, unsigned Val,
116 uint64_t Addr,
117 const MCDisassembler *Decoder) {
118 auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeSplitBarrier(Val));
120}
121
122static DecodeStatus decodeDpp8FI(MCInst &Inst, unsigned Val, uint64_t Addr,
123 const MCDisassembler *Decoder) {
124 auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeDpp8FI(Val));
126}
127
128#define DECODE_OPERAND(StaticDecoderName, DecoderName) \
129 static DecodeStatus StaticDecoderName(MCInst &Inst, unsigned Imm, \
130 uint64_t /*Addr*/, \
131 const MCDisassembler *Decoder) { \
132 auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder); \
133 return addOperand(Inst, DAsm->DecoderName(Imm)); \
134 }
135
136// Decoder for registers, decode directly using RegClassID. Imm(8-bit) is
137// number of register. Used by VGPR only and AGPR only operands.
138#define DECODE_OPERAND_REG_8(RegClass) \
139 static DecodeStatus Decode##RegClass##RegisterClass( \
140 MCInst &Inst, unsigned Imm, uint64_t /*Addr*/, \
141 const MCDisassembler *Decoder) { \
142 assert(Imm < (1 << 8) && "8-bit encoding"); \
143 auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder); \
144 return addOperand( \
145 Inst, DAsm->createRegOperand(AMDGPU::RegClass##RegClassID, Imm)); \
146 }
147
148#define DECODE_SrcOp(Name, EncSize, OpWidth, EncImm, MandatoryLiteral, \
149 ImmWidth) \
150 static DecodeStatus Name(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/, \
151 const MCDisassembler *Decoder) { \
152 assert(Imm < (1 << EncSize) && #EncSize "-bit encoding"); \
153 auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder); \
154 return addOperand(Inst, \
155 DAsm->decodeSrcOp(AMDGPUDisassembler::OpWidth, EncImm, \
156 MandatoryLiteral, ImmWidth)); \
157 }
158
159static DecodeStatus decodeSrcOp(MCInst &Inst, unsigned EncSize,
160 AMDGPUDisassembler::OpWidthTy OpWidth,
161 unsigned Imm, unsigned EncImm,
162 bool MandatoryLiteral, unsigned ImmWidth,
163 AMDGPU::OperandSemantics Sema,
164 const MCDisassembler *Decoder) {
165 assert(Imm < (1U << EncSize) && "Operand doesn't fit encoding!");
166 auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst, DAsm->decodeSrcOp(OpWidth, EncImm, MandatoryLiteral,
                                            ImmWidth, Sema));
169}
170
171// Decoder for registers. Imm(7-bit) is number of register, uses decodeSrcOp to
172// get register class. Used by SGPR only operands.
173#define DECODE_OPERAND_REG_7(RegClass, OpWidth) \
174 DECODE_SrcOp(Decode##RegClass##RegisterClass, 7, OpWidth, Imm, false, 0)
175
// Decoder for registers. Imm(10-bit): Imm{7-0} is the register number,
// Imm{9} is acc (AGPR or VGPR), and Imm{8} should be 0 (see VOP3Pe_SMFMAC).
// Set Imm{8} to 1 (IS_VGPR) to decode using 'enum10' from decodeSrcOp.
// Used by AV_ register classes (AGPR or VGPR only register operands).
180template <AMDGPUDisassembler::OpWidthTy OpWidth>
181static DecodeStatus decodeAV10(MCInst &Inst, unsigned Imm, uint64_t /* Addr */,
182 const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 10, OpWidth, Imm, Imm | AMDGPU::EncValues::IS_VGPR,
                     false, 0, AMDGPU::OperandSemantics::INT, Decoder);
185}
186
187// Decoder for Src(9-bit encoding) registers only.
188template <AMDGPUDisassembler::OpWidthTy OpWidth>
189static DecodeStatus decodeSrcReg9(MCInst &Inst, unsigned Imm,
190 uint64_t /* Addr */,
191 const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, false, 0,
                     AMDGPU::OperandSemantics::INT, Decoder);
194}
195
196// Decoder for Src(9-bit encoding) AGPR, register number encoded in 9bits, set
197// Imm{9} to 1 (set acc) and decode using 'enum10' from decodeSrcOp, registers
198// only.
199template <AMDGPUDisassembler::OpWidthTy OpWidth>
200static DecodeStatus decodeSrcA9(MCInst &Inst, unsigned Imm, uint64_t /* Addr */,
201 const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm | 512, false, 0,
                     AMDGPU::OperandSemantics::INT, Decoder);
204}
205
206// Decoder for 'enum10' from decodeSrcOp, Imm{0-8} is 9-bit Src encoding
207// Imm{9} is acc, registers only.
208template <AMDGPUDisassembler::OpWidthTy OpWidth>
209static DecodeStatus decodeSrcAV10(MCInst &Inst, unsigned Imm,
210 uint64_t /* Addr */,
211 const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 10, OpWidth, Imm, Imm, false, 0,
                     AMDGPU::OperandSemantics::INT, Decoder);
214}
215
// Decoder for RegisterOperands using 9-bit Src encoding. The operand can be a
// register from RegClass or an immediate. Registers that don't belong to
// RegClass will still be decoded, and InstPrinter will report a warning. An
// immediate will be decoded into a constant of size ImmWidth, which should
// match the width of the immediate used by the OperandType (important for
// floating-point types).
221template <AMDGPUDisassembler::OpWidthTy OpWidth, unsigned ImmWidth,
222 unsigned OperandSemantics>
223static DecodeStatus decodeSrcRegOrImm9(MCInst &Inst, unsigned Imm,
224 uint64_t /* Addr */,
225 const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, false, ImmWidth,
                     (AMDGPU::OperandSemantics)OperandSemantics, Decoder);
228}
229
230// Decoder for Src(9-bit encoding) AGPR or immediate. Set Imm{9} to 1 (set acc)
231// and decode using 'enum10' from decodeSrcOp.
232template <AMDGPUDisassembler::OpWidthTy OpWidth, unsigned ImmWidth,
233 unsigned OperandSemantics>
234static DecodeStatus decodeSrcRegOrImmA9(MCInst &Inst, unsigned Imm,
235 uint64_t /* Addr */,
236 const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm | 512, false, ImmWidth,
                     (AMDGPU::OperandSemantics)OperandSemantics, Decoder);
239}
240
241template <AMDGPUDisassembler::OpWidthTy OpWidth, unsigned ImmWidth,
242 unsigned OperandSemantics>
243static DecodeStatus decodeSrcRegOrImmDeferred9(MCInst &Inst, unsigned Imm,
244 uint64_t /* Addr */,
245 const MCDisassembler *Decoder) {
  return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, true, ImmWidth,
                     (AMDGPU::OperandSemantics)OperandSemantics, Decoder);
248}
249
250// Default decoders generated by tablegen: 'Decode<RegClass>RegisterClass'
251// when RegisterClass is used as an operand. Most often used for destination
252// operands.
253
254DECODE_OPERAND_REG_8(VGPR_32)
255DECODE_OPERAND_REG_8(VGPR_32_Lo128)
256DECODE_OPERAND_REG_8(VReg_64)
257DECODE_OPERAND_REG_8(VReg_96)
258DECODE_OPERAND_REG_8(VReg_128)
259DECODE_OPERAND_REG_8(VReg_256)
260DECODE_OPERAND_REG_8(VReg_288)
261DECODE_OPERAND_REG_8(VReg_352)
262DECODE_OPERAND_REG_8(VReg_384)
263DECODE_OPERAND_REG_8(VReg_512)
264DECODE_OPERAND_REG_8(VReg_1024)
265
266DECODE_OPERAND_REG_7(SReg_32, OPW32)
267DECODE_OPERAND_REG_7(SReg_32_XEXEC, OPW32)
268DECODE_OPERAND_REG_7(SReg_32_XM0_XEXEC, OPW32)
269DECODE_OPERAND_REG_7(SReg_32_XEXEC_HI, OPW32)
270DECODE_OPERAND_REG_7(SReg_64, OPW64)
271DECODE_OPERAND_REG_7(SReg_64_XEXEC, OPW64)
272DECODE_OPERAND_REG_7(SReg_96, OPW96)
273DECODE_OPERAND_REG_7(SReg_128, OPW128)
274DECODE_OPERAND_REG_7(SReg_256, OPW256)
275DECODE_OPERAND_REG_7(SReg_512, OPW512)
276
277DECODE_OPERAND_REG_8(AGPR_32)
278DECODE_OPERAND_REG_8(AReg_64)
279DECODE_OPERAND_REG_8(AReg_128)
280DECODE_OPERAND_REG_8(AReg_256)
281DECODE_OPERAND_REG_8(AReg_512)
282DECODE_OPERAND_REG_8(AReg_1024)
283
284static DecodeStatus DecodeVGPR_16RegisterClass(MCInst &Inst, unsigned Imm,
285 uint64_t /*Addr*/,
286 const MCDisassembler *Decoder) {
287 assert(isUInt<10>(Imm) && "10-bit encoding expected");
288 assert((Imm & (1 << 8)) == 0 && "Imm{8} should not be used");
289
290 bool IsHi = Imm & (1 << 9);
291 unsigned RegIdx = Imm & 0xff;
292 auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
293 return addOperand(Inst, Opnd: DAsm->createVGPR16Operand(RegIdx, IsHi));
294}
295
296static DecodeStatus
297DecodeVGPR_16_Lo128RegisterClass(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,
298 const MCDisassembler *Decoder) {
299 assert(isUInt<8>(Imm) && "8-bit encoding expected");
300
301 bool IsHi = Imm & (1 << 7);
302 unsigned RegIdx = Imm & 0x7f;
303 auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
304 return addOperand(Inst, Opnd: DAsm->createVGPR16Operand(RegIdx, IsHi));
305}
306
307static DecodeStatus decodeOperand_VSrcT16_Lo128(MCInst &Inst, unsigned Imm,
308 uint64_t /*Addr*/,
309 const MCDisassembler *Decoder) {
310 assert(isUInt<9>(Imm) && "9-bit encoding expected");
311
312 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
313 bool IsVGPR = Imm & (1 << 8);
314 if (IsVGPR) {
315 bool IsHi = Imm & (1 << 7);
316 unsigned RegIdx = Imm & 0x7f;
317 return addOperand(Inst, Opnd: DAsm->createVGPR16Operand(RegIdx, IsHi));
318 }
319 return addOperand(Inst, Opnd: DAsm->decodeNonVGPRSrcOp(Width: AMDGPUDisassembler::OPW16,
320 Val: Imm & 0xFF, MandatoryLiteral: false, ImmWidth: 16));
321}
322
323static DecodeStatus decodeOperand_VSrcT16(MCInst &Inst, unsigned Imm,
324 uint64_t /*Addr*/,
325 const MCDisassembler *Decoder) {
326 assert(isUInt<10>(Imm) && "10-bit encoding expected");
327
328 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
329 bool IsVGPR = Imm & (1 << 8);
330 if (IsVGPR) {
331 bool IsHi = Imm & (1 << 9);
332 unsigned RegIdx = Imm & 0xff;
333 return addOperand(Inst, Opnd: DAsm->createVGPR16Operand(RegIdx, IsHi));
334 }
335 return addOperand(Inst, Opnd: DAsm->decodeNonVGPRSrcOp(Width: AMDGPUDisassembler::OPW16,
336 Val: Imm & 0xFF, MandatoryLiteral: false, ImmWidth: 16));
337}
338
339static DecodeStatus decodeOperand_KImmFP(MCInst &Inst, unsigned Imm,
340 uint64_t Addr,
341 const MCDisassembler *Decoder) {
342 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
343 return addOperand(Inst, Opnd: DAsm->decodeMandatoryLiteralConstant(Imm));
344}
345
346static DecodeStatus decodeOperandVOPDDstY(MCInst &Inst, unsigned Val,
347 uint64_t Addr, const void *Decoder) {
348 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
349 return addOperand(Inst, Opnd: DAsm->decodeVOPDDstYOp(Inst, Val));
350}
351
352static bool IsAGPROperand(const MCInst &Inst, int OpIdx,
353 const MCRegisterInfo *MRI) {
354 if (OpIdx < 0)
355 return false;
356
  const MCOperand &Op = Inst.getOperand(OpIdx);
  if (!Op.isReg())
    return false;

  unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
362 auto Reg = Sub ? Sub : Op.getReg();
363 return Reg >= AMDGPU::AGPR0 && Reg <= AMDGPU::AGPR255;
364}
365
366static DecodeStatus decodeAVLdSt(MCInst &Inst, unsigned Imm,
367 AMDGPUDisassembler::OpWidthTy Opw,
368 const MCDisassembler *Decoder) {
369 auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
370 if (!DAsm->isGFX90A()) {
371 Imm &= 511;
372 } else {
373 // If atomic has both vdata and vdst their register classes are tied.
374 // The bit is decoded along with the vdst, first operand. We need to
375 // change register class to AGPR if vdst was AGPR.
376 // If a DS instruction has both data0 and data1 their register classes
377 // are also tied.
    unsigned Opc = Inst.getOpcode();
    uint64_t TSFlags = DAsm->getMCII()->get(Opc).TSFlags;
    uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
                                                        : AMDGPU::OpName::vdata;
    const MCRegisterInfo *MRI = DAsm->getContext().getRegisterInfo();
    int DataIdx = AMDGPU::getNamedOperandIdx(Opc, DataNameIdx);
    if ((int)Inst.getNumOperands() == DataIdx) {
      int DstIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
      if (IsAGPROperand(Inst, DstIdx, MRI))
        Imm |= 512;
    }

    if (TSFlags & SIInstrFlags::DS) {
      int Data2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data1);
      if ((int)Inst.getNumOperands() == Data2Idx &&
          IsAGPROperand(Inst, DataIdx, MRI))
        Imm |= 512;
    }
396 }
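  // Bit 8 (value 256) tells decodeSrcOp this is a vector register; bit 9
  // (value 512), set above on gfx90a when the tied operand was an AGPR,
  // switches the operand to the AGPR file.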
  return addOperand(Inst, DAsm->decodeSrcOp(Opw, Imm | 256));
398}
399
400template <AMDGPUDisassembler::OpWidthTy Opw>
401static DecodeStatus decodeAVLdSt(MCInst &Inst, unsigned Imm,
402 uint64_t /* Addr */,
403 const MCDisassembler *Decoder) {
404 return decodeAVLdSt(Inst, Imm, Opw, Decoder);
405}
406
407static DecodeStatus decodeOperand_VSrc_f64(MCInst &Inst, unsigned Imm,
408 uint64_t Addr,
409 const MCDisassembler *Decoder) {
410 assert(Imm < (1 << 9) && "9-bit encoding");
411 auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  return addOperand(Inst,
                    DAsm->decodeSrcOp(AMDGPUDisassembler::OPW64, Imm, false, 64,
                                      AMDGPU::OperandSemantics::FP64));
415}
416
417#define DECODE_SDWA(DecName) \
418DECODE_OPERAND(decodeSDWA##DecName, decodeSDWA##DecName)
419
420DECODE_SDWA(Src32)
421DECODE_SDWA(Src16)
422DECODE_SDWA(VopcDst)
423
424#include "AMDGPUGenDisassemblerTables.inc"
425
426//===----------------------------------------------------------------------===//
427//
428//===----------------------------------------------------------------------===//
429
430template <typename T> static inline T eatBytes(ArrayRef<uint8_t>& Bytes) {
431 assert(Bytes.size() >= sizeof(T));
432 const auto Res =
433 support::endian::read<T, llvm::endianness::little>(Bytes.data());
  Bytes = Bytes.slice(sizeof(T));
435 return Res;
436}
437
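// Read a 96-bit instruction encoding as a little-endian 64-bit low half
// followed by a 32-bit high half; used for the 96-bit decoder tables on
// GFX11 and later.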
438static inline DecoderUInt128 eat12Bytes(ArrayRef<uint8_t> &Bytes) {
439 assert(Bytes.size() >= 12);
  uint64_t Lo =
      support::endian::read<uint64_t, llvm::endianness::little>(Bytes.data());
  Bytes = Bytes.slice(8);
  uint64_t Hi =
      support::endian::read<uint32_t, llvm::endianness::little>(Bytes.data());
  Bytes = Bytes.slice(4);
446 return DecoderUInt128(Lo, Hi);
447}
448
449DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
450 ArrayRef<uint8_t> Bytes_,
451 uint64_t Address,
452 raw_ostream &CS) const {
  unsigned MaxInstBytesNum = std::min((size_t)TargetMaxInstBytes, Bytes_.size());
  Bytes = Bytes_.slice(0, MaxInstBytesNum);

  // In case the opcode is not recognized we'll assume a Size of 4 bytes (unless
  // there are fewer bytes left). This will be overridden on success.
  Size = std::min((size_t)4, Bytes_.size());
459
460 do {
461 // ToDo: better to switch encoding length using some bit predicate
462 // but it is unknown yet, so try all we can
463
464 // Try to decode DPP and SDWA first to solve conflict with VOP1 and VOP2
465 // encodings
466 if (isGFX11Plus() && Bytes.size() >= 12 ) {
467 DecoderUInt128 DecW = eat12Bytes(Bytes);
468
469 if (isGFX11() &&
470 tryDecodeInst(DecoderTableGFX1196, DecoderTableGFX11_FAKE1696, MI,
471 DecW, Address, CS))
472 break;
473
474 if (isGFX12() &&
475 tryDecodeInst(DecoderTableGFX1296, DecoderTableGFX12_FAKE1696, MI,
476 DecW, Address, CS))
477 break;
478
479 if (isGFX12() &&
480 tryDecodeInst(DecoderTableGFX12W6496, MI, DecW, Address, CS))
481 break;
482 }
483
484 // Reinitialize Bytes
    Bytes = Bytes_.slice(0, MaxInstBytesNum);
486
487 if (Bytes.size() >= 8) {
488 const uint64_t QW = eatBytes<uint64_t>(Bytes);
489
      if (STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding) &&
491 tryDecodeInst(DecoderTableGFX10_B64, MI, QW, Address, CS))
492 break;
493
      if (STI.hasFeature(AMDGPU::FeatureUnpackedD16VMem) &&
495 tryDecodeInst(DecoderTableGFX80_UNPACKED64, MI, QW, Address, CS))
496 break;
497
498 // Some GFX9 subtargets repurposed the v_mad_mix_f32, v_mad_mixlo_f16 and
499 // v_mad_mixhi_f16 for FMA variants. Try to decode using this special
500 // table first so we print the correct name.
      if (STI.hasFeature(AMDGPU::FeatureFmaMixInsts) &&
502 tryDecodeInst(DecoderTableGFX9_DL64, MI, QW, Address, CS))
503 break;
504
      if (STI.hasFeature(AMDGPU::FeatureGFX940Insts) &&
506 tryDecodeInst(DecoderTableGFX94064, MI, QW, Address, CS))
507 break;
508
      if (STI.hasFeature(AMDGPU::FeatureGFX90AInsts) &&
510 tryDecodeInst(DecoderTableGFX90A64, MI, QW, Address, CS))
511 break;
512
513 if ((isVI() || isGFX9()) &&
514 tryDecodeInst(DecoderTableGFX864, MI, QW, Address, CS))
515 break;
516
517 if (isGFX9() && tryDecodeInst(DecoderTableGFX964, MI, QW, Address, CS))
518 break;
519
520 if (isGFX10() && tryDecodeInst(DecoderTableGFX1064, MI, QW, Address, CS))
521 break;
522
523 if (isGFX12() &&
524 tryDecodeInst(DecoderTableGFX1264, DecoderTableGFX12_FAKE1664, MI, QW,
525 Address, CS))
526 break;
527
528 if (isGFX11() &&
529 tryDecodeInst(DecoderTableGFX1164, DecoderTableGFX11_FAKE1664, MI, QW,
530 Address, CS))
531 break;
532
533 if (isGFX11() &&
534 tryDecodeInst(DecoderTableGFX11W6464, MI, QW, Address, CS))
535 break;
536
537 if (isGFX12() &&
538 tryDecodeInst(DecoderTableGFX12W6464, MI, QW, Address, CS))
539 break;
540 }
541
542 // Reinitialize Bytes
    Bytes = Bytes_.slice(0, MaxInstBytesNum);
544
545 // Try decode 32-bit instruction
546 if (Bytes.size() >= 4) {
547 const uint32_t DW = eatBytes<uint32_t>(Bytes);
548
549 if ((isVI() || isGFX9()) &&
550 tryDecodeInst(DecoderTableGFX832, MI, DW, Address, CS))
551 break;
552
553 if (tryDecodeInst(DecoderTableAMDGPU32, MI, DW, Address, CS))
554 break;
555
556 if (isGFX9() && tryDecodeInst(DecoderTableGFX932, MI, DW, Address, CS))
557 break;
558
559 if (STI.hasFeature(AMDGPU::FeatureGFX90AInsts) &&
560 tryDecodeInst(DecoderTableGFX90A32, MI, DW, Address, CS))
561 break;
562
563 if (STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding) &&
564 tryDecodeInst(DecoderTableGFX10_B32, MI, DW, Address, CS))
565 break;
566
567 if (isGFX10() && tryDecodeInst(DecoderTableGFX1032, MI, DW, Address, CS))
568 break;
569
570 if (isGFX11() &&
571 tryDecodeInst(DecoderTableGFX1132, DecoderTableGFX11_FAKE1632, MI, DW,
572 Address, CS))
573 break;
574
575 if (isGFX12() &&
576 tryDecodeInst(DecoderTableGFX1232, DecoderTableGFX12_FAKE1632, MI, DW,
577 Address, CS))
578 break;
579 }
580
581 return MCDisassembler::Fail;
582 } while (false);
583
584 if (MCII->get(Opcode: MI.getOpcode()).TSFlags & SIInstrFlags::DPP) {
585 if (isMacDPP(MI))
586 convertMacDPPInst(MI);
587
588 if (MCII->get(Opcode: MI.getOpcode()).TSFlags & SIInstrFlags::VOP3P)
589 convertVOP3PDPPInst(MI);
590 else if ((MCII->get(Opcode: MI.getOpcode()).TSFlags & SIInstrFlags::VOPC) ||
591 AMDGPU::isVOPC64DPP(Opc: MI.getOpcode()))
592 convertVOPCDPPInst(MI); // Special VOP3 case
593 else if (AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dpp8) !=
594 -1)
595 convertDPP8Inst(MI);
596 else if (MCII->get(Opcode: MI.getOpcode()).TSFlags & SIInstrFlags::VOP3)
597 convertVOP3DPPInst(MI); // Regular VOP3 case
598 }
599
600 if (AMDGPU::isMAC(Opc: MI.getOpcode())) {
601 // Insert dummy unused src2_modifiers.
602 insertNamedMCOperand(MI, MCOperand::createImm(0),
603 AMDGPU::OpName::src2_modifiers);
604 }
605
606 if (MI.getOpcode() == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp ||
607 MI.getOpcode() == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp) {
608 // Insert dummy unused src2_modifiers.
609 insertNamedMCOperand(MI, MCOperand::createImm(0),
610 AMDGPU::OpName::src2_modifiers);
611 }
612
613 if ((MCII->get(Opcode: MI.getOpcode()).TSFlags & SIInstrFlags::DS) &&
614 !AMDGPU::hasGDS(STI)) {
615 insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::gds);
616 }
617
618 if (MCII->get(Opcode: MI.getOpcode()).TSFlags &
619 (SIInstrFlags::MUBUF | SIInstrFlags::FLAT | SIInstrFlags::SMRD)) {
620 int CPolPos = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
621 AMDGPU::OpName::cpol);
622 if (CPolPos != -1) {
623 unsigned CPol =
624 (MCII->get(Opcode: MI.getOpcode()).TSFlags & SIInstrFlags::IsAtomicRet) ?
625 AMDGPU::CPol::GLC : 0;
626 if (MI.getNumOperands() <= (unsigned)CPolPos) {
627 insertNamedMCOperand(MI, MCOperand::createImm(CPol),
628 AMDGPU::OpName::cpol);
629 } else if (CPol) {
630 MI.getOperand(i: CPolPos).setImm(MI.getOperand(i: CPolPos).getImm() | CPol);
631 }
632 }
633 }
634
635 if ((MCII->get(MI.getOpcode()).TSFlags &
636 (SIInstrFlags::MTBUF | SIInstrFlags::MUBUF)) &&
637 (STI.hasFeature(AMDGPU::FeatureGFX90AInsts))) {
638 // GFX90A lost TFE, its place is occupied by ACC.
639 int TFEOpIdx =
640 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::tfe);
641 if (TFEOpIdx != -1) {
642 auto TFEIter = MI.begin();
643 std::advance(i&: TFEIter, n: TFEOpIdx);
644 MI.insert(I: TFEIter, Op: MCOperand::createImm(Val: 0));
645 }
646 }
647
648 if (MCII->get(Opcode: MI.getOpcode()).TSFlags &
649 (SIInstrFlags::MTBUF | SIInstrFlags::MUBUF)) {
650 int SWZOpIdx =
651 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::swz);
652 if (SWZOpIdx != -1) {
653 auto SWZIter = MI.begin();
654 std::advance(i&: SWZIter, n: SWZOpIdx);
655 MI.insert(I: SWZIter, Op: MCOperand::createImm(Val: 0));
656 }
657 }
658
659 if (MCII->get(Opcode: MI.getOpcode()).TSFlags & SIInstrFlags::MIMG) {
660 int VAddr0Idx =
661 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
662 int RsrcIdx =
663 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::srsrc);
664 unsigned NSAArgs = RsrcIdx - VAddr0Idx - 1;
665 if (VAddr0Idx >= 0 && NSAArgs > 0) {
666 unsigned NSAWords = (NSAArgs + 3) / 4;
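      // Each extra NSA address is encoded as a single byte (a VGPR number)
      // appended after the main encoding, padded out to whole dwords.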
667 if (Bytes.size() < 4 * NSAWords)
668 return MCDisassembler::Fail;
669 for (unsigned i = 0; i < NSAArgs; ++i) {
670 const unsigned VAddrIdx = VAddr0Idx + 1 + i;
671 auto VAddrRCID =
672 MCII->get(Opcode: MI.getOpcode()).operands()[VAddrIdx].RegClass;
673 MI.insert(I: MI.begin() + VAddrIdx, Op: createRegOperand(RegClassID: VAddrRCID, Val: Bytes[i]));
674 }
675 Bytes = Bytes.slice(N: 4 * NSAWords);
676 }
677
678 convertMIMGInst(MI);
679 }
680
681 if (MCII->get(Opcode: MI.getOpcode()).TSFlags &
682 (SIInstrFlags::VIMAGE | SIInstrFlags::VSAMPLE))
683 convertMIMGInst(MI);
684
685 if (MCII->get(Opcode: MI.getOpcode()).TSFlags & SIInstrFlags::EXP)
686 convertEXPInst(MI);
687
688 if (MCII->get(Opcode: MI.getOpcode()).TSFlags & SIInstrFlags::VINTERP)
689 convertVINTERPInst(MI);
690
691 if (MCII->get(Opcode: MI.getOpcode()).TSFlags & SIInstrFlags::SDWA)
692 convertSDWAInst(MI);
693
694 int VDstIn_Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
695 AMDGPU::OpName::vdst_in);
696 if (VDstIn_Idx != -1) {
697 int Tied = MCII->get(Opcode: MI.getOpcode()).getOperandConstraint(OpNum: VDstIn_Idx,
698 Constraint: MCOI::OperandConstraint::TIED_TO);
699 if (Tied != -1 && (MI.getNumOperands() <= (unsigned)VDstIn_Idx ||
700 !MI.getOperand(i: VDstIn_Idx).isReg() ||
701 MI.getOperand(i: VDstIn_Idx).getReg() != MI.getOperand(i: Tied).getReg())) {
702 if (MI.getNumOperands() > (unsigned)VDstIn_Idx)
703 MI.erase(I: &MI.getOperand(i: VDstIn_Idx));
704 insertNamedMCOperand(MI,
705 MCOperand::createReg(MI.getOperand(Tied).getReg()),
706 AMDGPU::OpName::vdst_in);
707 }
708 }
709
710 int ImmLitIdx =
711 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::imm);
712 bool IsSOPK = MCII->get(Opcode: MI.getOpcode()).TSFlags & SIInstrFlags::SOPK;
713 if (ImmLitIdx != -1 && !IsSOPK)
714 convertFMAanyK(MI, ImmLitIdx);
715
716 Size = MaxInstBytesNum - Bytes.size();
717 return MCDisassembler::Success;
718}
719
720void AMDGPUDisassembler::convertEXPInst(MCInst &MI) const {
721 if (STI.hasFeature(AMDGPU::FeatureGFX11Insts)) {
722 // The MCInst still has these fields even though they are no longer encoded
723 // in the GFX11 instruction.
724 insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::vm);
725 insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::compr);
726 }
727}
728
729void AMDGPUDisassembler::convertVINTERPInst(MCInst &MI) const {
730 if (MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_gfx11 ||
731 MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_gfx12 ||
732 MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_gfx11 ||
733 MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_gfx12 ||
734 MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_gfx11 ||
735 MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_gfx12 ||
736 MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_gfx11 ||
737 MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_gfx12) {
738 // The MCInst has this field that is not directly encoded in the
739 // instruction.
740 insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::op_sel);
741 }
742}
743
744void AMDGPUDisassembler::convertSDWAInst(MCInst &MI) const {
745 if (STI.hasFeature(AMDGPU::FeatureGFX9) ||
746 STI.hasFeature(AMDGPU::FeatureGFX10)) {
747 if (AMDGPU::hasNamedOperand(MI.getOpcode(), AMDGPU::OpName::sdst))
748 // VOPC - insert clamp
749 insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::clamp);
750 } else if (STI.hasFeature(AMDGPU::FeatureVolcanicIslands)) {
751 int SDst = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::sdst);
752 if (SDst != -1) {
753 // VOPC - insert VCC register as sdst
754 insertNamedMCOperand(MI, createRegOperand(AMDGPU::VCC),
755 AMDGPU::OpName::sdst);
756 } else {
757 // VOP1/2 - insert omod if present in instruction
758 insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::omod);
759 }
760 }
761}
762
763struct VOPModifiers {
764 unsigned OpSel = 0;
765 unsigned OpSelHi = 0;
766 unsigned NegLo = 0;
767 unsigned NegHi = 0;
768};
769
770// Reconstruct values of VOP3/VOP3P operands such as op_sel.
771// Note that these values do not affect disassembler output,
772// so this is only necessary for consistency with src_modifiers.
773static VOPModifiers collectVOPModifiers(const MCInst &MI,
774 bool IsVOP3P = false) {
775 VOPModifiers Modifiers;
776 unsigned Opc = MI.getOpcode();
777 const int ModOps[] = {AMDGPU::OpName::src0_modifiers,
778 AMDGPU::OpName::src1_modifiers,
779 AMDGPU::OpName::src2_modifiers};
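  // Each srcN_modifiers immediate packs that operand's OP_SEL / OP_SEL_HI /
  // NEG / NEG_HI bits; gather them here into the packed operand values.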
780 for (int J = 0; J < 3; ++J) {
781 int OpIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, NamedIdx: ModOps[J]);
782 if (OpIdx == -1)
783 continue;
784
785 unsigned Val = MI.getOperand(i: OpIdx).getImm();
786
787 Modifiers.OpSel |= !!(Val & SISrcMods::OP_SEL_0) << J;
788 if (IsVOP3P) {
789 Modifiers.OpSelHi |= !!(Val & SISrcMods::OP_SEL_1) << J;
790 Modifiers.NegLo |= !!(Val & SISrcMods::NEG) << J;
791 Modifiers.NegHi |= !!(Val & SISrcMods::NEG_HI) << J;
792 } else if (J == 0) {
793 Modifiers.OpSel |= !!(Val & SISrcMods::DST_OP_SEL) << 3;
794 }
795 }
796
797 return Modifiers;
798}
799
800// Instructions decode the op_sel/suffix bits into the src_modifier
801// operands. Copy those bits into the src operands for true16 VGPRs.
802void AMDGPUDisassembler::convertTrue16OpSel(MCInst &MI) const {
803 const unsigned Opc = MI.getOpcode();
804 const MCRegisterClass &ConversionRC =
805 MRI.getRegClass(AMDGPU::VGPR_16RegClassID);
806 constexpr std::array<std::tuple<int, int, unsigned>, 4> OpAndOpMods = {
807 {{AMDGPU::OpName::src0, AMDGPU::OpName::src0_modifiers,
808 SISrcMods::OP_SEL_0},
809 {AMDGPU::OpName::src1, AMDGPU::OpName::src1_modifiers,
810 SISrcMods::OP_SEL_0},
811 {AMDGPU::OpName::src2, AMDGPU::OpName::src2_modifiers,
812 SISrcMods::OP_SEL_0},
813 {AMDGPU::OpName::vdst, AMDGPU::OpName::src0_modifiers,
814 SISrcMods::DST_OP_SEL}}};
815 for (const auto &[OpName, OpModsName, OpSelMask] : OpAndOpMods) {
816 int OpIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, NamedIdx: OpName);
817 int OpModsIdx = AMDGPU::getNamedOperandIdx(Opcode: Opc, NamedIdx: OpModsName);
818 if (OpIdx == -1 || OpModsIdx == -1)
819 continue;
820 MCOperand &Op = MI.getOperand(i: OpIdx);
821 if (!Op.isReg())
822 continue;
823 if (!ConversionRC.contains(Reg: Op.getReg()))
824 continue;
825 unsigned OpEnc = MRI.getEncodingValue(RegNo: Op.getReg());
826 const MCOperand &OpMods = MI.getOperand(i: OpModsIdx);
827 unsigned ModVal = OpMods.getImm();
828 if (ModVal & OpSelMask) { // isHi
829 unsigned RegIdx = OpEnc & AMDGPU::HWEncoding::REG_IDX_MASK;
830 Op.setReg(ConversionRC.getRegister(i: RegIdx * 2 + 1));
831 }
832 }
833}
834
835// MAC opcodes have special old and src2 operands.
836// src2 is tied to dst, while old is not tied (but assumed to be).
837bool AMDGPUDisassembler::isMacDPP(MCInst &MI) const {
838 constexpr int DST_IDX = 0;
839 auto Opcode = MI.getOpcode();
840 const auto &Desc = MCII->get(Opcode);
841 auto OldIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::old);
842
843 if (OldIdx != -1 && Desc.getOperandConstraint(
844 OpNum: OldIdx, Constraint: MCOI::OperandConstraint::TIED_TO) == -1) {
845 assert(AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::src2));
846 assert(Desc.getOperandConstraint(
847 AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2),
848 MCOI::OperandConstraint::TIED_TO) == DST_IDX);
849 (void)DST_IDX;
850 return true;
851 }
852
853 return false;
854}
855
856// Create dummy old operand and insert dummy unused src2_modifiers
857void AMDGPUDisassembler::convertMacDPPInst(MCInst &MI) const {
858 assert(MI.getNumOperands() + 1 < MCII->get(MI.getOpcode()).getNumOperands());
859 insertNamedMCOperand(MI, MCOperand::createReg(0), AMDGPU::OpName::old);
860 insertNamedMCOperand(MI, MCOperand::createImm(0),
861 AMDGPU::OpName::src2_modifiers);
862}
863
864void AMDGPUDisassembler::convertDPP8Inst(MCInst &MI) const {
865 unsigned Opc = MI.getOpcode();
866
867 int VDstInIdx =
868 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst_in);
869 if (VDstInIdx != -1)
870 insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::vdst_in);
871
872 if (MI.getOpcode() == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp8_gfx12 ||
873 MI.getOpcode() == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp8_gfx12)
874 insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::src2);
875
876 unsigned DescNumOps = MCII->get(Opcode: Opc).getNumOperands();
877 if (MI.getNumOperands() < DescNumOps &&
878 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
879 convertTrue16OpSel(MI);
880 auto Mods = collectVOPModifiers(MI);
881 insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel),
882 AMDGPU::OpName::op_sel);
883 } else {
884 // Insert dummy unused src modifiers.
885 if (MI.getNumOperands() < DescNumOps &&
886 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0_modifiers))
887 insertNamedMCOperand(MI, MCOperand::createImm(0),
888 AMDGPU::OpName::src0_modifiers);
889
890 if (MI.getNumOperands() < DescNumOps &&
891 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src1_modifiers))
892 insertNamedMCOperand(MI, MCOperand::createImm(0),
893 AMDGPU::OpName::src1_modifiers);
894 }
895}
896
897void AMDGPUDisassembler::convertVOP3DPPInst(MCInst &MI) const {
898 convertTrue16OpSel(MI);
899
900 int VDstInIdx =
901 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst_in);
902 if (VDstInIdx != -1)
903 insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::vdst_in);
904
905 if (MI.getOpcode() == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp_gfx12 ||
906 MI.getOpcode() == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp_gfx12)
907 insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::src2);
908
909 unsigned Opc = MI.getOpcode();
910 unsigned DescNumOps = MCII->get(Opcode: Opc).getNumOperands();
911 if (MI.getNumOperands() < DescNumOps &&
912 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
913 auto Mods = collectVOPModifiers(MI);
914 insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel),
915 AMDGPU::OpName::op_sel);
916 }
917}
918
// Note that before gfx10, the MIMG encoding provided no information about
// VADDR size. Consequently, decoded instructions always show the address as if
// it were 1 dword, which may not actually be the case.
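// This rewrites the opcode so that the VData/VAddr dword counts implied by
// dmask, dim, A16, D16 and TFE match the decoded operands, widening register
// operands (or dropping extra NSA addresses) as needed.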
922void AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
923 auto TSFlags = MCII->get(Opcode: MI.getOpcode()).TSFlags;
924
925 int VDstIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
926 AMDGPU::OpName::vdst);
927
928 int VDataIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
929 AMDGPU::OpName::vdata);
930 int VAddr0Idx =
931 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
932 int RsrcOpName = TSFlags & SIInstrFlags::MIMG ? AMDGPU::OpName::srsrc
933 : AMDGPU::OpName::rsrc;
934 int RsrcIdx = AMDGPU::getNamedOperandIdx(Opcode: MI.getOpcode(), NamedIdx: RsrcOpName);
935 int DMaskIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
936 AMDGPU::OpName::dmask);
937
938 int TFEIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
939 AMDGPU::OpName::tfe);
940 int D16Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
941 AMDGPU::OpName::d16);
942
943 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc: MI.getOpcode());
944 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
945 AMDGPU::getMIMGBaseOpcodeInfo(BaseOpcode: Info->BaseOpcode);
946
947 assert(VDataIdx != -1);
948 if (BaseOpcode->BVH) {
949 // Add A16 operand for intersect_ray instructions
950 addOperand(Inst&: MI, Opnd: MCOperand::createImm(Val: BaseOpcode->A16));
951 return;
952 }
953
954 bool IsAtomic = (VDstIdx != -1);
955 bool IsGather4 = TSFlags & SIInstrFlags::Gather4;
956 bool IsVSample = TSFlags & SIInstrFlags::VSAMPLE;
957 bool IsNSA = false;
958 bool IsPartialNSA = false;
959 unsigned AddrSize = Info->VAddrDwords;
960
961 if (isGFX10Plus()) {
962 unsigned DimIdx =
963 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dim);
964 int A16Idx =
965 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::a16);
966 const AMDGPU::MIMGDimInfo *Dim =
967 AMDGPU::getMIMGDimInfoByEncoding(DimEnc: MI.getOperand(i: DimIdx).getImm());
968 const bool IsA16 = (A16Idx != -1 && MI.getOperand(i: A16Idx).getImm());
969
970 AddrSize =
971 AMDGPU::getAddrSizeMIMGOp(BaseOpcode, Dim, IsA16, IsG16Supported: AMDGPU::hasG16(STI));
972
973 // VSAMPLE insts that do not use vaddr3 behave the same as NSA forms.
974 // VIMAGE insts other than BVH never use vaddr4.
975 IsNSA = Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA ||
976 Info->MIMGEncoding == AMDGPU::MIMGEncGfx11NSA ||
977 Info->MIMGEncoding == AMDGPU::MIMGEncGfx12;
978 if (!IsNSA) {
979 if (!IsVSample && AddrSize > 12)
980 AddrSize = 16;
981 } else {
982 if (AddrSize > Info->VAddrDwords) {
983 if (!STI.hasFeature(AMDGPU::FeaturePartialNSAEncoding)) {
984 // The NSA encoding does not contain enough operands for the
985 // combination of base opcode / dimension. Should this be an error?
986 return;
987 }
988 IsPartialNSA = true;
989 }
990 }
991 }
992
993 unsigned DMask = MI.getOperand(i: DMaskIdx).getImm() & 0xf;
994 unsigned DstSize = IsGather4 ? 4 : std::max(a: llvm::popcount(Value: DMask), b: 1);
995
996 bool D16 = D16Idx >= 0 && MI.getOperand(i: D16Idx).getImm();
997 if (D16 && AMDGPU::hasPackedD16(STI)) {
998 DstSize = (DstSize + 1) / 2;
999 }
1000
1001 if (TFEIdx != -1 && MI.getOperand(i: TFEIdx).getImm())
1002 DstSize += 1;
1003
1004 if (DstSize == Info->VDataDwords && AddrSize == Info->VAddrDwords)
1005 return;
1006
1007 int NewOpcode =
1008 AMDGPU::getMIMGOpcode(BaseOpcode: Info->BaseOpcode, MIMGEncoding: Info->MIMGEncoding, VDataDwords: DstSize, VAddrDwords: AddrSize);
1009 if (NewOpcode == -1)
1010 return;
1011
1012 // Widen the register to the correct number of enabled channels.
1013 unsigned NewVdata = AMDGPU::NoRegister;
1014 if (DstSize != Info->VDataDwords) {
1015 auto DataRCID = MCII->get(Opcode: NewOpcode).operands()[VDataIdx].RegClass;
1016
1017 // Get first subregister of VData
1018 unsigned Vdata0 = MI.getOperand(i: VDataIdx).getReg();
1019 unsigned VdataSub0 = MRI.getSubReg(Vdata0, AMDGPU::sub0);
1020 Vdata0 = (VdataSub0 != 0)? VdataSub0 : Vdata0;
1021
1022 NewVdata = MRI.getMatchingSuperReg(Vdata0, AMDGPU::sub0,
1023 &MRI.getRegClass(DataRCID));
1024 if (NewVdata == AMDGPU::NoRegister) {
1025 // It's possible to encode this such that the low register + enabled
1026 // components exceeds the register count.
1027 return;
1028 }
1029 }
1030
1031 // If not using NSA on GFX10+, widen vaddr0 address register to correct size.
1032 // If using partial NSA on GFX11+ widen last address register.
1033 int VAddrSAIdx = IsPartialNSA ? (RsrcIdx - 1) : VAddr0Idx;
1034 unsigned NewVAddrSA = AMDGPU::NoRegister;
1035 if (STI.hasFeature(AMDGPU::FeatureNSAEncoding) && (!IsNSA || IsPartialNSA) &&
1036 AddrSize != Info->VAddrDwords) {
1037 unsigned VAddrSA = MI.getOperand(i: VAddrSAIdx).getReg();
1038 unsigned VAddrSubSA = MRI.getSubReg(VAddrSA, AMDGPU::sub0);
1039 VAddrSA = VAddrSubSA ? VAddrSubSA : VAddrSA;
1040
1041 auto AddrRCID = MCII->get(Opcode: NewOpcode).operands()[VAddrSAIdx].RegClass;
1042 NewVAddrSA = MRI.getMatchingSuperReg(VAddrSA, AMDGPU::sub0,
1043 &MRI.getRegClass(AddrRCID));
1044 if (!NewVAddrSA)
1045 return;
1046 }
1047
1048 MI.setOpcode(NewOpcode);
1049
1050 if (NewVdata != AMDGPU::NoRegister) {
1051 MI.getOperand(i: VDataIdx) = MCOperand::createReg(Reg: NewVdata);
1052
1053 if (IsAtomic) {
1054 // Atomic operations have an additional operand (a copy of data)
1055 MI.getOperand(i: VDstIdx) = MCOperand::createReg(Reg: NewVdata);
1056 }
1057 }
1058
1059 if (NewVAddrSA) {
1060 MI.getOperand(i: VAddrSAIdx) = MCOperand::createReg(Reg: NewVAddrSA);
1061 } else if (IsNSA) {
1062 assert(AddrSize <= Info->VAddrDwords);
1063 MI.erase(First: MI.begin() + VAddr0Idx + AddrSize,
1064 Last: MI.begin() + VAddr0Idx + Info->VAddrDwords);
1065 }
1066}
1067
1068// Opsel and neg bits are used in src_modifiers and standalone operands. Autogen
1069// decoder only adds to src_modifiers, so manually add the bits to the other
1070// operands.
1071void AMDGPUDisassembler::convertVOP3PDPPInst(MCInst &MI) const {
1072 unsigned Opc = MI.getOpcode();
1073 unsigned DescNumOps = MCII->get(Opcode: Opc).getNumOperands();
1074 auto Mods = collectVOPModifiers(MI, IsVOP3P: true);
1075
1076 if (MI.getNumOperands() < DescNumOps &&
1077 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in))
1078 insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::vdst_in);
1079
1080 if (MI.getNumOperands() < DescNumOps &&
1081 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel))
1082 insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel),
1083 AMDGPU::OpName::op_sel);
1084 if (MI.getNumOperands() < DescNumOps &&
1085 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel_hi))
1086 insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSelHi),
1087 AMDGPU::OpName::op_sel_hi);
1088 if (MI.getNumOperands() < DescNumOps &&
1089 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::neg_lo))
1090 insertNamedMCOperand(MI, MCOperand::createImm(Mods.NegLo),
1091 AMDGPU::OpName::neg_lo);
1092 if (MI.getNumOperands() < DescNumOps &&
1093 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::neg_hi))
1094 insertNamedMCOperand(MI, MCOperand::createImm(Mods.NegHi),
1095 AMDGPU::OpName::neg_hi);
1096}
1097
1098// Create dummy old operand and insert optional operands
1099void AMDGPUDisassembler::convertVOPCDPPInst(MCInst &MI) const {
1100 unsigned Opc = MI.getOpcode();
1101 unsigned DescNumOps = MCII->get(Opcode: Opc).getNumOperands();
1102
1103 if (MI.getNumOperands() < DescNumOps &&
1104 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::old))
1105 insertNamedMCOperand(MI, MCOperand::createReg(0), AMDGPU::OpName::old);
1106
1107 if (MI.getNumOperands() < DescNumOps &&
1108 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0_modifiers))
1109 insertNamedMCOperand(MI, MCOperand::createImm(0),
1110 AMDGPU::OpName::src0_modifiers);
1111
1112 if (MI.getNumOperands() < DescNumOps &&
1113 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src1_modifiers))
1114 insertNamedMCOperand(MI, MCOperand::createImm(0),
1115 AMDGPU::OpName::src1_modifiers);
1116}
1117
1118void AMDGPUDisassembler::convertFMAanyK(MCInst &MI, int ImmLitIdx) const {
1119 assert(HasLiteral && "Should have decoded a literal");
1120 const MCInstrDesc &Desc = MCII->get(Opcode: MI.getOpcode());
1121 unsigned DescNumOps = Desc.getNumOperands();
1122 insertNamedMCOperand(MI, MCOperand::createImm(Literal),
1123 AMDGPU::OpName::immDeferred);
1124 assert(DescNumOps == MI.getNumOperands());
1125 for (unsigned I = 0; I < DescNumOps; ++I) {
1126 auto &Op = MI.getOperand(i: I);
1127 auto OpType = Desc.operands()[I].OperandType;
1128 bool IsDeferredOp = (OpType == AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED ||
1129 OpType == AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED);
1130 if (Op.isImm() && Op.getImm() == AMDGPU::EncValues::LITERAL_CONST &&
1131 IsDeferredOp)
1132 Op.setImm(Literal);
1133 }
1134}
1135
1136const char* AMDGPUDisassembler::getRegClassName(unsigned RegClassID) const {
1137 return getContext().getRegisterInfo()->
1138 getRegClassName(&AMDGPUMCRegisterClasses[RegClassID]);
1139}
1140
1141inline
1142MCOperand AMDGPUDisassembler::errOperand(unsigned V,
1143 const Twine& ErrMsg) const {
1144 *CommentStream << "Error: " + ErrMsg;
1145
1146 // ToDo: add support for error operands to MCInst.h
1147 // return MCOperand::createError(V);
1148 return MCOperand();
1149}
1150
1151inline
1152MCOperand AMDGPUDisassembler::createRegOperand(unsigned int RegId) const {
1153 return MCOperand::createReg(Reg: AMDGPU::getMCReg(Reg: RegId, STI));
1154}
1155
1156inline
1157MCOperand AMDGPUDisassembler::createRegOperand(unsigned RegClassID,
1158 unsigned Val) const {
1159 const auto& RegCl = AMDGPUMCRegisterClasses[RegClassID];
1160 if (Val >= RegCl.getNumRegs())
1161 return errOperand(V: Val, ErrMsg: Twine(getRegClassName(RegClassID)) +
1162 ": unknown register " + Twine(Val));
1163 return createRegOperand(RegCl.getRegister(Val));
1164}
1165
1166inline
1167MCOperand AMDGPUDisassembler::createSRegOperand(unsigned SRegClassID,
1168 unsigned Val) const {
  // ToDo: SI/CI have 104 SGPRs, VI - 102
  // Valery: here we accept as much as we can and let the assembler sort it out
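  // The encoded value is the number of the first 32-bit register in the tuple;
  // the shift converts it to an index into the wider class (64-bit classes are
  // 2-aligned, 96-bit and wider are 4-aligned).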
1171 int shift = 0;
1172 switch (SRegClassID) {
1173 case AMDGPU::SGPR_32RegClassID:
1174 case AMDGPU::TTMP_32RegClassID:
1175 break;
1176 case AMDGPU::SGPR_64RegClassID:
1177 case AMDGPU::TTMP_64RegClassID:
1178 shift = 1;
1179 break;
1180 case AMDGPU::SGPR_96RegClassID:
1181 case AMDGPU::TTMP_96RegClassID:
1182 case AMDGPU::SGPR_128RegClassID:
1183 case AMDGPU::TTMP_128RegClassID:
1184 // ToDo: unclear if s[100:104] is available on VI. Can we use VCC as SGPR in
1185 // this bundle?
1186 case AMDGPU::SGPR_256RegClassID:
1187 case AMDGPU::TTMP_256RegClassID:
1188 // ToDo: unclear if s[96:104] is available on VI. Can we use VCC as SGPR in
1189 // this bundle?
1190 case AMDGPU::SGPR_288RegClassID:
1191 case AMDGPU::TTMP_288RegClassID:
1192 case AMDGPU::SGPR_320RegClassID:
1193 case AMDGPU::TTMP_320RegClassID:
1194 case AMDGPU::SGPR_352RegClassID:
1195 case AMDGPU::TTMP_352RegClassID:
1196 case AMDGPU::SGPR_384RegClassID:
1197 case AMDGPU::TTMP_384RegClassID:
1198 case AMDGPU::SGPR_512RegClassID:
1199 case AMDGPU::TTMP_512RegClassID:
1200 shift = 2;
1201 break;
1202 // ToDo: unclear if s[88:104] is available on VI. Can we use VCC as SGPR in
1203 // this bundle?
1204 default:
1205 llvm_unreachable("unhandled register class");
1206 }
1207
1208 if (Val % (1 << shift)) {
    *CommentStream << "Warning: " << getRegClassName(SRegClassID)
                   << ": scalar reg isn't aligned " << Val;
1211 }
1212
  return createRegOperand(SRegClassID, Val >> shift);
1214}
1215
1216MCOperand AMDGPUDisassembler::createVGPR16Operand(unsigned RegIdx,
1217 bool IsHi) const {
1218 unsigned RegIdxInVGPR16 = RegIdx * 2 + (IsHi ? 1 : 0);
1219 return createRegOperand(AMDGPU::VGPR_16RegClassID, RegIdxInVGPR16);
1220}
1221
1222// Decode Literals for insts which always have a literal in the encoding
1223MCOperand
1224AMDGPUDisassembler::decodeMandatoryLiteralConstant(unsigned Val) const {
1225 if (HasLiteral) {
1226 assert(
1227 AMDGPU::hasVOPD(STI) &&
1228 "Should only decode multiple kimm with VOPD, check VSrc operand types");
1229 if (Literal != Val)
1230 return errOperand(V: Val, ErrMsg: "More than one unique literal is illegal");
1231 }
1232 HasLiteral = true;
1233 Literal = Val;
1234 return MCOperand::createImm(Val: Literal);
1235}
1236
1237MCOperand AMDGPUDisassembler::decodeLiteralConstant(bool ExtendFP64) const {
1238 // For now all literal constants are supposed to be unsigned integer
1239 // ToDo: deal with signed/unsigned 64-bit integer constants
1240 // ToDo: deal with float/double constants
1241 if (!HasLiteral) {
1242 if (Bytes.size() < 4) {
1243 return errOperand(V: 0, ErrMsg: "cannot read literal, inst bytes left " +
1244 Twine(Bytes.size()));
1245 }
1246 HasLiteral = true;
1247 Literal = Literal64 = eatBytes<uint32_t>(Bytes);
1248 if (ExtendFP64)
1249 Literal64 <<= 32;
1250 }
1251 return MCOperand::createImm(Val: ExtendFP64 ? Literal64 : Literal);
1252}
1253
1254MCOperand AMDGPUDisassembler::decodeIntImmed(unsigned Imm) {
1255 using namespace AMDGPU::EncValues;
1256
1257 assert(Imm >= INLINE_INTEGER_C_MIN && Imm <= INLINE_INTEGER_C_MAX);
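  // Inline integers encode 0..64 in the lower part of the range and -1..-16 in
  // the upper part.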
  return MCOperand::createImm((Imm <= INLINE_INTEGER_C_POSITIVE_MAX) ?
    (static_cast<int64_t>(Imm) - INLINE_INTEGER_C_MIN) :
    (INLINE_INTEGER_C_POSITIVE_MAX - static_cast<int64_t>(Imm)));
1261 // Cast prevents negative overflow.
1262}
1263
1264static int64_t getInlineImmVal32(unsigned Imm) {
1265 switch (Imm) {
1266 case 240:
1267 return llvm::bit_cast<uint32_t>(from: 0.5f);
1268 case 241:
1269 return llvm::bit_cast<uint32_t>(from: -0.5f);
1270 case 242:
1271 return llvm::bit_cast<uint32_t>(from: 1.0f);
1272 case 243:
1273 return llvm::bit_cast<uint32_t>(from: -1.0f);
1274 case 244:
1275 return llvm::bit_cast<uint32_t>(from: 2.0f);
1276 case 245:
1277 return llvm::bit_cast<uint32_t>(from: -2.0f);
1278 case 246:
1279 return llvm::bit_cast<uint32_t>(from: 4.0f);
1280 case 247:
1281 return llvm::bit_cast<uint32_t>(from: -4.0f);
1282 case 248: // 1 / (2 * PI)
1283 return 0x3e22f983;
1284 default:
1285 llvm_unreachable("invalid fp inline imm");
1286 }
1287}
1288
1289static int64_t getInlineImmVal64(unsigned Imm) {
1290 switch (Imm) {
1291 case 240:
1292 return llvm::bit_cast<uint64_t>(from: 0.5);
1293 case 241:
1294 return llvm::bit_cast<uint64_t>(from: -0.5);
1295 case 242:
1296 return llvm::bit_cast<uint64_t>(from: 1.0);
1297 case 243:
1298 return llvm::bit_cast<uint64_t>(from: -1.0);
1299 case 244:
1300 return llvm::bit_cast<uint64_t>(from: 2.0);
1301 case 245:
1302 return llvm::bit_cast<uint64_t>(from: -2.0);
1303 case 246:
1304 return llvm::bit_cast<uint64_t>(from: 4.0);
1305 case 247:
1306 return llvm::bit_cast<uint64_t>(from: -4.0);
1307 case 248: // 1 / (2 * PI)
1308 return 0x3fc45f306dc9c882;
1309 default:
1310 llvm_unreachable("invalid fp inline imm");
1311 }
1312}
1313
1314static int64_t getInlineImmValF16(unsigned Imm) {
1315 switch (Imm) {
1316 case 240:
1317 return 0x3800;
1318 case 241:
1319 return 0xB800;
1320 case 242:
1321 return 0x3C00;
1322 case 243:
1323 return 0xBC00;
1324 case 244:
1325 return 0x4000;
1326 case 245:
1327 return 0xC000;
1328 case 246:
1329 return 0x4400;
1330 case 247:
1331 return 0xC400;
1332 case 248: // 1 / (2 * PI)
1333 return 0x3118;
1334 default:
1335 llvm_unreachable("invalid fp inline imm");
1336 }
1337}
1338
1339static int64_t getInlineImmValBF16(unsigned Imm) {
1340 switch (Imm) {
1341 case 240:
1342 return 0x3F00;
1343 case 241:
1344 return 0xBF00;
1345 case 242:
1346 return 0x3F80;
1347 case 243:
1348 return 0xBF80;
1349 case 244:
1350 return 0x4000;
1351 case 245:
1352 return 0xC000;
1353 case 246:
1354 return 0x4080;
1355 case 247:
1356 return 0xC080;
1357 case 248: // 1 / (2 * PI)
1358 return 0x3E22;
1359 default:
1360 llvm_unreachable("invalid fp inline imm");
1361 }
1362}
1363
1364static int64_t getInlineImmVal16(unsigned Imm, AMDGPU::OperandSemantics Sema) {
1365 return (Sema == AMDGPU::OperandSemantics::BF16) ? getInlineImmValBF16(Imm)
1366 : getInlineImmValF16(Imm);
1367}
1368
1369MCOperand AMDGPUDisassembler::decodeFPImmed(unsigned ImmWidth, unsigned Imm,
1370 AMDGPU::OperandSemantics Sema) {
1371 assert(Imm >= AMDGPU::EncValues::INLINE_FLOATING_C_MIN &&
1372 Imm <= AMDGPU::EncValues::INLINE_FLOATING_C_MAX);
1373
  // ToDo: case 248: 1/(2*PI) - is allowed only on VI
  // ImmWidth 0 is a default case where the operand should not allow immediates.
  // The Imm value is still decoded into a 32-bit immediate operand; the inst
  // printer will use it to print a verbose error message.
1378 switch (ImmWidth) {
1379 case 0:
1380 case 32:
1381 return MCOperand::createImm(Val: getInlineImmVal32(Imm));
1382 case 64:
1383 return MCOperand::createImm(Val: getInlineImmVal64(Imm));
1384 case 16:
1385 return MCOperand::createImm(Val: getInlineImmVal16(Imm, Sema));
1386 default:
1387 llvm_unreachable("implement me");
1388 }
1389}
1390
1391unsigned AMDGPUDisassembler::getVgprClassId(const OpWidthTy Width) const {
1392 using namespace AMDGPU;
1393
1394 assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
1395 switch (Width) {
1396 default: // fall
1397 case OPW32:
1398 case OPW16:
1399 case OPWV216:
1400 return VGPR_32RegClassID;
1401 case OPW64:
1402 case OPWV232: return VReg_64RegClassID;
1403 case OPW96: return VReg_96RegClassID;
1404 case OPW128: return VReg_128RegClassID;
1405 case OPW160: return VReg_160RegClassID;
1406 case OPW256: return VReg_256RegClassID;
1407 case OPW288: return VReg_288RegClassID;
1408 case OPW320: return VReg_320RegClassID;
1409 case OPW352: return VReg_352RegClassID;
1410 case OPW384: return VReg_384RegClassID;
1411 case OPW512: return VReg_512RegClassID;
1412 case OPW1024: return VReg_1024RegClassID;
1413 }
1414}
1415
1416unsigned AMDGPUDisassembler::getAgprClassId(const OpWidthTy Width) const {
1417 using namespace AMDGPU;
1418
1419 assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
1420 switch (Width) {
1421 default: // fall
1422 case OPW32:
1423 case OPW16:
1424 case OPWV216:
1425 return AGPR_32RegClassID;
1426 case OPW64:
1427 case OPWV232: return AReg_64RegClassID;
1428 case OPW96: return AReg_96RegClassID;
1429 case OPW128: return AReg_128RegClassID;
1430 case OPW160: return AReg_160RegClassID;
1431 case OPW256: return AReg_256RegClassID;
1432 case OPW288: return AReg_288RegClassID;
1433 case OPW320: return AReg_320RegClassID;
1434 case OPW352: return AReg_352RegClassID;
1435 case OPW384: return AReg_384RegClassID;
1436 case OPW512: return AReg_512RegClassID;
1437 case OPW1024: return AReg_1024RegClassID;
1438 }
1439}
1440
1441
1442unsigned AMDGPUDisassembler::getSgprClassId(const OpWidthTy Width) const {
1443 using namespace AMDGPU;
1444
1445 assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
1446 switch (Width) {
1447 default: // fall
1448 case OPW32:
1449 case OPW16:
1450 case OPWV216:
1451 return SGPR_32RegClassID;
1452 case OPW64:
1453 case OPWV232: return SGPR_64RegClassID;
1454 case OPW96: return SGPR_96RegClassID;
1455 case OPW128: return SGPR_128RegClassID;
1456 case OPW160: return SGPR_160RegClassID;
1457 case OPW256: return SGPR_256RegClassID;
1458 case OPW288: return SGPR_288RegClassID;
1459 case OPW320: return SGPR_320RegClassID;
1460 case OPW352: return SGPR_352RegClassID;
1461 case OPW384: return SGPR_384RegClassID;
1462 case OPW512: return SGPR_512RegClassID;
1463 }
1464}
1465
1466unsigned AMDGPUDisassembler::getTtmpClassId(const OpWidthTy Width) const {
1467 using namespace AMDGPU;
1468
1469 assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
1470 switch (Width) {
1471 default: // fall
1472 case OPW32:
1473 case OPW16:
1474 case OPWV216:
1475 return TTMP_32RegClassID;
1476 case OPW64:
1477 case OPWV232: return TTMP_64RegClassID;
1478 case OPW128: return TTMP_128RegClassID;
1479 case OPW256: return TTMP_256RegClassID;
1480 case OPW288: return TTMP_288RegClassID;
1481 case OPW320: return TTMP_320RegClassID;
1482 case OPW352: return TTMP_352RegClassID;
1483 case OPW384: return TTMP_384RegClassID;
1484 case OPW512: return TTMP_512RegClassID;
1485 }
1486}
1487
1488int AMDGPUDisassembler::getTTmpIdx(unsigned Val) const {
1489 using namespace AMDGPU::EncValues;
1490
1491 unsigned TTmpMin = isGFX9Plus() ? TTMP_GFX9PLUS_MIN : TTMP_VI_MIN;
1492 unsigned TTmpMax = isGFX9Plus() ? TTMP_GFX9PLUS_MAX : TTMP_VI_MAX;
1493
1494 return (TTmpMin <= Val && Val <= TTmpMax)? Val - TTmpMin : -1;
1495}
1496
1497MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width, unsigned Val,
1498 bool MandatoryLiteral,
1499 unsigned ImmWidth,
1500 AMDGPU::OperandSemantics Sema) const {
1501 using namespace AMDGPU::EncValues;
1502
1503 assert(Val < 1024); // enum10
1504
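  // In the 10-bit 'enum10' form, bit 9 selects the AGPR file and encodings
  // 256..511 name vector registers; everything else is handled by
  // decodeNonVGPRSrcOp.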
1505 bool IsAGPR = Val & 512;
1506 Val &= 511;
1507
1508 if (VGPR_MIN <= Val && Val <= VGPR_MAX) {
    return createRegOperand(IsAGPR ? getAgprClassId(Width)
                                   : getVgprClassId(Width), Val - VGPR_MIN);
  }
  return decodeNonVGPRSrcOp(Width, Val & 0xFF, MandatoryLiteral, ImmWidth,
                            Sema);
1514}
1515
1516MCOperand
1517AMDGPUDisassembler::decodeNonVGPRSrcOp(const OpWidthTy Width, unsigned Val,
1518 bool MandatoryLiteral, unsigned ImmWidth,
1519 AMDGPU::OperandSemantics Sema) const {
1520 // Cases when Val{8} is 1 (vgpr, agpr or true 16 vgpr) should have been
1521 // decoded earlier.
1522 assert(Val < (1 << 8) && "9-bit Src encoding when Val{8} is 0");
1523 using namespace AMDGPU::EncValues;
1524
1525 if (Val <= SGPR_MAX) {
1526 // "SGPR_MIN <= Val" is always true and causes compilation warning.
1527 static_assert(SGPR_MIN == 0);
1528 return createSRegOperand(getSgprClassId(Width), Val - SGPR_MIN);
1529 }
1530
1531 int TTmpIdx = getTTmpIdx(Val);
1532 if (TTmpIdx >= 0) {
1533 return createSRegOperand(getTtmpClassId(Width), TTmpIdx);
1534 }
1535
1536 if (INLINE_INTEGER_C_MIN <= Val && Val <= INLINE_INTEGER_C_MAX)
1537 return decodeIntImmed(Val);
1538
1539 if (INLINE_FLOATING_C_MIN <= Val && Val <= INLINE_FLOATING_C_MAX)
1540 return decodeFPImmed(ImmWidth, Val, Sema);
1541
1542 if (Val == LITERAL_CONST) {
1543 if (MandatoryLiteral)
1544 // Keep a sentinel value for deferred setting
1545 return MCOperand::createImm(LITERAL_CONST);
1546 else
1547 return decodeLiteralConstant(Sema == AMDGPU::OperandSemantics::FP64);
1548 }
1549
1550 switch (Width) {
1551 case OPW32:
1552 case OPW16:
1553 case OPWV216:
1554 return decodeSpecialReg32(Val);
1555 case OPW64:
1556 case OPWV232:
1557 return decodeSpecialReg64(Val);
1558 default:
1559 llvm_unreachable("unexpected immediate type");
1560 }
1561}
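// The non-VGPR path above tries interpretations in a fixed order: SGPRs, trap
// temporaries, inline integer constants, inline floating-point constants, the
// literal-constant marker (left as a sentinel when MandatoryLiteral is set),
// and finally the 32- or 64-bit special registers selected by the operand
// width.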
1562
1563// Bit 0 of DstY isn't stored in the instruction, because it's always the
1564// opposite of bit 0 of DstX.
1565MCOperand AMDGPUDisassembler::decodeVOPDDstYOp(MCInst &Inst,
1566 unsigned Val) const {
1567 int VDstXInd =
1568 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::vdstX);
1569 assert(VDstXInd != -1);
1570 assert(Inst.getOperand(VDstXInd).isReg());
1571 unsigned XDstReg = MRI.getEncodingValue(Inst.getOperand(VDstXInd).getReg());
1572 Val |= ~XDstReg & 1;
1573 auto Width = llvm::AMDGPUDisassembler::OPW32;
1574 return createRegOperand(getVgprClassId(Width), Val);
1575}
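// In other words, the encoding stores only the upper bits of vdstY; the code
// above reconstructs bit 0 as the complement of bit 0 of the already-decoded
// vdstX register, so vdstX and vdstY always have opposite register-number
// parity.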
1576
1577MCOperand AMDGPUDisassembler::decodeSpecialReg32(unsigned Val) const {
1578 using namespace AMDGPU;
1579
1580 switch (Val) {
1581 // clang-format off
1582 case 102: return createRegOperand(FLAT_SCR_LO);
1583 case 103: return createRegOperand(FLAT_SCR_HI);
1584 case 104: return createRegOperand(XNACK_MASK_LO);
1585 case 105: return createRegOperand(XNACK_MASK_HI);
1586 case 106: return createRegOperand(VCC_LO);
1587 case 107: return createRegOperand(VCC_HI);
1588 case 108: return createRegOperand(TBA_LO);
1589 case 109: return createRegOperand(TBA_HI);
1590 case 110: return createRegOperand(TMA_LO);
1591 case 111: return createRegOperand(TMA_HI);
1592 case 124:
1593 return isGFX11Plus() ? createRegOperand(SGPR_NULL) : createRegOperand(M0);
1594 case 125:
1595 return isGFX11Plus() ? createRegOperand(M0) : createRegOperand(SGPR_NULL);
1596 case 126: return createRegOperand(EXEC_LO);
1597 case 127: return createRegOperand(EXEC_HI);
1598 case 235: return createRegOperand(SRC_SHARED_BASE_LO);
1599 case 236: return createRegOperand(SRC_SHARED_LIMIT_LO);
1600 case 237: return createRegOperand(SRC_PRIVATE_BASE_LO);
1601 case 238: return createRegOperand(SRC_PRIVATE_LIMIT_LO);
1602 case 239: return createRegOperand(SRC_POPS_EXITING_WAVE_ID);
1603 case 251: return createRegOperand(SRC_VCCZ);
1604 case 252: return createRegOperand(SRC_EXECZ);
1605 case 253: return createRegOperand(SRC_SCC);
1606 case 254: return createRegOperand(LDS_DIRECT);
1607 default: break;
1608 // clang-format on
1609 }
1610 return errOperand(Val, "unknown operand encoding " + Twine(Val));
1611}
1612
1613MCOperand AMDGPUDisassembler::decodeSpecialReg64(unsigned Val) const {
1614 using namespace AMDGPU;
1615
1616 switch (Val) {
1617 case 102: return createRegOperand(FLAT_SCR);
1618 case 104: return createRegOperand(XNACK_MASK);
1619 case 106: return createRegOperand(VCC);
1620 case 108: return createRegOperand(TBA);
1621 case 110: return createRegOperand(TMA);
1622 case 124:
1623 if (isGFX11Plus())
1624 return createRegOperand(SGPR_NULL);
1625 break;
1626 case 125:
1627 if (!isGFX11Plus())
1628 return createRegOperand(SGPR_NULL);
1629 break;
1630 case 126: return createRegOperand(EXEC);
1631 case 235: return createRegOperand(SRC_SHARED_BASE);
1632 case 236: return createRegOperand(SRC_SHARED_LIMIT);
1633 case 237: return createRegOperand(SRC_PRIVATE_BASE);
1634 case 238: return createRegOperand(SRC_PRIVATE_LIMIT);
1635 case 239: return createRegOperand(SRC_POPS_EXITING_WAVE_ID);
1636 case 251: return createRegOperand(SRC_VCCZ);
1637 case 252: return createRegOperand(SRC_EXECZ);
1638 case 253: return createRegOperand(SRC_SCC);
1639 default: break;
1640 }
1641 return errOperand(Val, "unknown operand encoding " + Twine(Val));
1642}
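// Note the swap visible in both special-register tables above: before GFX11
// the encoding 124 selects m0 and 125 selects the null register, while on
// GFX11+ the two assignments are exchanged.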
1643
1644MCOperand
1645AMDGPUDisassembler::decodeSDWASrc(const OpWidthTy Width, const unsigned Val,
1646 unsigned ImmWidth,
1647 AMDGPU::OperandSemantics Sema) const {
1648 using namespace AMDGPU::SDWA;
1649 using namespace AMDGPU::EncValues;
1650
1651 if (STI.hasFeature(AMDGPU::FeatureGFX9) ||
1652 STI.hasFeature(AMDGPU::FeatureGFX10)) {
1653 // The cast to int avoids a "comparison is always true" warning when
1654 // comparing the unsigned Val against the lower bound of the VGPR range.
1655 if (int(SDWA9EncValues::SRC_VGPR_MIN) <= int(Val) &&
1656 Val <= SDWA9EncValues::SRC_VGPR_MAX) {
1657 return createRegOperand(getVgprClassId(Width),
1658 Val - SDWA9EncValues::SRC_VGPR_MIN);
1659 }
1660 if (SDWA9EncValues::SRC_SGPR_MIN <= Val &&
1661 Val <= (isGFX10Plus() ? SDWA9EncValues::SRC_SGPR_MAX_GFX10
1662 : SDWA9EncValues::SRC_SGPR_MAX_SI)) {
1663 return createSRegOperand(getSgprClassId(Width),
1664 Val - SDWA9EncValues::SRC_SGPR_MIN);
1665 }
1666 if (SDWA9EncValues::SRC_TTMP_MIN <= Val &&
1667 Val <= SDWA9EncValues::SRC_TTMP_MAX) {
1668 return createSRegOperand(getTtmpClassId(Width),
1669 Val - SDWA9EncValues::SRC_TTMP_MIN);
1670 }
1671
1672 const unsigned SVal = Val - SDWA9EncValues::SRC_SGPR_MIN;
1673
1674 if (INLINE_INTEGER_C_MIN <= SVal && SVal <= INLINE_INTEGER_C_MAX)
1675 return decodeIntImmed(SVal);
1676
1677 if (INLINE_FLOATING_C_MIN <= SVal && SVal <= INLINE_FLOATING_C_MAX)
1678 return decodeFPImmed(ImmWidth, SVal, Sema);
1679
1680 return decodeSpecialReg32(SVal);
1681 } else if (STI.hasFeature(AMDGPU::FeatureVolcanicIslands)) {
1682 return createRegOperand(getVgprClassId(Width), Val);
1683 }
1684 llvm_unreachable("unsupported target");
1685}
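// Summary of the SDWA9 source field handled above: VGPRs come first, then
// SGPRs, then trap temporaries; any remaining value is rebased by
// SRC_SGPR_MIN and treated like an ordinary scalar source (inline constant or
// special register). On Volcanic Islands the SDWA source is always a VGPR.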
1686
1687MCOperand AMDGPUDisassembler::decodeSDWASrc16(unsigned Val) const {
1688 return decodeSDWASrc(OPW16, Val, 16, AMDGPU::OperandSemantics::FP16);
1689}
1690
1691MCOperand AMDGPUDisassembler::decodeSDWASrc32(unsigned Val) const {
1692 return decodeSDWASrc(OPW32, Val, 32, AMDGPU::OperandSemantics::FP32);
1693}
1694
1695MCOperand AMDGPUDisassembler::decodeSDWAVopcDst(unsigned Val) const {
1696 using namespace AMDGPU::SDWA;
1697
1698 assert((STI.hasFeature(AMDGPU::FeatureGFX9) ||
1699 STI.hasFeature(AMDGPU::FeatureGFX10)) &&
1700 "SDWAVopcDst should be present only on GFX9+");
1701
1702 bool IsWave64 = STI.hasFeature(AMDGPU::FeatureWavefrontSize64);
1703
1704 if (Val & SDWA9EncValues::VOPC_DST_VCC_MASK) {
1705 Val &= SDWA9EncValues::VOPC_DST_SGPR_MASK;
1706
1707 int TTmpIdx = getTTmpIdx(Val);
1708 if (TTmpIdx >= 0) {
1709 auto TTmpClsId = getTtmpClassId(IsWave64 ? OPW64 : OPW32);
1710 return createSRegOperand(TTmpClsId, TTmpIdx);
1711 } else if (Val > SGPR_MAX) {
1712 return IsWave64 ? decodeSpecialReg64(Val)
1713 : decodeSpecialReg32(Val);
1714 } else {
1715 return createSRegOperand(getSgprClassId(IsWave64 ? OPW64 : OPW32), Val);
1716 }
1717 } else {
1718 return createRegOperand(IsWave64 ? AMDGPU::VCC : AMDGPU::VCC_LO);
1719 }
1720}
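// Reading of the decoding above: when the VCC-mask bit is clear the
// destination is the implicit VCC (VCC_LO in wave32); otherwise the remaining
// bits name an SGPR, a trap temporary, or a special register, with the width
// (32 or 64 bits) chosen by the wavefront size.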
1721
1722MCOperand AMDGPUDisassembler::decodeBoolReg(unsigned Val) const {
1723 return STI.hasFeature(AMDGPU::FeatureWavefrontSize64)
1724 ? decodeSrcOp(OPW64, Val)
1725 : decodeSrcOp(OPW32, Val);
1726}
1727
1728MCOperand AMDGPUDisassembler::decodeSplitBarrier(unsigned Val) const {
1729 return decodeSrcOp(OPW32, Val);
1730}
1731
1732MCOperand AMDGPUDisassembler::decodeDpp8FI(unsigned Val) const {
1733 if (Val != AMDGPU::DPP::DPP8_FI_0 && Val != AMDGPU::DPP::DPP8_FI_1)
1734 return MCOperand();
1735 return MCOperand::createImm(Val);
1736}
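// An out-of-range FI value produces a default-constructed (invalid)
// MCOperand, which callers can turn into a decode failure via the addOperand
// helper.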
1737
1738bool AMDGPUDisassembler::isVI() const {
1739 return STI.hasFeature(AMDGPU::FeatureVolcanicIslands);
1740}
1741
1742bool AMDGPUDisassembler::isGFX9() const { return AMDGPU::isGFX9(STI); }
1743
1744bool AMDGPUDisassembler::isGFX90A() const {
1745 return STI.hasFeature(AMDGPU::FeatureGFX90AInsts);
1746}
1747
1748bool AMDGPUDisassembler::isGFX9Plus() const { return AMDGPU::isGFX9Plus(STI); }
1749
1750bool AMDGPUDisassembler::isGFX10() const { return AMDGPU::isGFX10(STI); }
1751
1752bool AMDGPUDisassembler::isGFX10Plus() const {
1753 return AMDGPU::isGFX10Plus(STI);
1754}
1755
1756bool AMDGPUDisassembler::isGFX11() const {
1757 return STI.hasFeature(AMDGPU::FeatureGFX11);
1758}
1759
1760bool AMDGPUDisassembler::isGFX11Plus() const {
1761 return AMDGPU::isGFX11Plus(STI);
1762}
1763
1764bool AMDGPUDisassembler::isGFX12() const {
1765 return STI.hasFeature(AMDGPU::FeatureGFX12);
1766}
1767
1768bool AMDGPUDisassembler::isGFX12Plus() const {
1769 return AMDGPU::isGFX12Plus(STI);
1770}
1771
1772bool AMDGPUDisassembler::hasArchitectedFlatScratch() const {
1773 return STI.hasFeature(AMDGPU::FeatureArchitectedFlatScratch);
1774}
1775
1776bool AMDGPUDisassembler::hasKernargPreload() const {
1777 return AMDGPU::hasKernargPreload(STI);
1778}
1779
1780//===----------------------------------------------------------------------===//
1781// AMDGPU specific symbol handling
1782//===----------------------------------------------------------------------===//
1783
1784/// Return a string describing the reserved bit range specified by Mask, with
1785/// offset BaseBytes, for use in error comments. Mask must be a single
1786/// contiguous range of 1s surrounded by zeros. The format is meant to align
1787/// with the tables that describe these bits in llvm.org/docs/AMDGPUUsage.html.
1788static SmallString<32> getBitRangeFromMask(uint32_t Mask, unsigned BaseBytes) {
1789 SmallString<32> Result;
1790 raw_svector_ostream S(Result);
1791
1792 int TrailingZeros = llvm::countr_zero(Mask);
1793 int PopCount = llvm::popcount(Mask);
1794
1795 if (PopCount == 1) {
1796 S << "bit (" << (TrailingZeros + BaseBytes * CHAR_BIT) << ')';
1797 } else {
1798 S << "bits in range ("
1799 << (TrailingZeros + PopCount - 1 + BaseBytes * CHAR_BIT) << ':'
1800 << (TrailingZeros + BaseBytes * CHAR_BIT) << ')';
1801 }
1802
1803 return Result;
1804}
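// Example outputs: a Mask of 0x6 with BaseBytes = 0 yields
// "bits in range (2:1)", and a single-bit Mask of 0x100 with BaseBytes = 4
// yields "bit (40)".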
1805
1806#define GET_FIELD(MASK) (AMDHSA_BITS_GET(FourByteBuffer, MASK))
1807#define PRINT_DIRECTIVE(DIRECTIVE, MASK) \
1808 do { \
1809 KdStream << Indent << DIRECTIVE " " << GET_FIELD(MASK) << '\n'; \
1810 } while (0)
1811#define PRINT_PSEUDO_DIRECTIVE_COMMENT(DIRECTIVE, MASK) \
1812 do { \
1813 KdStream << Indent << MAI.getCommentString() << ' ' << DIRECTIVE " " \
1814 << GET_FIELD(MASK) << '\n'; \
1815 } while (0)
1816
1817#define CHECK_RESERVED_BITS_IMPL(MASK, DESC, MSG) \
1818 do { \
1819 if (FourByteBuffer & (MASK)) { \
1820 return createStringError(std::errc::invalid_argument, \
1821 "kernel descriptor " DESC \
1822 " reserved %s set" MSG, \
1823 getBitRangeFromMask((MASK), 0).c_str()); \
1824 } \
1825 } while (0)
1826
1827#define CHECK_RESERVED_BITS(MASK) CHECK_RESERVED_BITS_IMPL(MASK, #MASK, "")
1828#define CHECK_RESERVED_BITS_MSG(MASK, MSG) \
1829 CHECK_RESERVED_BITS_IMPL(MASK, #MASK, ", " MSG)
1830#define CHECK_RESERVED_BITS_DESC(MASK, DESC) \
1831 CHECK_RESERVED_BITS_IMPL(MASK, DESC, "")
1832#define CHECK_RESERVED_BITS_DESC_MSG(MASK, DESC, MSG) \
1833 CHECK_RESERVED_BITS_IMPL(MASK, DESC, ", " MSG)
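// For reference, CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_PRIORITY) tests
// FourByteBuffer against that mask and, if any reserved bit is set, returns a
// createStringError that names the mask and the offending bit range computed
// by getBitRangeFromMask.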
1834
1835// NOLINTNEXTLINE(readability-identifier-naming)
1836Expected<bool> AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC1(
1837 uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
1838 using namespace amdhsa;
1839 StringRef Indent = "\t";
1840
1841 // We cannot accurately recover the number of VGPRs used from
1842 // GRANULATED_WORKITEM_VGPR_COUNT alone. What matters is that the reassembled
1843 // binary ends up with the same GRANULATED_WORKITEM_VGPR_COUNT, so we simply
1844 // invert the calculation the assembler performs.
1845
1846 uint32_t GranulatedWorkitemVGPRCount =
1847 GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT);
1848
1849 uint32_t NextFreeVGPR =
1850 (GranulatedWorkitemVGPRCount + 1) *
1851 AMDGPU::IsaInfo::getVGPREncodingGranule(&STI, EnableWavefrontSize32);
1852
1853 KdStream << Indent << ".amdhsa_next_free_vgpr " << NextFreeVGPR << '\n';
1854
1855 // We cannot recover the individual values that were combined into
1856 // GRANULATED_WAVEFRONT_SGPR_COUNT, so the original settings of the following
1857 // directives can't be computed:
1858 // .amdhsa_reserve_vcc
1859 // .amdhsa_reserve_flat_scratch
1860 // .amdhsa_reserve_xnack_mask
1861 // They take their respective default values if not specified in the assembly.
1862 //
1863 // GRANULATED_WAVEFRONT_SGPR_COUNT
1864 // = f(NEXT_FREE_SGPR + VCC + FLAT_SCRATCH + XNACK_MASK)
1865 //
1866 // We compute the inverse as though all directives apart from NEXT_FREE_SGPR
1867 // are set to 0. So while disassembling we consider that:
1868 //
1869 // GRANULATED_WAVEFRONT_SGPR_COUNT
1870 // = f(NEXT_FREE_SGPR + 0 + 0 + 0)
1871 //
1872 // The disassembler cannot recover the original values of those 3 directives.
1873
1874 uint32_t GranulatedWavefrontSGPRCount =
1875 GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT);
1876
1877 if (isGFX10Plus())
1878 CHECK_RESERVED_BITS_MSG(COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
1879 "must be zero on gfx10+");
1880
1881 uint32_t NextFreeSGPR = (GranulatedWavefrontSGPRCount + 1) *
1882 AMDGPU::IsaInfo::getSGPREncodingGranule(&STI);
1883
1884 KdStream << Indent << ".amdhsa_reserve_vcc " << 0 << '\n';
1885 if (!hasArchitectedFlatScratch())
1886 KdStream << Indent << ".amdhsa_reserve_flat_scratch " << 0 << '\n';
1887 KdStream << Indent << ".amdhsa_reserve_xnack_mask " << 0 << '\n';
1888 KdStream << Indent << ".amdhsa_next_free_sgpr " << NextFreeSGPR << "\n";
1889
1890 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_PRIORITY);
1891
1892 PRINT_DIRECTIVE(".amdhsa_float_round_mode_32",
1893 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32);
1894 PRINT_DIRECTIVE(".amdhsa_float_round_mode_16_64",
1895 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64);
1896 PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_32",
1897 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32);
1898 PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_16_64",
1899 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64);
1900
1901 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_PRIV);
1902
1903 if (!isGFX12Plus())
1904 PRINT_DIRECTIVE(".amdhsa_dx10_clamp",
1905 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP);
1906
1907 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_DEBUG_MODE);
1908
1909 if (!isGFX12Plus())
1910 PRINT_DIRECTIVE(".amdhsa_ieee_mode",
1911 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE);
1912
1913 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_BULKY);
1914 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_CDBG_USER);
1915
1916 if (isGFX9Plus())
1917 PRINT_DIRECTIVE(".amdhsa_fp16_overflow", COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL);
1918
1919 if (!isGFX9Plus())
1920 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC1_GFX6_GFX8_RESERVED0,
1921 "COMPUTE_PGM_RSRC1", "must be zero pre-gfx9");
1922
1923 CHECK_RESERVED_BITS_DESC(COMPUTE_PGM_RSRC1_RESERVED1, "COMPUTE_PGM_RSRC1");
1924
1925 if (!isGFX10Plus())
1926 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC1_GFX6_GFX9_RESERVED2,
1927 "COMPUTE_PGM_RSRC1", "must be zero pre-gfx10");
1928
1929 if (isGFX10Plus()) {
1930 PRINT_DIRECTIVE(".amdhsa_workgroup_processor_mode",
1931 COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE);
1932 PRINT_DIRECTIVE(".amdhsa_memory_ordered", COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED);
1933 PRINT_DIRECTIVE(".amdhsa_forward_progress", COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS);
1934 }
1935
1936 if (isGFX12Plus())
1937 PRINT_DIRECTIVE(".amdhsa_round_robin_scheduling",
1938 COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN);
1939
1940 return true;
1941}
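// Worked example for the .amdhsa_next_free_vgpr computation in
// decodeCOMPUTE_PGM_RSRC1 above, assuming a target whose VGPR encoding
// granule is 4: a GRANULATED_WORKITEM_VGPR_COUNT of 5 is emitted as
// ".amdhsa_next_free_vgpr 24", i.e. (5 + 1) * 4.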
1942
1943// NOLINTNEXTLINE(readability-identifier-naming)
1944Expected<bool> AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC2(
1945 uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
1946 using namespace amdhsa;
1947 StringRef Indent = "\t";
1948 if (hasArchitectedFlatScratch())
1949 PRINT_DIRECTIVE(".amdhsa_enable_private_segment",
1950 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
1951 else
1952 PRINT_DIRECTIVE(".amdhsa_system_sgpr_private_segment_wavefront_offset",
1953 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
1954 PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_x",
1955 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X);
1956 PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_y",
1957 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y);
1958 PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_z",
1959 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z);
1960 PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_info",
1961 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO);
1962 PRINT_DIRECTIVE(".amdhsa_system_vgpr_workitem_id",
1963 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID);
1964
1965 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_ADDRESS_WATCH);
1966 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_MEMORY);
1967 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC2_GRANULATED_LDS_SIZE);
1968
1969 PRINT_DIRECTIVE(
1970 ".amdhsa_exception_fp_ieee_invalid_op",
1971 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION);
1972 PRINT_DIRECTIVE(".amdhsa_exception_fp_denorm_src",
1973 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE);
1974 PRINT_DIRECTIVE(
1975 ".amdhsa_exception_fp_ieee_div_zero",
1976 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO);
1977 PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_overflow",
1978 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW);
1979 PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_underflow",
1980 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW);
1981 PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_inexact",
1982 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT);
1983 PRINT_DIRECTIVE(".amdhsa_exception_int_div_zero",
1984 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO);
1985
1986 CHECK_RESERVED_BITS_DESC(COMPUTE_PGM_RSRC2_RESERVED0, "COMPUTE_PGM_RSRC2");
1987
1988 return true;
1989}
1990
1991// NOLINTNEXTLINE(readability-identifier-naming)
1992Expected<bool> AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC3(
1993 uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
1994 using namespace amdhsa;
1995 StringRef Indent = "\t";
1996 if (isGFX90A()) {
1997 KdStream << Indent << ".amdhsa_accum_offset "
1998 << (GET_FIELD(COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET) + 1) * 4
1999 << '\n';
2000
2001 PRINT_DIRECTIVE(".amdhsa_tg_split", COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT);
2002
2003 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX90A_RESERVED0,
2004 "COMPUTE_PGM_RSRC3", "must be zero on gfx90a");
2005 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX90A_RESERVED1,
2006 "COMPUTE_PGM_RSRC3", "must be zero on gfx90a");
2007 } else if (isGFX10Plus()) {
2008 // Bits [0-3].
2009 if (!isGFX12Plus()) {
2010 if (!EnableWavefrontSize32 || !*EnableWavefrontSize32) {
2011 PRINT_DIRECTIVE(".amdhsa_shared_vgpr_count",
2012 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT);
2013 } else {
2014 PRINT_PSEUDO_DIRECTIVE_COMMENT(
2015 "SHARED_VGPR_COUNT",
2016 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT);
2017 }
2018 } else {
2019 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX12_PLUS_RESERVED0,
2020 "COMPUTE_PGM_RSRC3",
2021 "must be zero on gfx12+");
2022 }
2023
2024 // Bits [4-11].
2025 if (isGFX11()) {
2026 PRINT_PSEUDO_DIRECTIVE_COMMENT("INST_PREF_SIZE",
2027 COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE);
2028 PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_START",
2029 COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_START);
2030 PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_END",
2031 COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_END);
2032 } else if (isGFX12Plus()) {
2033 PRINT_PSEUDO_DIRECTIVE_COMMENT(
2034 "INST_PREF_SIZE", COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE);
2035 } else {
2036 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_RESERVED1,
2037 "COMPUTE_PGM_RSRC3",
2038 "must be zero on gfx10");
2039 }
2040
2041 // Bits [12].
2042 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED2,
2043 "COMPUTE_PGM_RSRC3", "must be zero on gfx10+");
2044
2045 // Bits [13].
2046 if (isGFX12Plus()) {
2047 PRINT_PSEUDO_DIRECTIVE_COMMENT("GLG_EN",
2048 COMPUTE_PGM_RSRC3_GFX12_PLUS_GLG_EN);
2049 } else {
2050 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_GFX11_RESERVED3,
2051 "COMPUTE_PGM_RSRC3",
2052 "must be zero on gfx10 or gfx11");
2053 }
2054
2055 // Bits [14-30].
2056 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED4,
2057 "COMPUTE_PGM_RSRC3", "must be zero on gfx10+");
2058
2059 // Bits [31].
2060 if (isGFX11Plus()) {
2061 PRINT_PSEUDO_DIRECTIVE_COMMENT("IMAGE_OP",
2062 COMPUTE_PGM_RSRC3_GFX11_PLUS_IMAGE_OP);
2063 } else {
2064 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_RESERVED5,
2065 "COMPUTE_PGM_RSRC3",
2066 "must be zero on gfx10");
2067 }
2068 } else if (FourByteBuffer) {
2069 return createStringError(
2070 std::errc::invalid_argument,
2071 "kernel descriptor COMPUTE_PGM_RSRC3 must be all zero before gfx9");
2072 }
2073 return true;
2074}
2075#undef PRINT_PSEUDO_DIRECTIVE_COMMENT
2076#undef PRINT_DIRECTIVE
2077#undef GET_FIELD
2078#undef CHECK_RESERVED_BITS_IMPL
2079#undef CHECK_RESERVED_BITS
2080#undef CHECK_RESERVED_BITS_MSG
2081#undef CHECK_RESERVED_BITS_DESC
2082#undef CHECK_RESERVED_BITS_DESC_MSG
2083
2084/// Create an error object to return from onSymbolStart for reserved kernel
2085/// descriptor bits being set.
2086static Error createReservedKDBitsError(uint32_t Mask, unsigned BaseBytes,
2087 const char *Msg = "") {
2088 return createStringError(
2089 std::errc::invalid_argument, "kernel descriptor reserved %s set%s%s",
2090 getBitRangeFromMask(Mask, BaseBytes).c_str(), *Msg ? ", " : "", Msg);
2091}
2092
2093/// Create an error object to return from onSymbolStart for reserved kernel
2094/// descriptor bytes being set.
2095static Error createReservedKDBytesError(unsigned BaseInBytes,
2096 unsigned WidthInBytes) {
2097 // Create an error comment in the same format as the "Kernel Descriptor"
2098 // table here: https://llvm.org/docs/AMDGPUUsage.html#kernel-descriptor .
2099 return createStringError(
2100 std::errc::invalid_argument,
2101 "kernel descriptor reserved bits in range (%u:%u) set",
2102 (BaseInBytes + WidthInBytes) * CHAR_BIT - 1, BaseInBytes * CHAR_BIT);
2103}
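// Example message, assuming RESERVED0_OFFSET is byte 12 of the descriptor:
// createReservedKDBytesError(amdhsa::RESERVED0_OFFSET, 4) produces
// "kernel descriptor reserved bits in range (127:96) set".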
2104
2105Expected<bool> AMDGPUDisassembler::decodeKernelDescriptorDirective(
2106 DataExtractor::Cursor &Cursor, ArrayRef<uint8_t> Bytes,
2107 raw_string_ostream &KdStream) const {
2108#define PRINT_DIRECTIVE(DIRECTIVE, MASK) \
2109 do { \
2110 KdStream << Indent << DIRECTIVE " " \
2111 << ((TwoByteBuffer & MASK) >> (MASK##_SHIFT)) << '\n'; \
2112 } while (0)
2113
2114 uint16_t TwoByteBuffer = 0;
2115 uint32_t FourByteBuffer = 0;
2116
2117 StringRef ReservedBytes;
2118 StringRef Indent = "\t";
2119
2120 assert(Bytes.size() == 64);
2121 DataExtractor DE(Bytes, /*IsLittleEndian=*/true, /*AddressSize=*/8);
2122
2123 switch (Cursor.tell()) {
2124 case amdhsa::GROUP_SEGMENT_FIXED_SIZE_OFFSET:
2125 FourByteBuffer = DE.getU32(Cursor);
2126 KdStream << Indent << ".amdhsa_group_segment_fixed_size " << FourByteBuffer
2127 << '\n';
2128 return true;
2129
2130 case amdhsa::PRIVATE_SEGMENT_FIXED_SIZE_OFFSET:
2131 FourByteBuffer = DE.getU32(Cursor);
2132 KdStream << Indent << ".amdhsa_private_segment_fixed_size "
2133 << FourByteBuffer << '\n';
2134 return true;
2135
2136 case amdhsa::KERNARG_SIZE_OFFSET:
2137 FourByteBuffer = DE.getU32(Cursor);
2138 KdStream << Indent << ".amdhsa_kernarg_size "
2139 << FourByteBuffer << '\n';
2140 return true;
2141
2142 case amdhsa::RESERVED0_OFFSET:
2143 // 4 reserved bytes, must be 0.
2144 ReservedBytes = DE.getBytes(Cursor, 4);
2145 for (int I = 0; I < 4; ++I) {
2146 if (ReservedBytes[I] != 0)
2147 return createReservedKDBytesError(amdhsa::RESERVED0_OFFSET, 4);
2148 }
2149 return true;
2150
2151 case amdhsa::KERNEL_CODE_ENTRY_BYTE_OFFSET_OFFSET:
2152 // KERNEL_CODE_ENTRY_BYTE_OFFSET
2153 // So far no directive controls this for Code Object V3, so simply skip for
2154 // disassembly.
2155 DE.skip(Cursor, 8);
2156 return true;
2157
2158 case amdhsa::RESERVED1_OFFSET:
2159 // 20 reserved bytes, must be 0.
2160 ReservedBytes = DE.getBytes(Cursor, 20);
2161 for (int I = 0; I < 20; ++I) {
2162 if (ReservedBytes[I] != 0)
2163 return createReservedKDBytesError(amdhsa::RESERVED1_OFFSET, 20);
2164 }
2165 return true;
2166
2167 case amdhsa::COMPUTE_PGM_RSRC3_OFFSET:
2168 FourByteBuffer = DE.getU32(Cursor);
2169 return decodeCOMPUTE_PGM_RSRC3(FourByteBuffer, KdStream);
2170
2171 case amdhsa::COMPUTE_PGM_RSRC1_OFFSET:
2172 FourByteBuffer = DE.getU32(Cursor);
2173 return decodeCOMPUTE_PGM_RSRC1(FourByteBuffer, KdStream);
2174
2175 case amdhsa::COMPUTE_PGM_RSRC2_OFFSET:
2176 FourByteBuffer = DE.getU32(Cursor);
2177 return decodeCOMPUTE_PGM_RSRC2(FourByteBuffer, KdStream);
2178
2179 case amdhsa::KERNEL_CODE_PROPERTIES_OFFSET:
2180 using namespace amdhsa;
2181 TwoByteBuffer = DE.getU16(Cursor);
2182
2183 if (!hasArchitectedFlatScratch())
2184 PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_buffer",
2185 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER);
2186 PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_ptr",
2187 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR);
2188 PRINT_DIRECTIVE(".amdhsa_user_sgpr_queue_ptr",
2189 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR);
2190 PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_segment_ptr",
2191 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR);
2192 PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_id",
2193 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID);
2194 if (!hasArchitectedFlatScratch())
2195 PRINT_DIRECTIVE(".amdhsa_user_sgpr_flat_scratch_init",
2196 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT);
2197 PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_size",
2198 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE);
2199
2200 if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED0)
2201 return createReservedKDBitsError(KERNEL_CODE_PROPERTY_RESERVED0,
2202 amdhsa::KERNEL_CODE_PROPERTIES_OFFSET);
2203
2204 // Reserved for GFX9
2205 if (isGFX9() &&
2206 (TwoByteBuffer & KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32)) {
2207 return createReservedKDBitsError(
2208 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
2209 amdhsa::KERNEL_CODE_PROPERTIES_OFFSET, "must be zero on gfx9");
2210 } else if (isGFX10Plus()) {
2211 PRINT_DIRECTIVE(".amdhsa_wavefront_size32",
2212 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
2213 }
2214
2215 if (CodeObjectVersion >= AMDGPU::AMDHSA_COV5)
2216 PRINT_DIRECTIVE(".amdhsa_uses_dynamic_stack",
2217 KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK);
2218
2219 if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED1) {
2220 return createReservedKDBitsError(KERNEL_CODE_PROPERTY_RESERVED1,
2221 amdhsa::KERNEL_CODE_PROPERTIES_OFFSET);
2222 }
2223
2224 return true;
2225
2226 case amdhsa::KERNARG_PRELOAD_OFFSET:
2227 using namespace amdhsa;
2228 TwoByteBuffer = DE.getU16(Cursor);
2229 if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_LENGTH) {
2230 PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_preload_length",
2231 KERNARG_PRELOAD_SPEC_LENGTH);
2232 }
2233
2234 if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_OFFSET) {
2235 PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_preload_offset",
2236 KERNARG_PRELOAD_SPEC_OFFSET);
2237 }
2238 return true;
2239
2240 case amdhsa::RESERVED3_OFFSET:
2241 // 4 bytes from here are reserved, must be 0.
2242 ReservedBytes = DE.getBytes(Cursor, 4);
2243 for (int I = 0; I < 4; ++I) {
2244 if (ReservedBytes[I] != 0)
2245 return createReservedKDBytesError(amdhsa::RESERVED3_OFFSET, 4);
2246 }
2247 return true;
2248
2249 default:
2250 llvm_unreachable("Unhandled index. Case statements cover everything.");
2251 return true;
2252 }
2253#undef PRINT_DIRECTIVE
2254}
2255
2256Expected<bool> AMDGPUDisassembler::decodeKernelDescriptor(
2257 StringRef KdName, ArrayRef<uint8_t> Bytes, uint64_t KdAddress) const {
2258
2259 // CP microcode requires the kernel descriptor to be 64-byte aligned.
2260 if (Bytes.size() != 64 || KdAddress % 64 != 0)
2261 return createStringError(std::errc::invalid_argument,
2262 "kernel descriptor must be 64-byte aligned");
2263
2264 // FIXME: We can't actually decode "in order" as is done below, as e.g. GFX10
2265 // requires us to know the setting of .amdhsa_wavefront_size32 in order to
2266 // accurately produce .amdhsa_next_free_vgpr, and they appear in the wrong
2267 // order. Workaround this by first looking up .amdhsa_wavefront_size32 here
2268 // when required.
2269 if (isGFX10Plus()) {
2270 uint16_t KernelCodeProperties =
2271 support::endian::read16(&Bytes[amdhsa::KERNEL_CODE_PROPERTIES_OFFSET],
2272 llvm::endianness::little);
2273 EnableWavefrontSize32 =
2274 AMDHSA_BITS_GET(KernelCodeProperties,
2275 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
2276 }
2277
2278 std::string Kd;
2279 raw_string_ostream KdStream(Kd);
2280 KdStream << ".amdhsa_kernel " << KdName << '\n';
2281
2282 DataExtractor::Cursor C(0);
2283 while (C && C.tell() < Bytes.size()) {
2284 Expected<bool> Res = decodeKernelDescriptorDirective(C, Bytes, KdStream);
2285
2286 cantFail(C.takeError());
2287
2288 if (!Res)
2289 return Res;
2290 }
2291 KdStream << ".end_amdhsa_kernel\n";
2292 outs() << KdStream.str();
2293 return true;
2294}
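// The emitted text roughly has the shape:
//   .amdhsa_kernel <name>
//           .amdhsa_group_segment_fixed_size <n>
//           ...  (one directive or comment per descriptor field)
//   .end_amdhsa_kernel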
2295
2296Expected<bool> AMDGPUDisassembler::onSymbolStart(SymbolInfoTy &Symbol,
2297 uint64_t &Size,
2298 ArrayRef<uint8_t> Bytes,
2299 uint64_t Address) const {
2300 // Right now, only the kernel descriptor needs to be handled.
2301 // All other symbols are ignored; they receive no target-specific handling.
2302 // TODO:
2303 // Fix the spurious symbol issue for AMDGPU kernels. Exists for both Code
2304 // Object V2 and V3 when symbols are marked protected.
2305
2306 // amd_kernel_code_t for Code Object V2.
2307 if (Symbol.Type == ELF::STT_AMDGPU_HSA_KERNEL) {
2308 Size = 256;
2309 return createStringError(std::errc::invalid_argument,
2310 "code object v2 is not supported");
2311 }
2312
2313 // Code Object V3 kernel descriptors.
2314 StringRef Name = Symbol.Name;
2315 if (Symbol.Type == ELF::STT_OBJECT && Name.ends_with(StringRef(".kd"))) {
2316 Size = 64; // Size = 64 regardless of success or failure.
2317 return decodeKernelDescriptor(Name.drop_back(3), Bytes, Address);
2318 }
2319
2320 return false;
2321}
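// Callers such as llvm-objdump typically invoke this hook at every symbol
// boundary. Only STT_OBJECT symbols named "<kernel>.kd" trigger descriptor
// decoding, and Size is fixed at 64 so the caller skips the descriptor bytes
// instead of disassembling them as instructions.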
2322
2323//===----------------------------------------------------------------------===//
2324// AMDGPUSymbolizer
2325//===----------------------------------------------------------------------===//
2326
2327// Try to find a symbol name for the specified label.
2328bool AMDGPUSymbolizer::tryAddingSymbolicOperand(
2329 MCInst &Inst, raw_ostream & /*cStream*/, int64_t Value,
2330 uint64_t /*Address*/, bool IsBranch, uint64_t /*Offset*/,
2331 uint64_t /*OpSize*/, uint64_t /*InstSize*/) {
2332
2333 if (!IsBranch) {
2334 return false;
2335 }
2336
2337 auto *Symbols = static_cast<SectionSymbolsTy *>(DisInfo);
2338 if (!Symbols)
2339 return false;
2340
2341 auto Result = llvm::find_if(*Symbols, [Value](const SymbolInfoTy &Val) {
2342 return Val.Addr == static_cast<uint64_t>(Value) &&
2343 Val.Type == ELF::STT_NOTYPE;
2344 });
2345 if (Result != Symbols->end()) {
2346 auto *Sym = Ctx.getOrCreateSymbol(Result->Name);
2347 const auto *Add = MCSymbolRefExpr::create(Sym, Ctx);
2348 Inst.addOperand(MCOperand::createExpr(Add));
2349 return true;
2350 }
2351 // Add to list of referenced addresses, so caller can synthesize a label.
2352 ReferencedAddresses.push_back(static_cast<uint64_t>(Value));
2353 return false;
2354}
2355
2356void AMDGPUSymbolizer::tryAddingPcLoadReferenceComment(raw_ostream &cStream,
2357 int64_t Value,
2358 uint64_t Address) {
2359 llvm_unreachable("unimplemented");
2360}
2361
2362//===----------------------------------------------------------------------===//
2363// Initialization
2364//===----------------------------------------------------------------------===//
2365
2366static MCSymbolizer *createAMDGPUSymbolizer(const Triple &/*TT*/,
2367 LLVMOpInfoCallback /*GetOpInfo*/,
2368 LLVMSymbolLookupCallback /*SymbolLookUp*/,
2369 void *DisInfo,
2370 MCContext *Ctx,
2371 std::unique_ptr<MCRelocationInfo> &&RelInfo) {
2372 return new AMDGPUSymbolizer(*Ctx, std::move(RelInfo), DisInfo);
2373}
2374
2375static MCDisassembler *createAMDGPUDisassembler(const Target &T,
2376 const MCSubtargetInfo &STI,
2377 MCContext &Ctx) {
2378 return new AMDGPUDisassembler(STI, Ctx, T.createMCInstrInfo());
2379}
2380
2381extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUDisassembler() {
2382 TargetRegistry::RegisterMCDisassembler(getTheGCNTarget(),
2383 createAMDGPUDisassembler);
2384 TargetRegistry::RegisterMCSymbolizer(getTheGCNTarget(),
2385 createAMDGPUSymbolizer);
2386}
2387
