1 | //===- AMDGPUMCInstLower.cpp - Lower AMDGPU MachineInstr to an MCInst -----===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | /// \file |
10 | /// Code to lower AMDGPU MachineInstrs to their corresponding MCInst. |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | // |
14 | |
15 | #include "AMDGPUMCInstLower.h" |
16 | #include "AMDGPU.h" |
17 | #include "AMDGPUAsmPrinter.h" |
18 | #include "AMDGPUMachineFunction.h" |
19 | #include "AMDGPUTargetMachine.h" |
20 | #include "MCTargetDesc/AMDGPUInstPrinter.h" |
21 | #include "MCTargetDesc/AMDGPUMCTargetDesc.h" |
22 | #include "llvm/CodeGen/MachineBasicBlock.h" |
23 | #include "llvm/CodeGen/MachineInstr.h" |
24 | #include "llvm/IR/Constants.h" |
25 | #include "llvm/IR/Function.h" |
26 | #include "llvm/IR/GlobalVariable.h" |
27 | #include "llvm/MC/MCCodeEmitter.h" |
28 | #include "llvm/MC/MCContext.h" |
29 | #include "llvm/MC/MCExpr.h" |
30 | #include "llvm/MC/MCInst.h" |
31 | #include "llvm/MC/MCObjectStreamer.h" |
32 | #include "llvm/MC/MCStreamer.h" |
33 | #include "llvm/Support/ErrorHandling.h" |
34 | #include "llvm/Support/Format.h" |
35 | #include <algorithm> |
36 | |
37 | using namespace llvm; |
38 | |
39 | #include "AMDGPUGenMCPseudoLowering.inc" |
40 | |
41 | AMDGPUMCInstLower::AMDGPUMCInstLower(MCContext &ctx, |
42 | const TargetSubtargetInfo &st, |
43 | const AsmPrinter &ap): |
44 | Ctx(ctx), ST(st), AP(ap) { } |
45 | |
46 | static MCSymbolRefExpr::VariantKind getVariantKind(unsigned MOFlags) { |
47 | switch (MOFlags) { |
48 | default: |
49 | return MCSymbolRefExpr::VK_None; |
50 | case SIInstrInfo::MO_GOTPCREL: |
51 | return MCSymbolRefExpr::VK_GOTPCREL; |
52 | case SIInstrInfo::MO_GOTPCREL32_LO: |
53 | return MCSymbolRefExpr::VK_AMDGPU_GOTPCREL32_LO; |
54 | case SIInstrInfo::MO_GOTPCREL32_HI: |
55 | return MCSymbolRefExpr::VK_AMDGPU_GOTPCREL32_HI; |
56 | case SIInstrInfo::MO_REL32_LO: |
57 | return MCSymbolRefExpr::VK_AMDGPU_REL32_LO; |
58 | case SIInstrInfo::MO_REL32_HI: |
59 | return MCSymbolRefExpr::VK_AMDGPU_REL32_HI; |
60 | case SIInstrInfo::MO_ABS32_LO: |
61 | return MCSymbolRefExpr::VK_AMDGPU_ABS32_LO; |
62 | case SIInstrInfo::MO_ABS32_HI: |
63 | return MCSymbolRefExpr::VK_AMDGPU_ABS32_HI; |
64 | } |
65 | } |
66 | |
67 | bool AMDGPUMCInstLower::lowerOperand(const MachineOperand &MO, |
68 | MCOperand &MCOp) const { |
69 | switch (MO.getType()) { |
70 | default: |
71 | break; |
72 | case MachineOperand::MO_Immediate: |
73 | MCOp = MCOperand::createImm(Val: MO.getImm()); |
74 | return true; |
75 | case MachineOperand::MO_Register: |
76 | MCOp = MCOperand::createReg(Reg: AMDGPU::getMCReg(Reg: MO.getReg(), STI: ST)); |
77 | return true; |
78 | case MachineOperand::MO_MachineBasicBlock: |
79 | MCOp = MCOperand::createExpr( |
80 | Val: MCSymbolRefExpr::create(Symbol: MO.getMBB()->getSymbol(), Ctx)); |
81 | return true; |
82 | case MachineOperand::MO_GlobalAddress: { |
83 | const GlobalValue *GV = MO.getGlobal(); |
84 | SmallString<128> SymbolName; |
85 | AP.getNameWithPrefix(Name&: SymbolName, GV); |
86 | MCSymbol *Sym = Ctx.getOrCreateSymbol(Name: SymbolName); |
87 | const MCExpr *Expr = |
88 | MCSymbolRefExpr::create(Symbol: Sym, Kind: getVariantKind(MOFlags: MO.getTargetFlags()),Ctx); |
89 | int64_t Offset = MO.getOffset(); |
90 | if (Offset != 0) { |
91 | Expr = MCBinaryExpr::createAdd(LHS: Expr, |
92 | RHS: MCConstantExpr::create(Value: Offset, Ctx), Ctx); |
93 | } |
94 | MCOp = MCOperand::createExpr(Val: Expr); |
95 | return true; |
96 | } |
97 | case MachineOperand::MO_ExternalSymbol: { |
98 | MCSymbol *Sym = Ctx.getOrCreateSymbol(Name: StringRef(MO.getSymbolName())); |
99 | Sym->setExternal(true); |
100 | const MCSymbolRefExpr *Expr = MCSymbolRefExpr::create(Symbol: Sym, Ctx); |
101 | MCOp = MCOperand::createExpr(Val: Expr); |
102 | return true; |
103 | } |
104 | case MachineOperand::MO_RegisterMask: |
105 | // Regmasks are like implicit defs. |
106 | return false; |
107 | case MachineOperand::MO_MCSymbol: |
108 | if (MO.getTargetFlags() == SIInstrInfo::MO_FAR_BRANCH_OFFSET) { |
109 | MCSymbol *Sym = MO.getMCSymbol(); |
110 | MCOp = MCOperand::createExpr(Val: Sym->getVariableValue()); |
111 | return true; |
112 | } |
113 | break; |
114 | } |
115 | llvm_unreachable("unknown operand type" ); |
116 | } |
117 | |
118 | void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const { |
119 | unsigned Opcode = MI->getOpcode(); |
120 | const auto *TII = static_cast<const SIInstrInfo*>(ST.getInstrInfo()); |
121 | |
122 | // FIXME: Should be able to handle this with emitPseudoExpansionLowering. We |
123 | // need to select it to the subtarget specific version, and there's no way to |
124 | // do that with a single pseudo source operation. |
125 | if (Opcode == AMDGPU::S_SETPC_B64_return) |
126 | Opcode = AMDGPU::S_SETPC_B64; |
127 | else if (Opcode == AMDGPU::SI_CALL) { |
128 | // SI_CALL is just S_SWAPPC_B64 with an additional operand to track the |
129 | // called function (which we need to remove here). |
130 | OutMI.setOpcode(TII->pseudoToMCOpcode(AMDGPU::S_SWAPPC_B64)); |
131 | MCOperand Dest, Src; |
132 | lowerOperand(MO: MI->getOperand(i: 0), MCOp&: Dest); |
133 | lowerOperand(MO: MI->getOperand(i: 1), MCOp&: Src); |
134 | OutMI.addOperand(Op: Dest); |
135 | OutMI.addOperand(Op: Src); |
136 | return; |
137 | } else if (Opcode == AMDGPU::SI_TCRETURN || |
138 | Opcode == AMDGPU::SI_TCRETURN_GFX) { |
139 | // TODO: How to use branch immediate and avoid register+add? |
140 | Opcode = AMDGPU::S_SETPC_B64; |
141 | } |
142 | |
143 | int MCOpcode = TII->pseudoToMCOpcode(Opcode); |
144 | if (MCOpcode == -1) { |
145 | LLVMContext &C = MI->getParent()->getParent()->getFunction().getContext(); |
146 | C.emitError(ErrorStr: "AMDGPUMCInstLower::lower - Pseudo instruction doesn't have " |
147 | "a target-specific version: " + Twine(MI->getOpcode())); |
148 | } |
149 | |
150 | OutMI.setOpcode(MCOpcode); |
151 | |
152 | for (const MachineOperand &MO : MI->explicit_operands()) { |
153 | MCOperand MCOp; |
154 | lowerOperand(MO, MCOp); |
155 | OutMI.addOperand(Op: MCOp); |
156 | } |
157 | |
158 | int FIIdx = AMDGPU::getNamedOperandIdx(MCOpcode, AMDGPU::OpName::fi); |
159 | if (FIIdx >= (int)OutMI.getNumOperands()) |
160 | OutMI.addOperand(Op: MCOperand::createImm(Val: 0)); |
161 | } |
162 | |
163 | bool AMDGPUAsmPrinter::lowerOperand(const MachineOperand &MO, |
164 | MCOperand &MCOp) const { |
165 | const GCNSubtarget &STI = MF->getSubtarget<GCNSubtarget>(); |
166 | AMDGPUMCInstLower MCInstLowering(OutContext, STI, *this); |
167 | return MCInstLowering.lowerOperand(MO, MCOp); |
168 | } |
169 | |
170 | const MCExpr *AMDGPUAsmPrinter::lowerConstant(const Constant *CV) { |
171 | |
172 | // Intercept LDS variables with known addresses |
173 | if (const GlobalVariable *GV = dyn_cast<const GlobalVariable>(Val: CV)) { |
174 | if (std::optional<uint32_t> Address = |
175 | AMDGPUMachineFunction::getLDSAbsoluteAddress(GV: *GV)) { |
176 | auto *IntTy = Type::getInt32Ty(C&: CV->getContext()); |
177 | return AsmPrinter::lowerConstant(CV: ConstantInt::get(Ty: IntTy, V: *Address)); |
178 | } |
179 | } |
180 | |
181 | if (const MCExpr *E = lowerAddrSpaceCast(TM, CV, OutContext)) |
182 | return E; |
183 | return AsmPrinter::lowerConstant(CV); |
184 | } |
185 | |
186 | void AMDGPUAsmPrinter::emitInstruction(const MachineInstr *MI) { |
187 | // FIXME: Enable feature predicate checks once all the test pass. |
188 | // AMDGPU_MC::verifyInstructionPredicates(MI->getOpcode(), |
189 | // getSubtargetInfo().getFeatureBits()); |
190 | |
191 | if (emitPseudoExpansionLowering(OutStreamer&: *OutStreamer, MI)) |
192 | return; |
193 | |
194 | const GCNSubtarget &STI = MF->getSubtarget<GCNSubtarget>(); |
195 | AMDGPUMCInstLower MCInstLowering(OutContext, STI, *this); |
196 | |
197 | StringRef Err; |
198 | if (!STI.getInstrInfo()->verifyInstruction(MI: *MI, ErrInfo&: Err)) { |
199 | LLVMContext &C = MI->getParent()->getParent()->getFunction().getContext(); |
200 | C.emitError(ErrorStr: "Illegal instruction detected: " + Err); |
201 | MI->print(OS&: errs()); |
202 | } |
203 | |
204 | if (MI->isBundle()) { |
205 | const MachineBasicBlock *MBB = MI->getParent(); |
206 | MachineBasicBlock::const_instr_iterator I = ++MI->getIterator(); |
207 | while (I != MBB->instr_end() && I->isInsideBundle()) { |
208 | emitInstruction(MI: &*I); |
209 | ++I; |
210 | } |
211 | } else { |
212 | // We don't want these pseudo instructions encoded. They are |
213 | // placeholder terminator instructions and should only be printed as |
214 | // comments. |
215 | if (MI->getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG) { |
216 | if (isVerbose()) |
217 | OutStreamer->emitRawComment(T: " return to shader part epilog" ); |
218 | return; |
219 | } |
220 | |
221 | if (MI->getOpcode() == AMDGPU::WAVE_BARRIER) { |
222 | if (isVerbose()) |
223 | OutStreamer->emitRawComment(T: " wave barrier" ); |
224 | return; |
225 | } |
226 | |
227 | if (MI->getOpcode() == AMDGPU::SCHED_BARRIER) { |
228 | if (isVerbose()) { |
229 | std::string HexString; |
230 | raw_string_ostream HexStream(HexString); |
231 | HexStream << format_hex(N: MI->getOperand(i: 0).getImm(), Width: 10, Upper: true); |
232 | OutStreamer->emitRawComment(T: " sched_barrier mask(" + HexString + ")" ); |
233 | } |
234 | return; |
235 | } |
236 | |
237 | if (MI->getOpcode() == AMDGPU::SCHED_GROUP_BARRIER) { |
238 | if (isVerbose()) { |
239 | std::string HexString; |
240 | raw_string_ostream HexStream(HexString); |
241 | HexStream << format_hex(N: MI->getOperand(i: 0).getImm(), Width: 10, Upper: true); |
242 | OutStreamer->emitRawComment( |
243 | T: " sched_group_barrier mask(" + HexString + ") size(" + |
244 | Twine(MI->getOperand(i: 1).getImm()) + ") SyncID(" + |
245 | Twine(MI->getOperand(i: 2).getImm()) + ")" ); |
246 | } |
247 | return; |
248 | } |
249 | |
250 | if (MI->getOpcode() == AMDGPU::IGLP_OPT) { |
251 | if (isVerbose()) { |
252 | std::string HexString; |
253 | raw_string_ostream HexStream(HexString); |
254 | HexStream << format_hex(N: MI->getOperand(i: 0).getImm(), Width: 10, Upper: true); |
255 | OutStreamer->emitRawComment(T: " iglp_opt mask(" + HexString + ")" ); |
256 | } |
257 | return; |
258 | } |
259 | |
260 | if (MI->getOpcode() == AMDGPU::SI_MASKED_UNREACHABLE) { |
261 | if (isVerbose()) |
262 | OutStreamer->emitRawComment(T: " divergent unreachable" ); |
263 | return; |
264 | } |
265 | |
266 | if (MI->isMetaInstruction()) { |
267 | if (isVerbose()) |
268 | OutStreamer->emitRawComment(T: " meta instruction" ); |
269 | return; |
270 | } |
271 | |
272 | MCInst TmpInst; |
273 | MCInstLowering.lower(MI, OutMI&: TmpInst); |
274 | EmitToStreamer(S&: *OutStreamer, Inst: TmpInst); |
275 | |
276 | #ifdef EXPENSIVE_CHECKS |
277 | // Check getInstSizeInBytes on explicitly specified CPUs (it cannot |
278 | // work correctly for the generic CPU). |
279 | // |
280 | // The isPseudo check really shouldn't be here, but unfortunately there are |
281 | // some negative lit tests that depend on being able to continue through |
282 | // here even when pseudo instructions haven't been lowered. |
283 | // |
284 | // We also overestimate branch sizes with the offset bug. |
285 | if (!MI->isPseudo() && STI.isCPUStringValid(STI.getCPU()) && |
286 | (!STI.hasOffset3fBug() || !MI->isBranch())) { |
287 | SmallVector<MCFixup, 4> Fixups; |
288 | SmallVector<char, 16> CodeBytes; |
289 | |
290 | std::unique_ptr<MCCodeEmitter> InstEmitter(createAMDGPUMCCodeEmitter( |
291 | *STI.getInstrInfo(), OutContext)); |
292 | InstEmitter->encodeInstruction(TmpInst, CodeBytes, Fixups, STI); |
293 | |
294 | assert(CodeBytes.size() == STI.getInstrInfo()->getInstSizeInBytes(*MI)); |
295 | } |
296 | #endif |
297 | |
298 | if (DumpCodeInstEmitter) { |
299 | // Disassemble instruction/operands to text |
300 | DisasmLines.resize(new_size: DisasmLines.size() + 1); |
301 | std::string &DisasmLine = DisasmLines.back(); |
302 | raw_string_ostream DisasmStream(DisasmLine); |
303 | |
304 | AMDGPUInstPrinter InstPrinter(*TM.getMCAsmInfo(), *STI.getInstrInfo(), |
305 | *STI.getRegisterInfo()); |
306 | InstPrinter.printInst(&TmpInst, 0, StringRef(), STI, DisasmStream); |
307 | |
308 | // Disassemble instruction/operands to hex representation. |
309 | SmallVector<MCFixup, 4> Fixups; |
310 | SmallVector<char, 16> CodeBytes; |
311 | |
312 | DumpCodeInstEmitter->encodeInstruction( |
313 | Inst: TmpInst, CB&: CodeBytes, Fixups, STI: MF->getSubtarget<MCSubtargetInfo>()); |
314 | HexLines.resize(new_size: HexLines.size() + 1); |
315 | std::string &HexLine = HexLines.back(); |
316 | raw_string_ostream HexStream(HexLine); |
317 | |
318 | for (size_t i = 0; i < CodeBytes.size(); i += 4) { |
319 | unsigned int CodeDWord = *(unsigned int *)&CodeBytes[i]; |
320 | HexStream << format(Fmt: "%s%08X" , Vals: (i > 0 ? " " : "" ), Vals: CodeDWord); |
321 | } |
322 | |
323 | DisasmStream.flush(); |
324 | DisasmLineMaxLen = std::max(a: DisasmLineMaxLen, b: DisasmLine.size()); |
325 | } |
326 | } |
327 | } |
328 | |