//===-- AMDGPUTargetStreamer.cpp - AMDGPU Target Streamer Methods --------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file provides AMDGPU specific target streamer methods.
//
//===----------------------------------------------------------------------===//

#include "AMDGPUTargetStreamer.h"
#include "AMDGPUMCExpr.h"
#include "AMDGPUMCKernelDescriptor.h"
#include "AMDGPUPTNote.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "Utils/AMDKernelCodeTUtils.h"
#include "llvm/BinaryFormat/AMDGPUMetadataVerifier.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCELFStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/AMDGPUMetadata.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/TargetParser/TargetParser.h"

using namespace llvm;
using namespace llvm::AMDGPU;

//===----------------------------------------------------------------------===//
// AMDGPUTargetStreamer
//===----------------------------------------------------------------------===//

static cl::opt<unsigned>
    ForceGenericVersion("amdgpu-force-generic-version",
                        cl::desc("Force a specific generic_v<N> flag to be "
                                 "added. For testing purposes only."),
                        cl::ReallyHidden, cl::init(0));

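// Parse the YAML string into a msgpack document and forward it to the
// document-based EmitHSAMetadata overload; verification there is non-strict.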
bool AMDGPUTargetStreamer::EmitHSAMetadataV3(StringRef HSAMetadataString) {
  msgpack::Document HSAMetadataDoc;
  if (!HSAMetadataDoc.fromYAML(HSAMetadataString))
    return false;
  return EmitHSAMetadata(HSAMetadataDoc, /*Strict=*/false);
}

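// Map an EF_AMDGPU_MACH_* ELF flag value back to a processor name, preferring
// AMDGCN names and falling back to R600 names.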
StringRef AMDGPUTargetStreamer::getArchNameFromElfMach(unsigned ElfMach) {
  AMDGPU::GPUKind AK;

  // clang-format off
  switch (ElfMach) {
  case ELF::EF_AMDGPU_MACH_R600_R600: AK = GK_R600; break;
  case ELF::EF_AMDGPU_MACH_R600_R630: AK = GK_R630; break;
  case ELF::EF_AMDGPU_MACH_R600_RS880: AK = GK_RS880; break;
  case ELF::EF_AMDGPU_MACH_R600_RV670: AK = GK_RV670; break;
  case ELF::EF_AMDGPU_MACH_R600_RV710: AK = GK_RV710; break;
  case ELF::EF_AMDGPU_MACH_R600_RV730: AK = GK_RV730; break;
  case ELF::EF_AMDGPU_MACH_R600_RV770: AK = GK_RV770; break;
  case ELF::EF_AMDGPU_MACH_R600_CEDAR: AK = GK_CEDAR; break;
  case ELF::EF_AMDGPU_MACH_R600_CYPRESS: AK = GK_CYPRESS; break;
  case ELF::EF_AMDGPU_MACH_R600_JUNIPER: AK = GK_JUNIPER; break;
  case ELF::EF_AMDGPU_MACH_R600_REDWOOD: AK = GK_REDWOOD; break;
  case ELF::EF_AMDGPU_MACH_R600_SUMO: AK = GK_SUMO; break;
  case ELF::EF_AMDGPU_MACH_R600_BARTS: AK = GK_BARTS; break;
  case ELF::EF_AMDGPU_MACH_R600_CAICOS: AK = GK_CAICOS; break;
  case ELF::EF_AMDGPU_MACH_R600_CAYMAN: AK = GK_CAYMAN; break;
  case ELF::EF_AMDGPU_MACH_R600_TURKS: AK = GK_TURKS; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX600: AK = GK_GFX600; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX601: AK = GK_GFX601; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX602: AK = GK_GFX602; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX700: AK = GK_GFX700; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX701: AK = GK_GFX701; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX702: AK = GK_GFX702; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX703: AK = GK_GFX703; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX704: AK = GK_GFX704; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX705: AK = GK_GFX705; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX801: AK = GK_GFX801; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX802: AK = GK_GFX802; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX803: AK = GK_GFX803; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX805: AK = GK_GFX805; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX810: AK = GK_GFX810; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX900: AK = GK_GFX900; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX902: AK = GK_GFX902; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX904: AK = GK_GFX904; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX906: AK = GK_GFX906; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX908: AK = GK_GFX908; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX909: AK = GK_GFX909; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX90A: AK = GK_GFX90A; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX90C: AK = GK_GFX90C; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX942: AK = GK_GFX942; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX950: AK = GK_GFX950; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010: AK = GK_GFX1010; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1011: AK = GK_GFX1011; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1012: AK = GK_GFX1012; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1013: AK = GK_GFX1013; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1030: AK = GK_GFX1030; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1031: AK = GK_GFX1031; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1032: AK = GK_GFX1032; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1033: AK = GK_GFX1033; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1034: AK = GK_GFX1034; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1035: AK = GK_GFX1035; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1036: AK = GK_GFX1036; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1100: AK = GK_GFX1100; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1101: AK = GK_GFX1101; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1102: AK = GK_GFX1102; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1103: AK = GK_GFX1103; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1150: AK = GK_GFX1150; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1151: AK = GK_GFX1151; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1152: AK = GK_GFX1152; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1153: AK = GK_GFX1153; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1200: AK = GK_GFX1200; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1201: AK = GK_GFX1201; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC: AK = GK_GFX9_GENERIC; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX9_4_GENERIC: AK = GK_GFX9_4_GENERIC; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC: AK = GK_GFX10_1_GENERIC; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC: AK = GK_GFX10_3_GENERIC; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC: AK = GK_GFX11_GENERIC; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX12_GENERIC: AK = GK_GFX12_GENERIC; break;
  case ELF::EF_AMDGPU_MACH_NONE: AK = GK_NONE; break;
  default: AK = GK_NONE; break;
  }
  // clang-format on

  StringRef GPUName = getArchNameAMDGCN(AK);
  if (!GPUName.empty())
    return GPUName;
  return getArchNameR600(AK);
}

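// Map a processor name to the corresponding EF_AMDGPU_MACH_* ELF flag value,
// trying the AMDGCN namespace first and then R600.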
unsigned AMDGPUTargetStreamer::getElfMach(StringRef GPU) {
  AMDGPU::GPUKind AK = parseArchAMDGCN(GPU);
  if (AK == AMDGPU::GPUKind::GK_NONE)
    AK = parseArchR600(GPU);

  // clang-format off
  switch (AK) {
  case GK_R600: return ELF::EF_AMDGPU_MACH_R600_R600;
  case GK_R630: return ELF::EF_AMDGPU_MACH_R600_R630;
  case GK_RS880: return ELF::EF_AMDGPU_MACH_R600_RS880;
  case GK_RV670: return ELF::EF_AMDGPU_MACH_R600_RV670;
  case GK_RV710: return ELF::EF_AMDGPU_MACH_R600_RV710;
  case GK_RV730: return ELF::EF_AMDGPU_MACH_R600_RV730;
  case GK_RV770: return ELF::EF_AMDGPU_MACH_R600_RV770;
  case GK_CEDAR: return ELF::EF_AMDGPU_MACH_R600_CEDAR;
  case GK_CYPRESS: return ELF::EF_AMDGPU_MACH_R600_CYPRESS;
  case GK_JUNIPER: return ELF::EF_AMDGPU_MACH_R600_JUNIPER;
  case GK_REDWOOD: return ELF::EF_AMDGPU_MACH_R600_REDWOOD;
  case GK_SUMO: return ELF::EF_AMDGPU_MACH_R600_SUMO;
  case GK_BARTS: return ELF::EF_AMDGPU_MACH_R600_BARTS;
  case GK_CAICOS: return ELF::EF_AMDGPU_MACH_R600_CAICOS;
  case GK_CAYMAN: return ELF::EF_AMDGPU_MACH_R600_CAYMAN;
  case GK_TURKS: return ELF::EF_AMDGPU_MACH_R600_TURKS;
  case GK_GFX600: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX600;
  case GK_GFX601: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX601;
  case GK_GFX602: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX602;
  case GK_GFX700: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX700;
  case GK_GFX701: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX701;
  case GK_GFX702: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX702;
  case GK_GFX703: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX703;
  case GK_GFX704: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX704;
  case GK_GFX705: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX705;
  case GK_GFX801: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX801;
  case GK_GFX802: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX802;
  case GK_GFX803: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX803;
  case GK_GFX805: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX805;
  case GK_GFX810: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX810;
  case GK_GFX900: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX900;
  case GK_GFX902: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX902;
  case GK_GFX904: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX904;
  case GK_GFX906: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX906;
  case GK_GFX908: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX908;
  case GK_GFX909: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX909;
  case GK_GFX90A: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX90A;
  case GK_GFX90C: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX90C;
  case GK_GFX942: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX942;
  case GK_GFX950: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX950;
  case GK_GFX1010: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010;
  case GK_GFX1011: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1011;
  case GK_GFX1012: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1012;
  case GK_GFX1013: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1013;
  case GK_GFX1030: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1030;
  case GK_GFX1031: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1031;
  case GK_GFX1032: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1032;
  case GK_GFX1033: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1033;
  case GK_GFX1034: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1034;
  case GK_GFX1035: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1035;
  case GK_GFX1036: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1036;
  case GK_GFX1100: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1100;
  case GK_GFX1101: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1101;
  case GK_GFX1102: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1102;
  case GK_GFX1103: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1103;
  case GK_GFX1150: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1150;
  case GK_GFX1151: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1151;
  case GK_GFX1152: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1152;
  case GK_GFX1153: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1153;
  case GK_GFX1200: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1200;
  case GK_GFX1201: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1201;
  case GK_GFX9_GENERIC: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC;
  case GK_GFX9_4_GENERIC: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX9_4_GENERIC;
  case GK_GFX10_1_GENERIC: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC;
  case GK_GFX10_3_GENERIC: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC;
  case GK_GFX11_GENERIC: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC;
  case GK_GFX12_GENERIC: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX12_GENERIC;
  case GK_NONE: return ELF::EF_AMDGPU_MACH_NONE;
  }
  // clang-format on

  llvm_unreachable("unknown GPU");
}

//===----------------------------------------------------------------------===//
// AMDGPUTargetAsmStreamer
//===----------------------------------------------------------------------===//

AMDGPUTargetAsmStreamer::AMDGPUTargetAsmStreamer(MCStreamer &S,
                                                 formatted_raw_ostream &OS)
    : AMDGPUTargetStreamer(S), OS(OS) {}

// A hook for emitting stuff at the end.
// We use it for emitting the accumulated PAL metadata as directives.
// The PAL metadata is reset after it is emitted.
void AMDGPUTargetAsmStreamer::finish() {
  std::string S;
  getPALMetadata()->toString(S);
  OS << S;

  // Reset the PAL metadata so its data will not affect a compilation that
  // reuses this object.
  getPALMetadata()->reset();
}

void AMDGPUTargetAsmStreamer::EmitDirectiveAMDGCNTarget() {
  OS << "\t.amdgcn_target \"" << getTargetID()->toString() << "\"\n";
}

void AMDGPUTargetAsmStreamer::EmitDirectiveAMDHSACodeObjectVersion(
    unsigned COV) {
  AMDGPUTargetStreamer::EmitDirectiveAMDHSACodeObjectVersion(COV);
  OS << "\t.amdhsa_code_object_version " << COV << '\n';
}

void AMDGPUTargetAsmStreamer::EmitAMDKernelCodeT(AMDGPUMCKernelCodeT &Header) {
  auto FoldAndPrint = [&](const MCExpr *Expr, raw_ostream &OS,
                          const MCAsmInfo *MAI) {
    printAMDGPUMCExpr(foldAMDGPUMCExpr(Expr, getContext()), OS, MAI);
  };

  OS << "\t.amd_kernel_code_t\n";
  Header.EmitKernelCodeT(OS, getContext(), FoldAndPrint);
  OS << "\t.end_amd_kernel_code_t\n";
}

void AMDGPUTargetAsmStreamer::EmitAMDGPUSymbolType(StringRef SymbolName,
                                                   unsigned Type) {
  switch (Type) {
  default:
    llvm_unreachable("Invalid AMDGPU symbol type");
  case ELF::STT_AMDGPU_HSA_KERNEL:
    OS << "\t.amdgpu_hsa_kernel " << SymbolName << '\n';
    break;
  }
}

void AMDGPUTargetAsmStreamer::emitAMDGPULDS(MCSymbol *Symbol, unsigned Size,
                                            Align Alignment) {
  OS << "\t.amdgpu_lds " << Symbol->getName() << ", " << Size << ", "
     << Alignment.value() << '\n';
}

void AMDGPUTargetAsmStreamer::EmitMCResourceInfo(
    const MCSymbol *NumVGPR, const MCSymbol *NumAGPR,
    const MCSymbol *NumExplicitSGPR, const MCSymbol *PrivateSegmentSize,
    const MCSymbol *UsesVCC, const MCSymbol *UsesFlatScratch,
    const MCSymbol *HasDynamicallySizedStack, const MCSymbol *HasRecursion,
    const MCSymbol *HasIndirectCall) {
#define PRINT_RES_INFO(ARG)                                                    \
  OS << "\t.set ";                                                             \
  ARG->print(OS, getContext().getAsmInfo());                                   \
  OS << ", ";                                                                  \
  ARG->getVariableValue()->print(OS, getContext().getAsmInfo());               \
  Streamer.addBlankLine();

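  // Each expansion prints one line of the form ".set <symbol>, <expr>", e.g.
  // (symbol name and value are illustrative): .set kernel.num_vgpr, 32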
  PRINT_RES_INFO(NumVGPR);
  PRINT_RES_INFO(NumAGPR);
  PRINT_RES_INFO(NumExplicitSGPR);
  PRINT_RES_INFO(PrivateSegmentSize);
  PRINT_RES_INFO(UsesVCC);
  PRINT_RES_INFO(UsesFlatScratch);
  PRINT_RES_INFO(HasDynamicallySizedStack);
  PRINT_RES_INFO(HasRecursion);
  PRINT_RES_INFO(HasIndirectCall);
#undef PRINT_RES_INFO
}

void AMDGPUTargetAsmStreamer::EmitMCResourceMaximums(const MCSymbol *MaxVGPR,
                                                     const MCSymbol *MaxAGPR,
                                                     const MCSymbol *MaxSGPR) {
#define PRINT_RES_INFO(ARG)                                                    \
  OS << "\t.set ";                                                             \
  ARG->print(OS, getContext().getAsmInfo());                                   \
  OS << ", ";                                                                  \
  ARG->getVariableValue()->print(OS, getContext().getAsmInfo());               \
  Streamer.addBlankLine();

  PRINT_RES_INFO(MaxVGPR);
  PRINT_RES_INFO(MaxAGPR);
  PRINT_RES_INFO(MaxSGPR);
#undef PRINT_RES_INFO
}

bool AMDGPUTargetAsmStreamer::EmitISAVersion() {
  OS << "\t.amd_amdgpu_isa \"" << getTargetID()->toString() << "\"\n";
  return true;
}

bool AMDGPUTargetAsmStreamer::EmitHSAMetadata(
    msgpack::Document &HSAMetadataDoc, bool Strict) {
  HSAMD::V3::MetadataVerifier Verifier(Strict);
  if (!Verifier.verify(HSAMetadataDoc.getRoot()))
    return false;

  std::string HSAMetadataString;
  raw_string_ostream StrOS(HSAMetadataString);
  HSAMetadataDoc.toYAML(StrOS);

  OS << '\t' << HSAMD::V3::AssemblerDirectiveBegin << '\n';
  OS << StrOS.str() << '\n';
  OS << '\t' << HSAMD::V3::AssemblerDirectiveEnd << '\n';
  return true;
}

bool AMDGPUTargetAsmStreamer::EmitCodeEnd(const MCSubtargetInfo &STI) {
  const uint32_t Encoded_s_code_end = 0xbf9f0000;
  const uint32_t Encoded_s_nop = 0xbf800000;
  uint32_t Encoded_pad = Encoded_s_code_end;

  // Instruction cache line size in bytes.
  const unsigned Log2CacheLineSize = AMDGPU::isGFX11Plus(STI) ? 7 : 6;
  const unsigned CacheLineSize = 1u << Log2CacheLineSize;

  // Extra padding amount in bytes to support prefetch mode 3.
  unsigned FillSize = 3 * CacheLineSize;

  if (AMDGPU::isGFX90A(STI)) {
    Encoded_pad = Encoded_s_nop;
    FillSize = 16 * CacheLineSize;
  }

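  // For example, on GFX11+ (128-byte cache lines) this emits
  // ".p2alignl 7, 0xbf9f0000" followed by ".fill 96, 4, 0xbf9f0000"
  // (3 * 128 / 4 = 96 padding words).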
  OS << "\t.p2alignl " << Log2CacheLineSize << ", " << Encoded_pad << '\n';
  OS << "\t.fill " << (FillSize / 4) << ", 4, " << Encoded_pad << '\n';
  return true;
}

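// Print the kernel descriptor as .amdhsa_kernel directives. The emitted text
// has the shape (directives abbreviated):
//   .amdhsa_kernel <name>
//     .amdhsa_group_segment_fixed_size <expr>
//     ...
//   .end_amdhsa_kernel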
void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
    const MCSubtargetInfo &STI, StringRef KernelName,
    const MCKernelDescriptor &KD, const MCExpr *NextVGPR,
    const MCExpr *NextSGPR, const MCExpr *ReserveVCC,
    const MCExpr *ReserveFlatScr) {
  IsaVersion IVersion = getIsaVersion(STI.getCPU());
  const MCAsmInfo *MAI = getContext().getAsmInfo();

  OS << "\t.amdhsa_kernel " << KernelName << '\n';

  auto PrintField = [&](const MCExpr *Expr, uint32_t Shift, uint32_t Mask,
                        StringRef Directive) {
    OS << "\t\t" << Directive << ' ';
    const MCExpr *ShiftedAndMaskedExpr =
        MCKernelDescriptor::bits_get(Expr, Shift, Mask, getContext());
    const MCExpr *New = foldAMDGPUMCExpr(ShiftedAndMaskedExpr, getContext());
    printAMDGPUMCExpr(New, OS, MAI);
    OS << '\n';
  };

  auto EmitMCExpr = [&](const MCExpr *Value) {
    const MCExpr *NewExpr = foldAMDGPUMCExpr(Value, getContext());
    printAMDGPUMCExpr(NewExpr, OS, MAI);
  };

  OS << "\t\t.amdhsa_group_segment_fixed_size ";
  EmitMCExpr(KD.group_segment_fixed_size);
  OS << '\n';

  OS << "\t\t.amdhsa_private_segment_fixed_size ";
  EmitMCExpr(KD.private_segment_fixed_size);
  OS << '\n';

  OS << "\t\t.amdhsa_kernarg_size ";
  EmitMCExpr(KD.kernarg_size);
  OS << '\n';

  PrintField(
      KD.compute_pgm_rsrc2, amdhsa::COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_SHIFT,
      amdhsa::COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, ".amdhsa_user_sgpr_count");

  if (!hasArchitectedFlatScratch(STI))
    PrintField(
        KD.kernel_code_properties,
        amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER_SHIFT,
        amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
        ".amdhsa_user_sgpr_private_segment_buffer");
  PrintField(KD.kernel_code_properties,
             amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR_SHIFT,
             amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR,
             ".amdhsa_user_sgpr_dispatch_ptr");
  PrintField(KD.kernel_code_properties,
             amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR_SHIFT,
             amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR,
             ".amdhsa_user_sgpr_queue_ptr");
  PrintField(KD.kernel_code_properties,
             amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR_SHIFT,
             amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
             ".amdhsa_user_sgpr_kernarg_segment_ptr");
  PrintField(KD.kernel_code_properties,
             amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID_SHIFT,
             amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID,
             ".amdhsa_user_sgpr_dispatch_id");
  if (!hasArchitectedFlatScratch(STI))
    PrintField(KD.kernel_code_properties,
               amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT_SHIFT,
               amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT,
               ".amdhsa_user_sgpr_flat_scratch_init");
  if (hasKernargPreload(STI)) {
    PrintField(KD.kernarg_preload, amdhsa::KERNARG_PRELOAD_SPEC_LENGTH_SHIFT,
               amdhsa::KERNARG_PRELOAD_SPEC_LENGTH,
               ".amdhsa_user_sgpr_kernarg_preload_length");
    PrintField(KD.kernarg_preload, amdhsa::KERNARG_PRELOAD_SPEC_OFFSET_SHIFT,
               amdhsa::KERNARG_PRELOAD_SPEC_OFFSET,
               ".amdhsa_user_sgpr_kernarg_preload_offset");
  }
  PrintField(
      KD.kernel_code_properties,
      amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE_SHIFT,
      amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
      ".amdhsa_user_sgpr_private_segment_size");
  if (IVersion.Major >= 10)
    PrintField(KD.kernel_code_properties,
               amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32_SHIFT,
               amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
               ".amdhsa_wavefront_size32");
  if (CodeObjectVersion >= AMDGPU::AMDHSA_COV5)
    PrintField(KD.kernel_code_properties,
               amdhsa::KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK_SHIFT,
               amdhsa::KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK,
               ".amdhsa_uses_dynamic_stack");
  PrintField(KD.compute_pgm_rsrc2,
             amdhsa::COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT_SHIFT,
             amdhsa::COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT,
             (hasArchitectedFlatScratch(STI)
                  ? ".amdhsa_enable_private_segment"
                  : ".amdhsa_system_sgpr_private_segment_wavefront_offset"));
  PrintField(KD.compute_pgm_rsrc2,
             amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X_SHIFT,
             amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X,
             ".amdhsa_system_sgpr_workgroup_id_x");
  PrintField(KD.compute_pgm_rsrc2,
             amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y_SHIFT,
             amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y,
             ".amdhsa_system_sgpr_workgroup_id_y");
  PrintField(KD.compute_pgm_rsrc2,
             amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z_SHIFT,
             amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z,
             ".amdhsa_system_sgpr_workgroup_id_z");
  PrintField(KD.compute_pgm_rsrc2,
             amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO_SHIFT,
             amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO,
             ".amdhsa_system_sgpr_workgroup_info");
  PrintField(KD.compute_pgm_rsrc2,
             amdhsa::COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID_SHIFT,
             amdhsa::COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID,
             ".amdhsa_system_vgpr_workitem_id");

  // These directives are required.
  OS << "\t\t.amdhsa_next_free_vgpr ";
  EmitMCExpr(NextVGPR);
  OS << '\n';

  OS << "\t\t.amdhsa_next_free_sgpr ";
  EmitMCExpr(NextSGPR);
  OS << '\n';

  if (AMDGPU::isGFX90A(STI)) {
    // MCExpr equivalent of (accum_offset + 1) * 4.
    const MCExpr *accum_bits = MCKernelDescriptor::bits_get(
        KD.compute_pgm_rsrc3,
        amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT,
        amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, getContext());
    accum_bits = MCBinaryExpr::createAdd(
        accum_bits, MCConstantExpr::create(1, getContext()), getContext());
    accum_bits = MCBinaryExpr::createMul(
        accum_bits, MCConstantExpr::create(4, getContext()), getContext());
    OS << "\t\t.amdhsa_accum_offset ";
    const MCExpr *New = foldAMDGPUMCExpr(accum_bits, getContext());
    printAMDGPUMCExpr(New, OS, MAI);
    OS << '\n';
  }

  OS << "\t\t.amdhsa_reserve_vcc ";
  EmitMCExpr(ReserveVCC);
  OS << '\n';

  if (IVersion.Major >= 7 && !hasArchitectedFlatScratch(STI)) {
    OS << "\t\t.amdhsa_reserve_flat_scratch ";
    EmitMCExpr(ReserveFlatScr);
    OS << '\n';
  }

  switch (CodeObjectVersion) {
  default:
    break;
  case AMDGPU::AMDHSA_COV4:
  case AMDGPU::AMDHSA_COV5:
    if (getTargetID()->isXnackSupported())
      OS << "\t\t.amdhsa_reserve_xnack_mask "
         << getTargetID()->isXnackOnOrAny() << '\n';
    break;
  }

  PrintField(KD.compute_pgm_rsrc1,
             amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32_SHIFT,
             amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32,
             ".amdhsa_float_round_mode_32");
  PrintField(KD.compute_pgm_rsrc1,
             amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64_SHIFT,
             amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64,
             ".amdhsa_float_round_mode_16_64");
  PrintField(KD.compute_pgm_rsrc1,
             amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32_SHIFT,
             amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32,
             ".amdhsa_float_denorm_mode_32");
  PrintField(KD.compute_pgm_rsrc1,
             amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64_SHIFT,
             amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64,
             ".amdhsa_float_denorm_mode_16_64");
  if (IVersion.Major < 12) {
    PrintField(KD.compute_pgm_rsrc1,
               amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP_SHIFT,
               amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP,
               ".amdhsa_dx10_clamp");
    PrintField(KD.compute_pgm_rsrc1,
               amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE_SHIFT,
               amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE,
               ".amdhsa_ieee_mode");
  }
  if (IVersion.Major >= 9) {
    PrintField(KD.compute_pgm_rsrc1,
               amdhsa::COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL_SHIFT,
               amdhsa::COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL,
               ".amdhsa_fp16_overflow");
  }
  if (AMDGPU::isGFX90A(STI))
    PrintField(KD.compute_pgm_rsrc3,
               amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT_SHIFT,
               amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, ".amdhsa_tg_split");
  if (IVersion.Major >= 10) {
    PrintField(KD.compute_pgm_rsrc1,
               amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE_SHIFT,
               amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE,
               ".amdhsa_workgroup_processor_mode");
    PrintField(KD.compute_pgm_rsrc1,
               amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED_SHIFT,
               amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED,
               ".amdhsa_memory_ordered");
    PrintField(KD.compute_pgm_rsrc1,
               amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS_SHIFT,
               amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS,
               ".amdhsa_forward_progress");
  }
  if (IVersion.Major >= 10 && IVersion.Major < 12) {
    PrintField(KD.compute_pgm_rsrc3,
               amdhsa::COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT_SHIFT,
               amdhsa::COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT,
               ".amdhsa_shared_vgpr_count");
  }
  if (IVersion.Major == 11) {
    PrintField(KD.compute_pgm_rsrc3,
               amdhsa::COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE_SHIFT,
               amdhsa::COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE,
               ".amdhsa_inst_pref_size");
  }
  if (IVersion.Major >= 12) {
    PrintField(KD.compute_pgm_rsrc3,
               amdhsa::COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE_SHIFT,
               amdhsa::COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE,
               ".amdhsa_inst_pref_size");
    PrintField(KD.compute_pgm_rsrc1,
               amdhsa::COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN_SHIFT,
               amdhsa::COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN,
               ".amdhsa_round_robin_scheduling");
  }
  PrintField(
      KD.compute_pgm_rsrc2,
      amdhsa::
          COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION_SHIFT,
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION,
      ".amdhsa_exception_fp_ieee_invalid_op");
  PrintField(
      KD.compute_pgm_rsrc2,
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE_SHIFT,
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
      ".amdhsa_exception_fp_denorm_src");
  PrintField(
      KD.compute_pgm_rsrc2,
      amdhsa::
          COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO_SHIFT,
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO,
      ".amdhsa_exception_fp_ieee_div_zero");
  PrintField(
      KD.compute_pgm_rsrc2,
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW_SHIFT,
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
      ".amdhsa_exception_fp_ieee_overflow");
  PrintField(
      KD.compute_pgm_rsrc2,
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW_SHIFT,
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
      ".amdhsa_exception_fp_ieee_underflow");
  PrintField(
      KD.compute_pgm_rsrc2,
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT_SHIFT,
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
      ".amdhsa_exception_fp_ieee_inexact");
  PrintField(
      KD.compute_pgm_rsrc2,
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO_SHIFT,
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
      ".amdhsa_exception_int_div_zero");

  OS << "\t.end_amdhsa_kernel\n";
}

//===----------------------------------------------------------------------===//
// AMDGPUTargetELFStreamer
//===----------------------------------------------------------------------===//

AMDGPUTargetELFStreamer::AMDGPUTargetELFStreamer(MCStreamer &S,
                                                 const MCSubtargetInfo &STI)
    : AMDGPUTargetStreamer(S), STI(STI), Streamer(S) {}

MCELFStreamer &AMDGPUTargetELFStreamer::getStreamer() {
  return static_cast<MCELFStreamer &>(Streamer);
}

// A hook for emitting stuff at the end.
// We use it for emitting the accumulated PAL metadata as a .note record.
// The PAL metadata is reset after it is emitted.
void AMDGPUTargetELFStreamer::finish() {
  ELFObjectWriter &W = getStreamer().getWriter();
  W.setELFHeaderEFlags(getEFlags());
  W.setOverrideABIVersion(
      getELFABIVersion(STI.getTargetTriple(), CodeObjectVersion));

  std::string Blob;
  const char *Vendor = getPALMetadata()->getVendor();
  unsigned Type = getPALMetadata()->getType();
  getPALMetadata()->toBlob(Type, Blob);
  if (Blob.empty())
    return;
  EmitNote(Vendor, MCConstantExpr::create(Blob.size(), getContext()), Type,
           [&](MCELFStreamer &OS) { OS.emitBytes(Blob); });

  // Reset the PAL metadata so its data will not affect a compilation that
  // reuses this object.
  getPALMetadata()->reset();
}

void AMDGPUTargetELFStreamer::EmitNote(
    StringRef Name, const MCExpr *DescSZ, unsigned NoteType,
    function_ref<void(MCELFStreamer &)> EmitDesc) {
  auto &S = getStreamer();
  auto &Context = S.getContext();

  auto NameSZ = Name.size() + 1;

  unsigned NoteFlags = 0;
  // TODO: Apparently, this is currently needed for OpenCL as mentioned in
  // https://reviews.llvm.org/D74995
  if (isHsaAbi(STI))
    NoteFlags = ELF::SHF_ALLOC;

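  // Standard ELF note layout: 4-byte namesz, descsz, and type words, the
  // null-terminated name (hence NameSZ = Name.size() + 1), then the desc
  // payload; name and desc are each padded to 4-byte alignment.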
  S.pushSection();
  S.switchSection(
      Context.getELFSection(ElfNote::SectionName, ELF::SHT_NOTE, NoteFlags));
  S.emitInt32(NameSZ);                       // namesz
  S.emitValue(DescSZ, 4);                    // descsz
  S.emitInt32(NoteType);                     // type
  S.emitBytes(Name);                         // name
  S.emitValueToAlignment(Align(4), 0, 1, 0); // padding 0
  EmitDesc(S);                               // desc
  S.emitValueToAlignment(Align(4), 0, 1, 0); // padding 0
  S.popSection();
}

unsigned AMDGPUTargetELFStreamer::getEFlags() {
  switch (STI.getTargetTriple().getArch()) {
  default:
    llvm_unreachable("Unsupported Arch");
  case Triple::r600:
    return getEFlagsR600();
  case Triple::amdgcn:
    return getEFlagsAMDGCN();
  }
}

unsigned AMDGPUTargetELFStreamer::getEFlagsR600() {
  assert(STI.getTargetTriple().getArch() == Triple::r600);

  return getElfMach(STI.getCPU());
}

unsigned AMDGPUTargetELFStreamer::getEFlagsAMDGCN() {
  assert(STI.getTargetTriple().isAMDGCN());

  switch (STI.getTargetTriple().getOS()) {
  default:
    // TODO: Why do some tests have "mingw" listed as OS?
    // llvm_unreachable("Unsupported OS");
  case Triple::UnknownOS:
    return getEFlagsUnknownOS();
  case Triple::AMDHSA:
    return getEFlagsAMDHSA();
  case Triple::AMDPAL:
    return getEFlagsAMDPAL();
  case Triple::Mesa3D:
    return getEFlagsMesa3D();
  }
}

unsigned AMDGPUTargetELFStreamer::getEFlagsUnknownOS() {
  // TODO: Why do some tests have "mingw" listed as OS?
  // assert(STI.getTargetTriple().getOS() == Triple::UnknownOS);

  return getEFlagsV3();
}

unsigned AMDGPUTargetELFStreamer::getEFlagsAMDHSA() {
  assert(isHsaAbi(STI));

  if (CodeObjectVersion >= 6)
    return getEFlagsV6();
  return getEFlagsV4();
}

unsigned AMDGPUTargetELFStreamer::getEFlagsAMDPAL() {
  assert(STI.getTargetTriple().getOS() == Triple::AMDPAL);

  return getEFlagsV3();
}

unsigned AMDGPUTargetELFStreamer::getEFlagsMesa3D() {
  assert(STI.getTargetTriple().getOS() == Triple::Mesa3D);

  return getEFlagsV3();
}

unsigned AMDGPUTargetELFStreamer::getEFlagsV3() {
  unsigned EFlagsV3 = 0;

  // mach.
  EFlagsV3 |= getElfMach(STI.getCPU());

  // xnack.
  if (getTargetID()->isXnackOnOrAny())
    EFlagsV3 |= ELF::EF_AMDGPU_FEATURE_XNACK_V3;
  // sramecc.
  if (getTargetID()->isSramEccOnOrAny())
    EFlagsV3 |= ELF::EF_AMDGPU_FEATURE_SRAMECC_V3;

  return EFlagsV3;
}

unsigned AMDGPUTargetELFStreamer::getEFlagsV4() {
  unsigned EFlagsV4 = 0;

  // mach.
  EFlagsV4 |= getElfMach(STI.getCPU());

  // xnack.
  switch (getTargetID()->getXnackSetting()) {
  case AMDGPU::IsaInfo::TargetIDSetting::Unsupported:
    EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4;
    break;
  case AMDGPU::IsaInfo::TargetIDSetting::Any:
    EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_XNACK_ANY_V4;
    break;
  case AMDGPU::IsaInfo::TargetIDSetting::Off:
    EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_XNACK_OFF_V4;
    break;
  case AMDGPU::IsaInfo::TargetIDSetting::On:
    EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_XNACK_ON_V4;
    break;
  }
  // sramecc.
  switch (getTargetID()->getSramEccSetting()) {
  case AMDGPU::IsaInfo::TargetIDSetting::Unsupported:
    EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_SRAMECC_UNSUPPORTED_V4;
    break;
  case AMDGPU::IsaInfo::TargetIDSetting::Any:
    EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_SRAMECC_ANY_V4;
    break;
  case AMDGPU::IsaInfo::TargetIDSetting::Off:
    EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_SRAMECC_OFF_V4;
    break;
  case AMDGPU::IsaInfo::TargetIDSetting::On:
    EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_SRAMECC_ON_V4;
    break;
  }

  return EFlagsV4;
}

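// V6 extends the V4 flags with a generic-target version: for generic targets
// (e.g. gfx11-generic) the version number is encoded into the
// EF_AMDGPU_GENERIC_VERSION bit-field of e_flags.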
unsigned AMDGPUTargetELFStreamer::getEFlagsV6() {
  unsigned Flags = getEFlagsV4();

  unsigned Version = ForceGenericVersion;
  if (!Version) {
    switch (parseArchAMDGCN(STI.getCPU())) {
    case AMDGPU::GK_GFX9_GENERIC:
      Version = GenericVersion::GFX9;
      break;
    case AMDGPU::GK_GFX9_4_GENERIC:
      Version = GenericVersion::GFX9_4;
      break;
    case AMDGPU::GK_GFX10_1_GENERIC:
      Version = GenericVersion::GFX10_1;
      break;
    case AMDGPU::GK_GFX10_3_GENERIC:
      Version = GenericVersion::GFX10_3;
      break;
    case AMDGPU::GK_GFX11_GENERIC:
      Version = GenericVersion::GFX11;
      break;
    case AMDGPU::GK_GFX12_GENERIC:
      Version = GenericVersion::GFX12;
      break;
    default:
      break;
    }
  }

  // Versions start at 1.
  if (Version) {
    if (Version > ELF::EF_AMDGPU_GENERIC_VERSION_MAX)
      report_fatal_error("Cannot encode generic code object version " +
                         Twine(Version) +
                         " - no ELF flag can represent this version!");
    Flags |= (Version << ELF::EF_AMDGPU_GENERIC_VERSION_OFFSET);
  }

  return Flags;
}

void AMDGPUTargetELFStreamer::EmitDirectiveAMDGCNTarget() {}

void AMDGPUTargetELFStreamer::EmitAMDKernelCodeT(AMDGPUMCKernelCodeT &Header) {
  MCStreamer &OS = getStreamer();
  OS.pushSection();
  Header.EmitKernelCodeT(OS, getContext());
  OS.popSection();
}

void AMDGPUTargetELFStreamer::EmitAMDGPUSymbolType(StringRef SymbolName,
                                                   unsigned Type) {
  MCSymbolELF *Symbol = cast<MCSymbolELF>(
      getStreamer().getContext().getOrCreateSymbol(SymbolName));
  Symbol->setType(Type);
}

void AMDGPUTargetELFStreamer::emitAMDGPULDS(MCSymbol *Symbol, unsigned Size,
                                            Align Alignment) {
  MCSymbolELF *SymbolELF = cast<MCSymbolELF>(Symbol);
  SymbolELF->setType(ELF::STT_OBJECT);

  if (!SymbolELF->isBindingSet())
    SymbolELF->setBinding(ELF::STB_GLOBAL);

  if (SymbolELF->declareCommon(Size, Alignment, /*Target=*/true)) {
    report_fatal_error("Symbol: " + Symbol->getName() +
                       " redeclared as different type");
  }

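  // SHN_AMDGPU_LDS is a processor-specific section index marking the symbol
  // as residing in group-segment (LDS) memory rather than in the file image.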
  SymbolELF->setIndex(ELF::SHN_AMDGPU_LDS);
  SymbolELF->setSize(MCConstantExpr::create(Size, getContext()));
}

bool AMDGPUTargetELFStreamer::EmitISAVersion() {
  // Create two labels to mark the beginning and end of the desc field
  // and a MCExpr to calculate the size of the desc field.
  auto &Context = getContext();
  auto *DescBegin = Context.createTempSymbol();
  auto *DescEnd = Context.createTempSymbol();
  auto *DescSZ = MCBinaryExpr::createSub(
      MCSymbolRefExpr::create(DescEnd, Context),
      MCSymbolRefExpr::create(DescBegin, Context), Context);

  EmitNote(ElfNote::NoteNameV2, DescSZ, ELF::NT_AMD_HSA_ISA_NAME,
           [&](MCELFStreamer &OS) {
             OS.emitLabel(DescBegin);
             OS.emitBytes(getTargetID()->toString());
             OS.emitLabel(DescEnd);
           });
  return true;
}

bool AMDGPUTargetELFStreamer::EmitHSAMetadata(msgpack::Document &HSAMetadataDoc,
                                              bool Strict) {
  HSAMD::V3::MetadataVerifier Verifier(Strict);
  if (!Verifier.verify(HSAMetadataDoc.getRoot()))
    return false;

  std::string HSAMetadataString;
  HSAMetadataDoc.writeToBlob(HSAMetadataString);

  // Create two labels to mark the beginning and end of the desc field
  // and a MCExpr to calculate the size of the desc field.
  auto &Context = getContext();
  auto *DescBegin = Context.createTempSymbol();
  auto *DescEnd = Context.createTempSymbol();
  auto *DescSZ = MCBinaryExpr::createSub(
      MCSymbolRefExpr::create(DescEnd, Context),
      MCSymbolRefExpr::create(DescBegin, Context), Context);

  EmitNote(ElfNote::NoteNameV3, DescSZ, ELF::NT_AMDGPU_METADATA,
           [&](MCELFStreamer &OS) {
             OS.emitLabel(DescBegin);
             OS.emitBytes(HSAMetadataString);
             OS.emitLabel(DescEnd);
           });
  return true;
}

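// Object-file counterpart of AMDGPUTargetAsmStreamer::EmitCodeEnd: pad to a
// cache-line boundary and then emit the fill words directly as data.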
bool AMDGPUTargetELFStreamer::EmitCodeEnd(const MCSubtargetInfo &STI) {
  const uint32_t Encoded_s_code_end = 0xbf9f0000;
  const uint32_t Encoded_s_nop = 0xbf800000;
  uint32_t Encoded_pad = Encoded_s_code_end;

  // Instruction cache line size in bytes.
  const unsigned Log2CacheLineSize = AMDGPU::isGFX11Plus(STI) ? 7 : 6;
  const unsigned CacheLineSize = 1u << Log2CacheLineSize;

  // Extra padding amount in bytes to support prefetch mode 3.
  unsigned FillSize = 3 * CacheLineSize;

  if (AMDGPU::isGFX90A(STI)) {
    Encoded_pad = Encoded_s_nop;
    FillSize = 16 * CacheLineSize;
  }

  MCStreamer &OS = getStreamer();
  OS.pushSection();
  OS.emitValueToAlignment(Align(CacheLineSize), Encoded_pad, 4);
  for (unsigned I = 0; I < FillSize; I += 4)
    OS.emitInt32(Encoded_pad);
  OS.popSection();
  return true;
}

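// Emit the binary kernel descriptor at the <KernelName>.kd symbol. The field
// order and sizes below mirror amdhsa::kernel_descriptor_t, so the emitted
// bytes form a complete, correctly laid out descriptor.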
void AMDGPUTargetELFStreamer::EmitAmdhsaKernelDescriptor(
    const MCSubtargetInfo &STI, StringRef KernelName,
    const MCKernelDescriptor &KernelDescriptor, const MCExpr *NextVGPR,
    const MCExpr *NextSGPR, const MCExpr *ReserveVCC,
    const MCExpr *ReserveFlatScr) {
  auto &Streamer = getStreamer();
  auto &Context = Streamer.getContext();

  MCSymbolELF *KernelCodeSymbol =
      cast<MCSymbolELF>(Context.getOrCreateSymbol(Twine(KernelName)));
  MCSymbolELF *KernelDescriptorSymbol = cast<MCSymbolELF>(
      Context.getOrCreateSymbol(Twine(KernelName) + Twine(".kd")));

  // Copy kernel descriptor symbol's binding, other and visibility from the
  // kernel code symbol.
  KernelDescriptorSymbol->setBinding(KernelCodeSymbol->getBinding());
  KernelDescriptorSymbol->setOther(KernelCodeSymbol->getOther());
  KernelDescriptorSymbol->setVisibility(KernelCodeSymbol->getVisibility());
  // Kernel descriptor symbol's type and size are fixed.
  KernelDescriptorSymbol->setType(ELF::STT_OBJECT);
  KernelDescriptorSymbol->setSize(
      MCConstantExpr::create(sizeof(amdhsa::kernel_descriptor_t), Context));

  // The visibility of the kernel code symbol must be protected or less to
  // allow static relocations from the kernel descriptor to be used.
  if (KernelCodeSymbol->getVisibility() == ELF::STV_DEFAULT)
    KernelCodeSymbol->setVisibility(ELF::STV_PROTECTED);

  Streamer.emitLabel(KernelDescriptorSymbol);
  Streamer.emitValue(
      KernelDescriptor.group_segment_fixed_size,
      sizeof(amdhsa::kernel_descriptor_t::group_segment_fixed_size));
  Streamer.emitValue(
      KernelDescriptor.private_segment_fixed_size,
      sizeof(amdhsa::kernel_descriptor_t::private_segment_fixed_size));
  Streamer.emitValue(KernelDescriptor.kernarg_size,
                     sizeof(amdhsa::kernel_descriptor_t::kernarg_size));

  for (uint32_t i = 0; i < sizeof(amdhsa::kernel_descriptor_t::reserved0); ++i)
    Streamer.emitInt8(0u);

  // FIXME: Remove the use of VK_AMDGPU_REL64 in the expression below. The
  // expression being created is:
  //   (start of kernel code) - (start of kernel descriptor)
  // It implies R_AMDGPU_REL64, but ends up being R_AMDGPU_ABS64.
  Streamer.emitValue(
      MCBinaryExpr::createSub(
          MCSymbolRefExpr::create(KernelCodeSymbol, AMDGPUMCExpr::S_REL64,
                                  Context),
          MCSymbolRefExpr::create(KernelDescriptorSymbol, Context), Context),
      sizeof(amdhsa::kernel_descriptor_t::kernel_code_entry_byte_offset));
  for (uint32_t i = 0; i < sizeof(amdhsa::kernel_descriptor_t::reserved1); ++i)
    Streamer.emitInt8(0u);
  Streamer.emitValue(KernelDescriptor.compute_pgm_rsrc3,
                     sizeof(amdhsa::kernel_descriptor_t::compute_pgm_rsrc3));
  Streamer.emitValue(KernelDescriptor.compute_pgm_rsrc1,
                     sizeof(amdhsa::kernel_descriptor_t::compute_pgm_rsrc1));
  Streamer.emitValue(KernelDescriptor.compute_pgm_rsrc2,
                     sizeof(amdhsa::kernel_descriptor_t::compute_pgm_rsrc2));
  Streamer.emitValue(
      KernelDescriptor.kernel_code_properties,
      sizeof(amdhsa::kernel_descriptor_t::kernel_code_properties));
  Streamer.emitValue(KernelDescriptor.kernarg_preload,
                     sizeof(amdhsa::kernel_descriptor_t::kernarg_preload));
  for (uint32_t i = 0; i < sizeof(amdhsa::kernel_descriptor_t::reserved3); ++i)
    Streamer.emitInt8(0u);
}