1//===--- AMDGPUHSAMetadataStreamer.cpp --------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// AMDGPU HSA Metadata Streamer.
11///
12//
13//===----------------------------------------------------------------------===//
14
15#include "AMDGPUHSAMetadataStreamer.h"
16#include "AMDGPU.h"
17#include "GCNSubtarget.h"
18#include "MCTargetDesc/AMDGPUTargetStreamer.h"
19#include "SIMachineFunctionInfo.h"
20#include "SIProgramInfo.h"
21#include "llvm/IR/Module.h"
22using namespace llvm;
23
24static std::pair<Type *, Align> getArgumentTypeAlign(const Argument &Arg,
25 const DataLayout &DL) {
26 Type *Ty = Arg.getType();
27 MaybeAlign ArgAlign;
28 if (Arg.hasByRefAttr()) {
29 Ty = Arg.getParamByRefType();
30 ArgAlign = Arg.getParamAlign();
31 }
32
33 if (!ArgAlign)
34 ArgAlign = DL.getABITypeAlign(Ty);
35
36 return std::pair(Ty, *ArgAlign);
37}
38
39namespace llvm {
40
41static cl::opt<bool> DumpHSAMetadata(
42 "amdgpu-dump-hsa-metadata",
43 cl::desc("Dump AMDGPU HSA Metadata"));
44static cl::opt<bool> VerifyHSAMetadata(
45 "amdgpu-verify-hsa-metadata",
46 cl::desc("Verify AMDGPU HSA Metadata"));
47
48namespace AMDGPU {
49namespace HSAMD {
50
51//===----------------------------------------------------------------------===//
52// HSAMetadataStreamerV4
53//===----------------------------------------------------------------------===//
54
55void MetadataStreamerMsgPackV4::dump(StringRef HSAMetadataString) const {
56 errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n';
57}
58
59void MetadataStreamerMsgPackV4::verify(StringRef HSAMetadataString) const {
60 errs() << "AMDGPU HSA Metadata Parser Test: ";
61
62 msgpack::Document FromHSAMetadataString;
63
64 if (!FromHSAMetadataString.fromYAML(S: HSAMetadataString)) {
65 errs() << "FAIL\n";
66 return;
67 }
68
69 std::string ToHSAMetadataString;
70 raw_string_ostream StrOS(ToHSAMetadataString);
71 FromHSAMetadataString.toYAML(OS&: StrOS);
72
73 errs() << (HSAMetadataString == StrOS.str() ? "PASS" : "FAIL") << '\n';
74 if (HSAMetadataString != ToHSAMetadataString) {
75 errs() << "Original input: " << HSAMetadataString << '\n'
76 << "Produced output: " << StrOS.str() << '\n';
77 }
78}
79
80std::optional<StringRef>
81MetadataStreamerMsgPackV4::getAccessQualifier(StringRef AccQual) const {
82 return StringSwitch<std::optional<StringRef>>(AccQual)
83 .Case(S: "read_only", Value: StringRef("read_only"))
84 .Case(S: "write_only", Value: StringRef("write_only"))
85 .Case(S: "read_write", Value: StringRef("read_write"))
86 .Default(Value: std::nullopt);
87}
88
89std::optional<StringRef> MetadataStreamerMsgPackV4::getAddressSpaceQualifier(
90 unsigned AddressSpace) const {
91 switch (AddressSpace) {
92 case AMDGPUAS::PRIVATE_ADDRESS:
93 return StringRef("private");
94 case AMDGPUAS::GLOBAL_ADDRESS:
95 return StringRef("global");
96 case AMDGPUAS::CONSTANT_ADDRESS:
97 return StringRef("constant");
98 case AMDGPUAS::LOCAL_ADDRESS:
99 return StringRef("local");
100 case AMDGPUAS::FLAT_ADDRESS:
101 return StringRef("generic");
102 case AMDGPUAS::REGION_ADDRESS:
103 return StringRef("region");
104 default:
105 return std::nullopt;
106 }
107}
108
109StringRef
110MetadataStreamerMsgPackV4::getValueKind(Type *Ty, StringRef TypeQual,
111 StringRef BaseTypeName) const {
112 if (TypeQual.contains(Other: "pipe"))
113 return "pipe";
114
115 return StringSwitch<StringRef>(BaseTypeName)
116 .Case(S: "image1d_t", Value: "image")
117 .Case(S: "image1d_array_t", Value: "image")
118 .Case(S: "image1d_buffer_t", Value: "image")
119 .Case(S: "image2d_t", Value: "image")
120 .Case(S: "image2d_array_t", Value: "image")
121 .Case(S: "image2d_array_depth_t", Value: "image")
122 .Case(S: "image2d_array_msaa_t", Value: "image")
123 .Case(S: "image2d_array_msaa_depth_t", Value: "image")
124 .Case(S: "image2d_depth_t", Value: "image")
125 .Case(S: "image2d_msaa_t", Value: "image")
126 .Case(S: "image2d_msaa_depth_t", Value: "image")
127 .Case(S: "image3d_t", Value: "image")
128 .Case(S: "sampler_t", Value: "sampler")
129 .Case(S: "queue_t", Value: "queue")
130 .Default(Value: isa<PointerType>(Val: Ty)
131 ? (Ty->getPointerAddressSpace() == AMDGPUAS::LOCAL_ADDRESS
132 ? "dynamic_shared_pointer"
133 : "global_buffer")
134 : "by_value");
135}
136
137std::string MetadataStreamerMsgPackV4::getTypeName(Type *Ty,
138 bool Signed) const {
139 switch (Ty->getTypeID()) {
140 case Type::IntegerTyID: {
141 if (!Signed)
142 return (Twine('u') + getTypeName(Ty, Signed: true)).str();
143
144 auto BitWidth = Ty->getIntegerBitWidth();
145 switch (BitWidth) {
146 case 8:
147 return "char";
148 case 16:
149 return "short";
150 case 32:
151 return "int";
152 case 64:
153 return "long";
154 default:
155 return (Twine('i') + Twine(BitWidth)).str();
156 }
157 }
158 case Type::HalfTyID:
159 return "half";
160 case Type::FloatTyID:
161 return "float";
162 case Type::DoubleTyID:
163 return "double";
164 case Type::FixedVectorTyID: {
165 auto VecTy = cast<FixedVectorType>(Val: Ty);
166 auto ElTy = VecTy->getElementType();
167 auto NumElements = VecTy->getNumElements();
168 return (Twine(getTypeName(Ty: ElTy, Signed)) + Twine(NumElements)).str();
169 }
170 default:
171 return "unknown";
172 }
173}
174
175msgpack::ArrayDocNode
176MetadataStreamerMsgPackV4::getWorkGroupDimensions(MDNode *Node) const {
177 auto Dims = HSAMetadataDoc->getArrayNode();
178 if (Node->getNumOperands() != 3)
179 return Dims;
180
181 for (auto &Op : Node->operands())
182 Dims.push_back(N: Dims.getDocument()->getNode(
183 V: uint64_t(mdconst::extract<ConstantInt>(MD: Op)->getZExtValue())));
184 return Dims;
185}
186
187void MetadataStreamerMsgPackV4::emitVersion() {
188 auto Version = HSAMetadataDoc->getArrayNode();
189 Version.push_back(N: Version.getDocument()->getNode(V: VersionMajorV4));
190 Version.push_back(N: Version.getDocument()->getNode(V: VersionMinorV4));
191 getRootMetadata(Key: "amdhsa.version") = Version;
192}
193
194void MetadataStreamerMsgPackV4::emitTargetID(
195 const IsaInfo::AMDGPUTargetID &TargetID) {
196 getRootMetadata(Key: "amdhsa.target") =
197 HSAMetadataDoc->getNode(V: TargetID.toString(), /*Copy=*/true);
198}
199
200void MetadataStreamerMsgPackV4::emitPrintf(const Module &Mod) {
201 auto Node = Mod.getNamedMetadata(Name: "llvm.printf.fmts");
202 if (!Node)
203 return;
204
205 auto Printf = HSAMetadataDoc->getArrayNode();
206 for (auto *Op : Node->operands())
207 if (Op->getNumOperands())
208 Printf.push_back(N: Printf.getDocument()->getNode(
209 V: cast<MDString>(Val: Op->getOperand(I: 0))->getString(), /*Copy=*/true));
210 getRootMetadata(Key: "amdhsa.printf") = Printf;
211}
212
213void MetadataStreamerMsgPackV4::emitKernelLanguage(const Function &Func,
214 msgpack::MapDocNode Kern) {
215 // TODO: What about other languages?
216 auto Node = Func.getParent()->getNamedMetadata(Name: "opencl.ocl.version");
217 if (!Node || !Node->getNumOperands())
218 return;
219 auto Op0 = Node->getOperand(i: 0);
220 if (Op0->getNumOperands() <= 1)
221 return;
222
223 Kern[".language"] = Kern.getDocument()->getNode(V: "OpenCL C");
224 auto LanguageVersion = Kern.getDocument()->getArrayNode();
225 LanguageVersion.push_back(N: Kern.getDocument()->getNode(
226 V: mdconst::extract<ConstantInt>(MD: Op0->getOperand(I: 0))->getZExtValue()));
227 LanguageVersion.push_back(N: Kern.getDocument()->getNode(
228 V: mdconst::extract<ConstantInt>(MD: Op0->getOperand(I: 1))->getZExtValue()));
229 Kern[".language_version"] = LanguageVersion;
230}
231
232void MetadataStreamerMsgPackV4::emitKernelAttrs(const Function &Func,
233 msgpack::MapDocNode Kern) {
234
235 if (auto Node = Func.getMetadata(Kind: "reqd_work_group_size"))
236 Kern[".reqd_workgroup_size"] = getWorkGroupDimensions(Node);
237 if (auto Node = Func.getMetadata(Kind: "work_group_size_hint"))
238 Kern[".workgroup_size_hint"] = getWorkGroupDimensions(Node);
239 if (auto Node = Func.getMetadata(Kind: "vec_type_hint")) {
240 Kern[".vec_type_hint"] = Kern.getDocument()->getNode(
241 V: getTypeName(
242 Ty: cast<ValueAsMetadata>(Val: Node->getOperand(I: 0))->getType(),
243 Signed: mdconst::extract<ConstantInt>(MD: Node->getOperand(I: 1))->getZExtValue()),
244 /*Copy=*/true);
245 }
246 if (Func.hasFnAttribute(Kind: "runtime-handle")) {
247 Kern[".device_enqueue_symbol"] = Kern.getDocument()->getNode(
248 V: Func.getFnAttribute(Kind: "runtime-handle").getValueAsString().str(),
249 /*Copy=*/true);
250 }
251 if (Func.hasFnAttribute(Kind: "device-init"))
252 Kern[".kind"] = Kern.getDocument()->getNode(V: "init");
253 else if (Func.hasFnAttribute(Kind: "device-fini"))
254 Kern[".kind"] = Kern.getDocument()->getNode(V: "fini");
255}
256
257void MetadataStreamerMsgPackV4::emitKernelArgs(const MachineFunction &MF,
258 msgpack::MapDocNode Kern) {
259 auto &Func = MF.getFunction();
260 unsigned Offset = 0;
261 auto Args = HSAMetadataDoc->getArrayNode();
262 for (auto &Arg : Func.args())
263 emitKernelArg(Arg, Offset, Args);
264
265 emitHiddenKernelArgs(MF, Offset, Args);
266
267 Kern[".args"] = Args;
268}
269
270void MetadataStreamerMsgPackV4::emitKernelArg(const Argument &Arg,
271 unsigned &Offset,
272 msgpack::ArrayDocNode Args) {
273 auto Func = Arg.getParent();
274 auto ArgNo = Arg.getArgNo();
275 const MDNode *Node;
276
277 StringRef Name;
278 Node = Func->getMetadata(Kind: "kernel_arg_name");
279 if (Node && ArgNo < Node->getNumOperands())
280 Name = cast<MDString>(Val: Node->getOperand(I: ArgNo))->getString();
281 else if (Arg.hasName())
282 Name = Arg.getName();
283
284 StringRef TypeName;
285 Node = Func->getMetadata(Kind: "kernel_arg_type");
286 if (Node && ArgNo < Node->getNumOperands())
287 TypeName = cast<MDString>(Val: Node->getOperand(I: ArgNo))->getString();
288
289 StringRef BaseTypeName;
290 Node = Func->getMetadata(Kind: "kernel_arg_base_type");
291 if (Node && ArgNo < Node->getNumOperands())
292 BaseTypeName = cast<MDString>(Val: Node->getOperand(I: ArgNo))->getString();
293
294 StringRef ActAccQual;
295 // Do we really need NoAlias check here?
296 if (Arg.getType()->isPointerTy() && Arg.hasNoAliasAttr()) {
297 if (Arg.onlyReadsMemory())
298 ActAccQual = "read_only";
299 else if (Arg.hasAttribute(Attribute::Kind: WriteOnly))
300 ActAccQual = "write_only";
301 }
302
303 StringRef AccQual;
304 Node = Func->getMetadata(Kind: "kernel_arg_access_qual");
305 if (Node && ArgNo < Node->getNumOperands())
306 AccQual = cast<MDString>(Val: Node->getOperand(I: ArgNo))->getString();
307
308 StringRef TypeQual;
309 Node = Func->getMetadata(Kind: "kernel_arg_type_qual");
310 if (Node && ArgNo < Node->getNumOperands())
311 TypeQual = cast<MDString>(Val: Node->getOperand(I: ArgNo))->getString();
312
313 const DataLayout &DL = Func->getParent()->getDataLayout();
314
315 MaybeAlign PointeeAlign;
316 Type *Ty = Arg.hasByRefAttr() ? Arg.getParamByRefType() : Arg.getType();
317
318 // FIXME: Need to distinguish in memory alignment from pointer alignment.
319 if (auto PtrTy = dyn_cast<PointerType>(Val: Ty)) {
320 if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS)
321 PointeeAlign = Arg.getParamAlign().valueOrOne();
322 }
323
324 // There's no distinction between byval aggregates and raw aggregates.
325 Type *ArgTy;
326 Align ArgAlign;
327 std::tie(args&: ArgTy, args&: ArgAlign) = getArgumentTypeAlign(Arg, DL);
328
329 emitKernelArg(DL, Ty: ArgTy, Alignment: ArgAlign,
330 ValueKind: getValueKind(Ty: ArgTy, TypeQual, BaseTypeName), Offset, Args,
331 PointeeAlign, Name, TypeName, BaseTypeName, ActAccQual,
332 AccQual, TypeQual);
333}
334
335void MetadataStreamerMsgPackV4::emitKernelArg(
336 const DataLayout &DL, Type *Ty, Align Alignment, StringRef ValueKind,
337 unsigned &Offset, msgpack::ArrayDocNode Args, MaybeAlign PointeeAlign,
338 StringRef Name, StringRef TypeName, StringRef BaseTypeName,
339 StringRef ActAccQual, StringRef AccQual, StringRef TypeQual) {
340 auto Arg = Args.getDocument()->getMapNode();
341
342 if (!Name.empty())
343 Arg[".name"] = Arg.getDocument()->getNode(V: Name, /*Copy=*/true);
344 if (!TypeName.empty())
345 Arg[".type_name"] = Arg.getDocument()->getNode(V: TypeName, /*Copy=*/true);
346 auto Size = DL.getTypeAllocSize(Ty);
347 Arg[".size"] = Arg.getDocument()->getNode(V: Size);
348 Offset = alignTo(Size: Offset, A: Alignment);
349 Arg[".offset"] = Arg.getDocument()->getNode(V: Offset);
350 Offset += Size;
351 Arg[".value_kind"] = Arg.getDocument()->getNode(V: ValueKind, /*Copy=*/true);
352 if (PointeeAlign)
353 Arg[".pointee_align"] = Arg.getDocument()->getNode(V: PointeeAlign->value());
354
355 if (auto PtrTy = dyn_cast<PointerType>(Val: Ty))
356 if (auto Qualifier = getAddressSpaceQualifier(AddressSpace: PtrTy->getAddressSpace()))
357 // Limiting address space to emit only for a certain ValueKind.
358 if (ValueKind == "global_buffer" || ValueKind == "dynamic_shared_pointer")
359 Arg[".address_space"] = Arg.getDocument()->getNode(V: *Qualifier,
360 /*Copy=*/true);
361
362 if (auto AQ = getAccessQualifier(AccQual))
363 Arg[".access"] = Arg.getDocument()->getNode(V: *AQ, /*Copy=*/true);
364
365 if (auto AAQ = getAccessQualifier(AccQual: ActAccQual))
366 Arg[".actual_access"] = Arg.getDocument()->getNode(V: *AAQ, /*Copy=*/true);
367
368 SmallVector<StringRef, 1> SplitTypeQuals;
369 TypeQual.split(A&: SplitTypeQuals, Separator: " ", MaxSplit: -1, KeepEmpty: false);
370 for (StringRef Key : SplitTypeQuals) {
371 if (Key == "const")
372 Arg[".is_const"] = Arg.getDocument()->getNode(V: true);
373 else if (Key == "restrict")
374 Arg[".is_restrict"] = Arg.getDocument()->getNode(V: true);
375 else if (Key == "volatile")
376 Arg[".is_volatile"] = Arg.getDocument()->getNode(V: true);
377 else if (Key == "pipe")
378 Arg[".is_pipe"] = Arg.getDocument()->getNode(V: true);
379 }
380
381 Args.push_back(N: Arg);
382}
383
384void MetadataStreamerMsgPackV4::emitHiddenKernelArgs(
385 const MachineFunction &MF, unsigned &Offset, msgpack::ArrayDocNode Args) {
386 auto &Func = MF.getFunction();
387 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
388
389 unsigned HiddenArgNumBytes = ST.getImplicitArgNumBytes(Func);
390 if (!HiddenArgNumBytes)
391 return;
392
393 const Module *M = Func.getParent();
394 auto &DL = M->getDataLayout();
395 auto Int64Ty = Type::getInt64Ty(C&: Func.getContext());
396
397 Offset = alignTo(Offset, ST.getAlignmentForImplicitArgPtr());
398
399 if (HiddenArgNumBytes >= 8)
400 emitKernelArg(DL, Ty: Int64Ty, Alignment: Align(8), ValueKind: "hidden_global_offset_x", Offset,
401 Args);
402 if (HiddenArgNumBytes >= 16)
403 emitKernelArg(DL, Ty: Int64Ty, Alignment: Align(8), ValueKind: "hidden_global_offset_y", Offset,
404 Args);
405 if (HiddenArgNumBytes >= 24)
406 emitKernelArg(DL, Ty: Int64Ty, Alignment: Align(8), ValueKind: "hidden_global_offset_z", Offset,
407 Args);
408
409 auto Int8PtrTy =
410 PointerType::get(C&: Func.getContext(), AddressSpace: AMDGPUAS::GLOBAL_ADDRESS);
411
412 if (HiddenArgNumBytes >= 32) {
413 // We forbid the use of features requiring hostcall when compiling OpenCL
414 // before code object V5, which makes the mutual exclusion between the
415 // "printf buffer" and "hostcall buffer" here sound.
416 if (M->getNamedMetadata(Name: "llvm.printf.fmts"))
417 emitKernelArg(DL, Ty: Int8PtrTy, Alignment: Align(8), ValueKind: "hidden_printf_buffer", Offset,
418 Args);
419 else if (!Func.hasFnAttribute(Kind: "amdgpu-no-hostcall-ptr"))
420 emitKernelArg(DL, Ty: Int8PtrTy, Alignment: Align(8), ValueKind: "hidden_hostcall_buffer", Offset,
421 Args);
422 else
423 emitKernelArg(DL, Ty: Int8PtrTy, Alignment: Align(8), ValueKind: "hidden_none", Offset, Args);
424 }
425
426 // Emit "default queue" and "completion action" arguments if enqueue kernel is
427 // used, otherwise emit dummy "none" arguments.
428 if (HiddenArgNumBytes >= 40) {
429 if (!Func.hasFnAttribute(Kind: "amdgpu-no-default-queue")) {
430 emitKernelArg(DL, Ty: Int8PtrTy, Alignment: Align(8), ValueKind: "hidden_default_queue", Offset,
431 Args);
432 } else {
433 emitKernelArg(DL, Ty: Int8PtrTy, Alignment: Align(8), ValueKind: "hidden_none", Offset, Args);
434 }
435 }
436
437 if (HiddenArgNumBytes >= 48) {
438 if (!Func.hasFnAttribute(Kind: "amdgpu-no-completion-action")) {
439 emitKernelArg(DL, Ty: Int8PtrTy, Alignment: Align(8), ValueKind: "hidden_completion_action", Offset,
440 Args);
441 } else {
442 emitKernelArg(DL, Ty: Int8PtrTy, Alignment: Align(8), ValueKind: "hidden_none", Offset, Args);
443 }
444 }
445
446 // Emit the pointer argument for multi-grid object.
447 if (HiddenArgNumBytes >= 56) {
448 if (!Func.hasFnAttribute(Kind: "amdgpu-no-multigrid-sync-arg")) {
449 emitKernelArg(DL, Ty: Int8PtrTy, Alignment: Align(8), ValueKind: "hidden_multigrid_sync_arg", Offset,
450 Args);
451 } else {
452 emitKernelArg(DL, Ty: Int8PtrTy, Alignment: Align(8), ValueKind: "hidden_none", Offset, Args);
453 }
454 }
455}
456
457msgpack::MapDocNode
458MetadataStreamerMsgPackV4::getHSAKernelProps(const MachineFunction &MF,
459 const SIProgramInfo &ProgramInfo,
460 unsigned CodeObjectVersion) const {
461 const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
462 const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
463 const Function &F = MF.getFunction();
464
465 auto Kern = HSAMetadataDoc->getMapNode();
466
467 Align MaxKernArgAlign;
468 Kern[".kernarg_segment_size"] = Kern.getDocument()->getNode(
469 STM.getKernArgSegmentSize(F, MaxKernArgAlign));
470 Kern[".group_segment_fixed_size"] =
471 Kern.getDocument()->getNode(V: ProgramInfo.LDSSize);
472 Kern[".private_segment_fixed_size"] =
473 Kern.getDocument()->getNode(V: ProgramInfo.ScratchSize);
474 if (CodeObjectVersion >= AMDGPU::AMDHSA_COV5)
475 Kern[".uses_dynamic_stack"] =
476 Kern.getDocument()->getNode(V: ProgramInfo.DynamicCallStack);
477
478 if (CodeObjectVersion >= AMDGPU::AMDHSA_COV5 && STM.supportsWGP())
479 Kern[".workgroup_processor_mode"] =
480 Kern.getDocument()->getNode(V: ProgramInfo.WgpMode);
481
482 // FIXME: The metadata treats the minimum as 16?
483 Kern[".kernarg_segment_align"] =
484 Kern.getDocument()->getNode(V: std::max(a: Align(4), b: MaxKernArgAlign).value());
485 Kern[".wavefront_size"] =
486 Kern.getDocument()->getNode(STM.getWavefrontSize());
487 Kern[".sgpr_count"] = Kern.getDocument()->getNode(V: ProgramInfo.NumSGPR);
488 Kern[".vgpr_count"] = Kern.getDocument()->getNode(V: ProgramInfo.NumVGPR);
489
490 // Only add AGPR count to metadata for supported devices
491 if (STM.hasMAIInsts()) {
492 Kern[".agpr_count"] = Kern.getDocument()->getNode(V: ProgramInfo.NumAccVGPR);
493 }
494
495 Kern[".max_flat_workgroup_size"] =
496 Kern.getDocument()->getNode(V: MFI.getMaxFlatWorkGroupSize());
497 unsigned NumWGX = MFI.getMaxNumWorkGroupsX();
498 unsigned NumWGY = MFI.getMaxNumWorkGroupsY();
499 unsigned NumWGZ = MFI.getMaxNumWorkGroupsZ();
500 if (NumWGX != 0 && NumWGY != 0 && NumWGZ != 0) {
501 Kern[".max_num_workgroups_x"] = Kern.getDocument()->getNode(V: NumWGX);
502 Kern[".max_num_workgroups_y"] = Kern.getDocument()->getNode(V: NumWGY);
503 Kern[".max_num_workgroups_z"] = Kern.getDocument()->getNode(V: NumWGZ);
504 }
505 Kern[".sgpr_spill_count"] =
506 Kern.getDocument()->getNode(V: MFI.getNumSpilledSGPRs());
507 Kern[".vgpr_spill_count"] =
508 Kern.getDocument()->getNode(V: MFI.getNumSpilledVGPRs());
509
510 return Kern;
511}
512
513bool MetadataStreamerMsgPackV4::emitTo(AMDGPUTargetStreamer &TargetStreamer) {
514 return TargetStreamer.EmitHSAMetadata(HSAMetadata&: *HSAMetadataDoc, Strict: true);
515}
516
517void MetadataStreamerMsgPackV4::begin(const Module &Mod,
518 const IsaInfo::AMDGPUTargetID &TargetID) {
519 emitVersion();
520 emitTargetID(TargetID);
521 emitPrintf(Mod);
522 getRootMetadata(Key: "amdhsa.kernels") = HSAMetadataDoc->getArrayNode();
523}
524
525void MetadataStreamerMsgPackV4::end() {
526 std::string HSAMetadataString;
527 raw_string_ostream StrOS(HSAMetadataString);
528 HSAMetadataDoc->toYAML(OS&: StrOS);
529
530 if (DumpHSAMetadata)
531 dump(HSAMetadataString: StrOS.str());
532 if (VerifyHSAMetadata)
533 verify(HSAMetadataString: StrOS.str());
534}
535
536void MetadataStreamerMsgPackV4::emitKernel(const MachineFunction &MF,
537 const SIProgramInfo &ProgramInfo) {
538 auto &Func = MF.getFunction();
539 if (Func.getCallingConv() != CallingConv::AMDGPU_KERNEL &&
540 Func.getCallingConv() != CallingConv::SPIR_KERNEL)
541 return;
542
543 auto CodeObjectVersion =
544 AMDGPU::getAMDHSACodeObjectVersion(M: *Func.getParent());
545 auto Kern = getHSAKernelProps(MF, ProgramInfo, CodeObjectVersion);
546
547 auto Kernels =
548 getRootMetadata(Key: "amdhsa.kernels").getArray(/*Convert=*/true);
549
550 {
551 Kern[".name"] = Kern.getDocument()->getNode(V: Func.getName());
552 Kern[".symbol"] = Kern.getDocument()->getNode(
553 V: (Twine(Func.getName()) + Twine(".kd")).str(), /*Copy=*/true);
554 emitKernelLanguage(Func, Kern);
555 emitKernelAttrs(Func, Kern);
556 emitKernelArgs(MF, Kern);
557 }
558
559 Kernels.push_back(N: Kern);
560}
561
562//===----------------------------------------------------------------------===//
563// HSAMetadataStreamerV5
564//===----------------------------------------------------------------------===//
565
566void MetadataStreamerMsgPackV5::emitVersion() {
567 auto Version = HSAMetadataDoc->getArrayNode();
568 Version.push_back(N: Version.getDocument()->getNode(V: VersionMajorV5));
569 Version.push_back(N: Version.getDocument()->getNode(V: VersionMinorV5));
570 getRootMetadata(Key: "amdhsa.version") = Version;
571}
572
573void MetadataStreamerMsgPackV5::emitHiddenKernelArgs(
574 const MachineFunction &MF, unsigned &Offset, msgpack::ArrayDocNode Args) {
575 auto &Func = MF.getFunction();
576 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
577
578 // No implicit kernel argument is used.
579 if (ST.getImplicitArgNumBytes(Func) == 0)
580 return;
581
582 const Module *M = Func.getParent();
583 auto &DL = M->getDataLayout();
584 const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
585
586 auto Int64Ty = Type::getInt64Ty(C&: Func.getContext());
587 auto Int32Ty = Type::getInt32Ty(C&: Func.getContext());
588 auto Int16Ty = Type::getInt16Ty(C&: Func.getContext());
589
590 Offset = alignTo(Offset, ST.getAlignmentForImplicitArgPtr());
591 emitKernelArg(DL, Ty: Int32Ty, Alignment: Align(4), ValueKind: "hidden_block_count_x", Offset, Args);
592 emitKernelArg(DL, Ty: Int32Ty, Alignment: Align(4), ValueKind: "hidden_block_count_y", Offset, Args);
593 emitKernelArg(DL, Ty: Int32Ty, Alignment: Align(4), ValueKind: "hidden_block_count_z", Offset, Args);
594
595 emitKernelArg(DL, Ty: Int16Ty, Alignment: Align(2), ValueKind: "hidden_group_size_x", Offset, Args);
596 emitKernelArg(DL, Ty: Int16Ty, Alignment: Align(2), ValueKind: "hidden_group_size_y", Offset, Args);
597 emitKernelArg(DL, Ty: Int16Ty, Alignment: Align(2), ValueKind: "hidden_group_size_z", Offset, Args);
598
599 emitKernelArg(DL, Ty: Int16Ty, Alignment: Align(2), ValueKind: "hidden_remainder_x", Offset, Args);
600 emitKernelArg(DL, Ty: Int16Ty, Alignment: Align(2), ValueKind: "hidden_remainder_y", Offset, Args);
601 emitKernelArg(DL, Ty: Int16Ty, Alignment: Align(2), ValueKind: "hidden_remainder_z", Offset, Args);
602
603 // Reserved for hidden_tool_correlation_id.
604 Offset += 8;
605
606 Offset += 8; // Reserved.
607
608 emitKernelArg(DL, Ty: Int64Ty, Alignment: Align(8), ValueKind: "hidden_global_offset_x", Offset, Args);
609 emitKernelArg(DL, Ty: Int64Ty, Alignment: Align(8), ValueKind: "hidden_global_offset_y", Offset, Args);
610 emitKernelArg(DL, Ty: Int64Ty, Alignment: Align(8), ValueKind: "hidden_global_offset_z", Offset, Args);
611
612 emitKernelArg(DL, Ty: Int16Ty, Alignment: Align(2), ValueKind: "hidden_grid_dims", Offset, Args);
613
614 Offset += 6; // Reserved.
615 auto Int8PtrTy =
616 PointerType::get(C&: Func.getContext(), AddressSpace: AMDGPUAS::GLOBAL_ADDRESS);
617
618 if (M->getNamedMetadata(Name: "llvm.printf.fmts")) {
619 emitKernelArg(DL, Ty: Int8PtrTy, Alignment: Align(8), ValueKind: "hidden_printf_buffer", Offset,
620 Args);
621 } else {
622 Offset += 8; // Skipped.
623 }
624
625 if (!Func.hasFnAttribute(Kind: "amdgpu-no-hostcall-ptr")) {
626 emitKernelArg(DL, Ty: Int8PtrTy, Alignment: Align(8), ValueKind: "hidden_hostcall_buffer", Offset,
627 Args);
628 } else {
629 Offset += 8; // Skipped.
630 }
631
632 if (!Func.hasFnAttribute(Kind: "amdgpu-no-multigrid-sync-arg")) {
633 emitKernelArg(DL, Ty: Int8PtrTy, Alignment: Align(8), ValueKind: "hidden_multigrid_sync_arg", Offset,
634 Args);
635 } else {
636 Offset += 8; // Skipped.
637 }
638
639 if (!Func.hasFnAttribute(Kind: "amdgpu-no-heap-ptr"))
640 emitKernelArg(DL, Ty: Int8PtrTy, Alignment: Align(8), ValueKind: "hidden_heap_v1", Offset, Args);
641 else
642 Offset += 8; // Skipped.
643
644 if (!Func.hasFnAttribute(Kind: "amdgpu-no-default-queue")) {
645 emitKernelArg(DL, Ty: Int8PtrTy, Alignment: Align(8), ValueKind: "hidden_default_queue", Offset,
646 Args);
647 } else {
648 Offset += 8; // Skipped.
649 }
650
651 if (!Func.hasFnAttribute(Kind: "amdgpu-no-completion-action")) {
652 emitKernelArg(DL, Ty: Int8PtrTy, Alignment: Align(8), ValueKind: "hidden_completion_action", Offset,
653 Args);
654 } else {
655 Offset += 8; // Skipped.
656 }
657
658 // Emit argument for hidden dynamic lds size
659 if (MFI.isDynamicLDSUsed()) {
660 emitKernelArg(DL, Ty: Int32Ty, Alignment: Align(4), ValueKind: "hidden_dynamic_lds_size", Offset,
661 Args);
662 } else {
663 Offset += 4; // skipped
664 }
665
666 Offset += 68; // Reserved.
667
668 // hidden_private_base and hidden_shared_base are only when the subtarget has
669 // ApertureRegs.
670 if (!ST.hasApertureRegs()) {
671 emitKernelArg(DL, Ty: Int32Ty, Alignment: Align(4), ValueKind: "hidden_private_base", Offset, Args);
672 emitKernelArg(DL, Ty: Int32Ty, Alignment: Align(4), ValueKind: "hidden_shared_base", Offset, Args);
673 } else {
674 Offset += 8; // Skipped.
675 }
676
677 if (MFI.getUserSGPRInfo().hasQueuePtr())
678 emitKernelArg(DL, Ty: Int8PtrTy, Alignment: Align(8), ValueKind: "hidden_queue_ptr", Offset, Args);
679}
680
681void MetadataStreamerMsgPackV5::emitKernelAttrs(const Function &Func,
682 msgpack::MapDocNode Kern) {
683 MetadataStreamerMsgPackV4::emitKernelAttrs(Func, Kern);
684
685 if (Func.getFnAttribute(Kind: "uniform-work-group-size").getValueAsBool())
686 Kern[".uniform_work_group_size"] = Kern.getDocument()->getNode(V: 1);
687}
688
689//===----------------------------------------------------------------------===//
690// HSAMetadataStreamerV6
691//===----------------------------------------------------------------------===//
692
693void MetadataStreamerMsgPackV6::emitVersion() {
694 auto Version = HSAMetadataDoc->getArrayNode();
695 Version.push_back(N: Version.getDocument()->getNode(V: VersionMajorV6));
696 Version.push_back(N: Version.getDocument()->getNode(V: VersionMinorV6));
697 getRootMetadata(Key: "amdhsa.version") = Version;
698}
699
700} // end namespace HSAMD
701} // end namespace AMDGPU
702} // end namespace llvm
703

source code of llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp