1 | //===--- AMDGPUHSAMetadataStreamer.cpp --------------------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | /// \file |
10 | /// AMDGPU HSA Metadata Streamer. |
11 | /// |
12 | // |
13 | //===----------------------------------------------------------------------===// |
14 | |
15 | #include "AMDGPUHSAMetadataStreamer.h" |
16 | #include "AMDGPU.h" |
17 | #include "GCNSubtarget.h" |
18 | #include "MCTargetDesc/AMDGPUTargetStreamer.h" |
19 | #include "SIMachineFunctionInfo.h" |
20 | #include "SIProgramInfo.h" |
21 | #include "llvm/IR/Module.h" |
22 | using namespace llvm; |
23 | |
24 | static std::pair<Type *, Align> getArgumentTypeAlign(const Argument &Arg, |
25 | const DataLayout &DL) { |
26 | Type *Ty = Arg.getType(); |
27 | MaybeAlign ArgAlign; |
28 | if (Arg.hasByRefAttr()) { |
29 | Ty = Arg.getParamByRefType(); |
30 | ArgAlign = Arg.getParamAlign(); |
31 | } |
32 | |
33 | if (!ArgAlign) |
34 | ArgAlign = DL.getABITypeAlign(Ty); |
35 | |
36 | return std::pair(Ty, *ArgAlign); |
37 | } |
38 | |
39 | namespace llvm { |
40 | |
41 | static cl::opt<bool> DumpHSAMetadata( |
42 | "amdgpu-dump-hsa-metadata" , |
43 | cl::desc("Dump AMDGPU HSA Metadata" )); |
44 | static cl::opt<bool> VerifyHSAMetadata( |
45 | "amdgpu-verify-hsa-metadata" , |
46 | cl::desc("Verify AMDGPU HSA Metadata" )); |
47 | |
48 | namespace AMDGPU { |
49 | namespace HSAMD { |
50 | |
51 | //===----------------------------------------------------------------------===// |
52 | // HSAMetadataStreamerV4 |
53 | //===----------------------------------------------------------------------===// |
54 | |
55 | void MetadataStreamerMsgPackV4::dump(StringRef HSAMetadataString) const { |
56 | errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n'; |
57 | } |
58 | |
59 | void MetadataStreamerMsgPackV4::verify(StringRef HSAMetadataString) const { |
60 | errs() << "AMDGPU HSA Metadata Parser Test: " ; |
61 | |
62 | msgpack::Document FromHSAMetadataString; |
63 | |
64 | if (!FromHSAMetadataString.fromYAML(S: HSAMetadataString)) { |
65 | errs() << "FAIL\n" ; |
66 | return; |
67 | } |
68 | |
69 | std::string ToHSAMetadataString; |
70 | raw_string_ostream StrOS(ToHSAMetadataString); |
71 | FromHSAMetadataString.toYAML(OS&: StrOS); |
72 | |
73 | errs() << (HSAMetadataString == StrOS.str() ? "PASS" : "FAIL" ) << '\n'; |
74 | if (HSAMetadataString != ToHSAMetadataString) { |
75 | errs() << "Original input: " << HSAMetadataString << '\n' |
76 | << "Produced output: " << StrOS.str() << '\n'; |
77 | } |
78 | } |
79 | |
80 | std::optional<StringRef> |
81 | MetadataStreamerMsgPackV4::getAccessQualifier(StringRef AccQual) const { |
82 | return StringSwitch<std::optional<StringRef>>(AccQual) |
83 | .Case(S: "read_only" , Value: StringRef("read_only" )) |
84 | .Case(S: "write_only" , Value: StringRef("write_only" )) |
85 | .Case(S: "read_write" , Value: StringRef("read_write" )) |
86 | .Default(Value: std::nullopt); |
87 | } |
88 | |
89 | std::optional<StringRef> MetadataStreamerMsgPackV4::getAddressSpaceQualifier( |
90 | unsigned AddressSpace) const { |
91 | switch (AddressSpace) { |
92 | case AMDGPUAS::PRIVATE_ADDRESS: |
93 | return StringRef("private" ); |
94 | case AMDGPUAS::GLOBAL_ADDRESS: |
95 | return StringRef("global" ); |
96 | case AMDGPUAS::CONSTANT_ADDRESS: |
97 | return StringRef("constant" ); |
98 | case AMDGPUAS::LOCAL_ADDRESS: |
99 | return StringRef("local" ); |
100 | case AMDGPUAS::FLAT_ADDRESS: |
101 | return StringRef("generic" ); |
102 | case AMDGPUAS::REGION_ADDRESS: |
103 | return StringRef("region" ); |
104 | default: |
105 | return std::nullopt; |
106 | } |
107 | } |
108 | |
109 | StringRef |
110 | MetadataStreamerMsgPackV4::getValueKind(Type *Ty, StringRef TypeQual, |
111 | StringRef BaseTypeName) const { |
112 | if (TypeQual.contains(Other: "pipe" )) |
113 | return "pipe" ; |
114 | |
115 | return StringSwitch<StringRef>(BaseTypeName) |
116 | .Case(S: "image1d_t" , Value: "image" ) |
117 | .Case(S: "image1d_array_t" , Value: "image" ) |
118 | .Case(S: "image1d_buffer_t" , Value: "image" ) |
119 | .Case(S: "image2d_t" , Value: "image" ) |
120 | .Case(S: "image2d_array_t" , Value: "image" ) |
121 | .Case(S: "image2d_array_depth_t" , Value: "image" ) |
122 | .Case(S: "image2d_array_msaa_t" , Value: "image" ) |
123 | .Case(S: "image2d_array_msaa_depth_t" , Value: "image" ) |
124 | .Case(S: "image2d_depth_t" , Value: "image" ) |
125 | .Case(S: "image2d_msaa_t" , Value: "image" ) |
126 | .Case(S: "image2d_msaa_depth_t" , Value: "image" ) |
127 | .Case(S: "image3d_t" , Value: "image" ) |
128 | .Case(S: "sampler_t" , Value: "sampler" ) |
129 | .Case(S: "queue_t" , Value: "queue" ) |
130 | .Default(Value: isa<PointerType>(Val: Ty) |
131 | ? (Ty->getPointerAddressSpace() == AMDGPUAS::LOCAL_ADDRESS |
132 | ? "dynamic_shared_pointer" |
133 | : "global_buffer" ) |
134 | : "by_value" ); |
135 | } |
136 | |
137 | std::string MetadataStreamerMsgPackV4::getTypeName(Type *Ty, |
138 | bool Signed) const { |
139 | switch (Ty->getTypeID()) { |
140 | case Type::IntegerTyID: { |
141 | if (!Signed) |
142 | return (Twine('u') + getTypeName(Ty, Signed: true)).str(); |
143 | |
144 | auto BitWidth = Ty->getIntegerBitWidth(); |
145 | switch (BitWidth) { |
146 | case 8: |
147 | return "char" ; |
148 | case 16: |
149 | return "short" ; |
150 | case 32: |
151 | return "int" ; |
152 | case 64: |
153 | return "long" ; |
154 | default: |
155 | return (Twine('i') + Twine(BitWidth)).str(); |
156 | } |
157 | } |
158 | case Type::HalfTyID: |
159 | return "half" ; |
160 | case Type::FloatTyID: |
161 | return "float" ; |
162 | case Type::DoubleTyID: |
163 | return "double" ; |
164 | case Type::FixedVectorTyID: { |
165 | auto VecTy = cast<FixedVectorType>(Val: Ty); |
166 | auto ElTy = VecTy->getElementType(); |
167 | auto NumElements = VecTy->getNumElements(); |
168 | return (Twine(getTypeName(Ty: ElTy, Signed)) + Twine(NumElements)).str(); |
169 | } |
170 | default: |
171 | return "unknown" ; |
172 | } |
173 | } |
174 | |
175 | msgpack::ArrayDocNode |
176 | MetadataStreamerMsgPackV4::getWorkGroupDimensions(MDNode *Node) const { |
177 | auto Dims = HSAMetadataDoc->getArrayNode(); |
178 | if (Node->getNumOperands() != 3) |
179 | return Dims; |
180 | |
181 | for (auto &Op : Node->operands()) |
182 | Dims.push_back(N: Dims.getDocument()->getNode( |
183 | V: uint64_t(mdconst::extract<ConstantInt>(MD: Op)->getZExtValue()))); |
184 | return Dims; |
185 | } |
186 | |
187 | void MetadataStreamerMsgPackV4::emitVersion() { |
188 | auto Version = HSAMetadataDoc->getArrayNode(); |
189 | Version.push_back(N: Version.getDocument()->getNode(V: VersionMajorV4)); |
190 | Version.push_back(N: Version.getDocument()->getNode(V: VersionMinorV4)); |
191 | getRootMetadata(Key: "amdhsa.version" ) = Version; |
192 | } |
193 | |
194 | void MetadataStreamerMsgPackV4::emitTargetID( |
195 | const IsaInfo::AMDGPUTargetID &TargetID) { |
196 | getRootMetadata(Key: "amdhsa.target" ) = |
197 | HSAMetadataDoc->getNode(V: TargetID.toString(), /*Copy=*/true); |
198 | } |
199 | |
200 | void MetadataStreamerMsgPackV4::emitPrintf(const Module &Mod) { |
201 | auto Node = Mod.getNamedMetadata(Name: "llvm.printf.fmts" ); |
202 | if (!Node) |
203 | return; |
204 | |
205 | auto Printf = HSAMetadataDoc->getArrayNode(); |
206 | for (auto *Op : Node->operands()) |
207 | if (Op->getNumOperands()) |
208 | Printf.push_back(N: Printf.getDocument()->getNode( |
209 | V: cast<MDString>(Val: Op->getOperand(I: 0))->getString(), /*Copy=*/true)); |
210 | getRootMetadata(Key: "amdhsa.printf" ) = Printf; |
211 | } |
212 | |
213 | void MetadataStreamerMsgPackV4::emitKernelLanguage(const Function &Func, |
214 | msgpack::MapDocNode Kern) { |
215 | // TODO: What about other languages? |
216 | auto Node = Func.getParent()->getNamedMetadata(Name: "opencl.ocl.version" ); |
217 | if (!Node || !Node->getNumOperands()) |
218 | return; |
219 | auto Op0 = Node->getOperand(i: 0); |
220 | if (Op0->getNumOperands() <= 1) |
221 | return; |
222 | |
223 | Kern[".language" ] = Kern.getDocument()->getNode(V: "OpenCL C" ); |
224 | auto LanguageVersion = Kern.getDocument()->getArrayNode(); |
225 | LanguageVersion.push_back(N: Kern.getDocument()->getNode( |
226 | V: mdconst::extract<ConstantInt>(MD: Op0->getOperand(I: 0))->getZExtValue())); |
227 | LanguageVersion.push_back(N: Kern.getDocument()->getNode( |
228 | V: mdconst::extract<ConstantInt>(MD: Op0->getOperand(I: 1))->getZExtValue())); |
229 | Kern[".language_version" ] = LanguageVersion; |
230 | } |
231 | |
232 | void MetadataStreamerMsgPackV4::emitKernelAttrs(const Function &Func, |
233 | msgpack::MapDocNode Kern) { |
234 | |
235 | if (auto Node = Func.getMetadata(Kind: "reqd_work_group_size" )) |
236 | Kern[".reqd_workgroup_size" ] = getWorkGroupDimensions(Node); |
237 | if (auto Node = Func.getMetadata(Kind: "work_group_size_hint" )) |
238 | Kern[".workgroup_size_hint" ] = getWorkGroupDimensions(Node); |
239 | if (auto Node = Func.getMetadata(Kind: "vec_type_hint" )) { |
240 | Kern[".vec_type_hint" ] = Kern.getDocument()->getNode( |
241 | V: getTypeName( |
242 | Ty: cast<ValueAsMetadata>(Val: Node->getOperand(I: 0))->getType(), |
243 | Signed: mdconst::extract<ConstantInt>(MD: Node->getOperand(I: 1))->getZExtValue()), |
244 | /*Copy=*/true); |
245 | } |
246 | if (Func.hasFnAttribute(Kind: "runtime-handle" )) { |
247 | Kern[".device_enqueue_symbol" ] = Kern.getDocument()->getNode( |
248 | V: Func.getFnAttribute(Kind: "runtime-handle" ).getValueAsString().str(), |
249 | /*Copy=*/true); |
250 | } |
251 | if (Func.hasFnAttribute(Kind: "device-init" )) |
252 | Kern[".kind" ] = Kern.getDocument()->getNode(V: "init" ); |
253 | else if (Func.hasFnAttribute(Kind: "device-fini" )) |
254 | Kern[".kind" ] = Kern.getDocument()->getNode(V: "fini" ); |
255 | } |
256 | |
257 | void MetadataStreamerMsgPackV4::emitKernelArgs(const MachineFunction &MF, |
258 | msgpack::MapDocNode Kern) { |
259 | auto &Func = MF.getFunction(); |
260 | unsigned Offset = 0; |
261 | auto Args = HSAMetadataDoc->getArrayNode(); |
262 | for (auto &Arg : Func.args()) |
263 | emitKernelArg(Arg, Offset, Args); |
264 | |
265 | emitHiddenKernelArgs(MF, Offset, Args); |
266 | |
267 | Kern[".args" ] = Args; |
268 | } |
269 | |
270 | void MetadataStreamerMsgPackV4::emitKernelArg(const Argument &Arg, |
271 | unsigned &Offset, |
272 | msgpack::ArrayDocNode Args) { |
273 | auto Func = Arg.getParent(); |
274 | auto ArgNo = Arg.getArgNo(); |
275 | const MDNode *Node; |
276 | |
277 | StringRef Name; |
278 | Node = Func->getMetadata(Kind: "kernel_arg_name" ); |
279 | if (Node && ArgNo < Node->getNumOperands()) |
280 | Name = cast<MDString>(Val: Node->getOperand(I: ArgNo))->getString(); |
281 | else if (Arg.hasName()) |
282 | Name = Arg.getName(); |
283 | |
284 | StringRef TypeName; |
285 | Node = Func->getMetadata(Kind: "kernel_arg_type" ); |
286 | if (Node && ArgNo < Node->getNumOperands()) |
287 | TypeName = cast<MDString>(Val: Node->getOperand(I: ArgNo))->getString(); |
288 | |
289 | StringRef BaseTypeName; |
290 | Node = Func->getMetadata(Kind: "kernel_arg_base_type" ); |
291 | if (Node && ArgNo < Node->getNumOperands()) |
292 | BaseTypeName = cast<MDString>(Val: Node->getOperand(I: ArgNo))->getString(); |
293 | |
294 | StringRef ActAccQual; |
295 | // Do we really need NoAlias check here? |
296 | if (Arg.getType()->isPointerTy() && Arg.hasNoAliasAttr()) { |
297 | if (Arg.onlyReadsMemory()) |
298 | ActAccQual = "read_only" ; |
299 | else if (Arg.hasAttribute(Attribute::Kind: WriteOnly)) |
300 | ActAccQual = "write_only" ; |
301 | } |
302 | |
303 | StringRef AccQual; |
304 | Node = Func->getMetadata(Kind: "kernel_arg_access_qual" ); |
305 | if (Node && ArgNo < Node->getNumOperands()) |
306 | AccQual = cast<MDString>(Val: Node->getOperand(I: ArgNo))->getString(); |
307 | |
308 | StringRef TypeQual; |
309 | Node = Func->getMetadata(Kind: "kernel_arg_type_qual" ); |
310 | if (Node && ArgNo < Node->getNumOperands()) |
311 | TypeQual = cast<MDString>(Val: Node->getOperand(I: ArgNo))->getString(); |
312 | |
313 | const DataLayout &DL = Func->getParent()->getDataLayout(); |
314 | |
315 | MaybeAlign PointeeAlign; |
316 | Type *Ty = Arg.hasByRefAttr() ? Arg.getParamByRefType() : Arg.getType(); |
317 | |
318 | // FIXME: Need to distinguish in memory alignment from pointer alignment. |
319 | if (auto PtrTy = dyn_cast<PointerType>(Val: Ty)) { |
320 | if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) |
321 | PointeeAlign = Arg.getParamAlign().valueOrOne(); |
322 | } |
323 | |
324 | // There's no distinction between byval aggregates and raw aggregates. |
325 | Type *ArgTy; |
326 | Align ArgAlign; |
327 | std::tie(args&: ArgTy, args&: ArgAlign) = getArgumentTypeAlign(Arg, DL); |
328 | |
329 | emitKernelArg(DL, Ty: ArgTy, Alignment: ArgAlign, |
330 | ValueKind: getValueKind(Ty: ArgTy, TypeQual, BaseTypeName), Offset, Args, |
331 | PointeeAlign, Name, TypeName, BaseTypeName, ActAccQual, |
332 | AccQual, TypeQual); |
333 | } |
334 | |
335 | void MetadataStreamerMsgPackV4::emitKernelArg( |
336 | const DataLayout &DL, Type *Ty, Align Alignment, StringRef ValueKind, |
337 | unsigned &Offset, msgpack::ArrayDocNode Args, MaybeAlign PointeeAlign, |
338 | StringRef Name, StringRef TypeName, StringRef BaseTypeName, |
339 | StringRef ActAccQual, StringRef AccQual, StringRef TypeQual) { |
340 | auto Arg = Args.getDocument()->getMapNode(); |
341 | |
342 | if (!Name.empty()) |
343 | Arg[".name" ] = Arg.getDocument()->getNode(V: Name, /*Copy=*/true); |
344 | if (!TypeName.empty()) |
345 | Arg[".type_name" ] = Arg.getDocument()->getNode(V: TypeName, /*Copy=*/true); |
346 | auto Size = DL.getTypeAllocSize(Ty); |
347 | Arg[".size" ] = Arg.getDocument()->getNode(V: Size); |
348 | Offset = alignTo(Size: Offset, A: Alignment); |
349 | Arg[".offset" ] = Arg.getDocument()->getNode(V: Offset); |
350 | Offset += Size; |
351 | Arg[".value_kind" ] = Arg.getDocument()->getNode(V: ValueKind, /*Copy=*/true); |
352 | if (PointeeAlign) |
353 | Arg[".pointee_align" ] = Arg.getDocument()->getNode(V: PointeeAlign->value()); |
354 | |
355 | if (auto PtrTy = dyn_cast<PointerType>(Val: Ty)) |
356 | if (auto Qualifier = getAddressSpaceQualifier(AddressSpace: PtrTy->getAddressSpace())) |
357 | // Limiting address space to emit only for a certain ValueKind. |
358 | if (ValueKind == "global_buffer" || ValueKind == "dynamic_shared_pointer" ) |
359 | Arg[".address_space" ] = Arg.getDocument()->getNode(V: *Qualifier, |
360 | /*Copy=*/true); |
361 | |
362 | if (auto AQ = getAccessQualifier(AccQual)) |
363 | Arg[".access" ] = Arg.getDocument()->getNode(V: *AQ, /*Copy=*/true); |
364 | |
365 | if (auto AAQ = getAccessQualifier(AccQual: ActAccQual)) |
366 | Arg[".actual_access" ] = Arg.getDocument()->getNode(V: *AAQ, /*Copy=*/true); |
367 | |
368 | SmallVector<StringRef, 1> SplitTypeQuals; |
369 | TypeQual.split(A&: SplitTypeQuals, Separator: " " , MaxSplit: -1, KeepEmpty: false); |
370 | for (StringRef Key : SplitTypeQuals) { |
371 | if (Key == "const" ) |
372 | Arg[".is_const" ] = Arg.getDocument()->getNode(V: true); |
373 | else if (Key == "restrict" ) |
374 | Arg[".is_restrict" ] = Arg.getDocument()->getNode(V: true); |
375 | else if (Key == "volatile" ) |
376 | Arg[".is_volatile" ] = Arg.getDocument()->getNode(V: true); |
377 | else if (Key == "pipe" ) |
378 | Arg[".is_pipe" ] = Arg.getDocument()->getNode(V: true); |
379 | } |
380 | |
381 | Args.push_back(N: Arg); |
382 | } |
383 | |
384 | void MetadataStreamerMsgPackV4::emitHiddenKernelArgs( |
385 | const MachineFunction &MF, unsigned &Offset, msgpack::ArrayDocNode Args) { |
386 | auto &Func = MF.getFunction(); |
387 | const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); |
388 | |
389 | unsigned HiddenArgNumBytes = ST.getImplicitArgNumBytes(Func); |
390 | if (!HiddenArgNumBytes) |
391 | return; |
392 | |
393 | const Module *M = Func.getParent(); |
394 | auto &DL = M->getDataLayout(); |
395 | auto Int64Ty = Type::getInt64Ty(C&: Func.getContext()); |
396 | |
397 | Offset = alignTo(Offset, ST.getAlignmentForImplicitArgPtr()); |
398 | |
399 | if (HiddenArgNumBytes >= 8) |
400 | emitKernelArg(DL, Ty: Int64Ty, Alignment: Align(8), ValueKind: "hidden_global_offset_x" , Offset, |
401 | Args); |
402 | if (HiddenArgNumBytes >= 16) |
403 | emitKernelArg(DL, Ty: Int64Ty, Alignment: Align(8), ValueKind: "hidden_global_offset_y" , Offset, |
404 | Args); |
405 | if (HiddenArgNumBytes >= 24) |
406 | emitKernelArg(DL, Ty: Int64Ty, Alignment: Align(8), ValueKind: "hidden_global_offset_z" , Offset, |
407 | Args); |
408 | |
409 | auto Int8PtrTy = |
410 | PointerType::get(C&: Func.getContext(), AddressSpace: AMDGPUAS::GLOBAL_ADDRESS); |
411 | |
412 | if (HiddenArgNumBytes >= 32) { |
413 | // We forbid the use of features requiring hostcall when compiling OpenCL |
414 | // before code object V5, which makes the mutual exclusion between the |
415 | // "printf buffer" and "hostcall buffer" here sound. |
416 | if (M->getNamedMetadata(Name: "llvm.printf.fmts" )) |
417 | emitKernelArg(DL, Ty: Int8PtrTy, Alignment: Align(8), ValueKind: "hidden_printf_buffer" , Offset, |
418 | Args); |
419 | else if (!Func.hasFnAttribute(Kind: "amdgpu-no-hostcall-ptr" )) |
420 | emitKernelArg(DL, Ty: Int8PtrTy, Alignment: Align(8), ValueKind: "hidden_hostcall_buffer" , Offset, |
421 | Args); |
422 | else |
423 | emitKernelArg(DL, Ty: Int8PtrTy, Alignment: Align(8), ValueKind: "hidden_none" , Offset, Args); |
424 | } |
425 | |
426 | // Emit "default queue" and "completion action" arguments if enqueue kernel is |
427 | // used, otherwise emit dummy "none" arguments. |
428 | if (HiddenArgNumBytes >= 40) { |
429 | if (!Func.hasFnAttribute(Kind: "amdgpu-no-default-queue" )) { |
430 | emitKernelArg(DL, Ty: Int8PtrTy, Alignment: Align(8), ValueKind: "hidden_default_queue" , Offset, |
431 | Args); |
432 | } else { |
433 | emitKernelArg(DL, Ty: Int8PtrTy, Alignment: Align(8), ValueKind: "hidden_none" , Offset, Args); |
434 | } |
435 | } |
436 | |
437 | if (HiddenArgNumBytes >= 48) { |
438 | if (!Func.hasFnAttribute(Kind: "amdgpu-no-completion-action" )) { |
439 | emitKernelArg(DL, Ty: Int8PtrTy, Alignment: Align(8), ValueKind: "hidden_completion_action" , Offset, |
440 | Args); |
441 | } else { |
442 | emitKernelArg(DL, Ty: Int8PtrTy, Alignment: Align(8), ValueKind: "hidden_none" , Offset, Args); |
443 | } |
444 | } |
445 | |
446 | // Emit the pointer argument for multi-grid object. |
447 | if (HiddenArgNumBytes >= 56) { |
448 | if (!Func.hasFnAttribute(Kind: "amdgpu-no-multigrid-sync-arg" )) { |
449 | emitKernelArg(DL, Ty: Int8PtrTy, Alignment: Align(8), ValueKind: "hidden_multigrid_sync_arg" , Offset, |
450 | Args); |
451 | } else { |
452 | emitKernelArg(DL, Ty: Int8PtrTy, Alignment: Align(8), ValueKind: "hidden_none" , Offset, Args); |
453 | } |
454 | } |
455 | } |
456 | |
457 | msgpack::MapDocNode |
458 | MetadataStreamerMsgPackV4::getHSAKernelProps(const MachineFunction &MF, |
459 | const SIProgramInfo &ProgramInfo, |
460 | unsigned CodeObjectVersion) const { |
461 | const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>(); |
462 | const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>(); |
463 | const Function &F = MF.getFunction(); |
464 | |
465 | auto Kern = HSAMetadataDoc->getMapNode(); |
466 | |
467 | Align MaxKernArgAlign; |
468 | Kern[".kernarg_segment_size" ] = Kern.getDocument()->getNode( |
469 | STM.getKernArgSegmentSize(F, MaxKernArgAlign)); |
470 | Kern[".group_segment_fixed_size" ] = |
471 | Kern.getDocument()->getNode(V: ProgramInfo.LDSSize); |
472 | Kern[".private_segment_fixed_size" ] = |
473 | Kern.getDocument()->getNode(V: ProgramInfo.ScratchSize); |
474 | if (CodeObjectVersion >= AMDGPU::AMDHSA_COV5) |
475 | Kern[".uses_dynamic_stack" ] = |
476 | Kern.getDocument()->getNode(V: ProgramInfo.DynamicCallStack); |
477 | |
478 | if (CodeObjectVersion >= AMDGPU::AMDHSA_COV5 && STM.supportsWGP()) |
479 | Kern[".workgroup_processor_mode" ] = |
480 | Kern.getDocument()->getNode(V: ProgramInfo.WgpMode); |
481 | |
482 | // FIXME: The metadata treats the minimum as 16? |
483 | Kern[".kernarg_segment_align" ] = |
484 | Kern.getDocument()->getNode(V: std::max(a: Align(4), b: MaxKernArgAlign).value()); |
485 | Kern[".wavefront_size" ] = |
486 | Kern.getDocument()->getNode(STM.getWavefrontSize()); |
487 | Kern[".sgpr_count" ] = Kern.getDocument()->getNode(V: ProgramInfo.NumSGPR); |
488 | Kern[".vgpr_count" ] = Kern.getDocument()->getNode(V: ProgramInfo.NumVGPR); |
489 | |
490 | // Only add AGPR count to metadata for supported devices |
491 | if (STM.hasMAIInsts()) { |
492 | Kern[".agpr_count" ] = Kern.getDocument()->getNode(V: ProgramInfo.NumAccVGPR); |
493 | } |
494 | |
495 | Kern[".max_flat_workgroup_size" ] = |
496 | Kern.getDocument()->getNode(V: MFI.getMaxFlatWorkGroupSize()); |
497 | unsigned NumWGX = MFI.getMaxNumWorkGroupsX(); |
498 | unsigned NumWGY = MFI.getMaxNumWorkGroupsY(); |
499 | unsigned NumWGZ = MFI.getMaxNumWorkGroupsZ(); |
500 | if (NumWGX != 0 && NumWGY != 0 && NumWGZ != 0) { |
501 | Kern[".max_num_workgroups_x" ] = Kern.getDocument()->getNode(V: NumWGX); |
502 | Kern[".max_num_workgroups_y" ] = Kern.getDocument()->getNode(V: NumWGY); |
503 | Kern[".max_num_workgroups_z" ] = Kern.getDocument()->getNode(V: NumWGZ); |
504 | } |
505 | Kern[".sgpr_spill_count" ] = |
506 | Kern.getDocument()->getNode(V: MFI.getNumSpilledSGPRs()); |
507 | Kern[".vgpr_spill_count" ] = |
508 | Kern.getDocument()->getNode(V: MFI.getNumSpilledVGPRs()); |
509 | |
510 | return Kern; |
511 | } |
512 | |
513 | bool MetadataStreamerMsgPackV4::emitTo(AMDGPUTargetStreamer &TargetStreamer) { |
514 | return TargetStreamer.EmitHSAMetadata(HSAMetadata&: *HSAMetadataDoc, Strict: true); |
515 | } |
516 | |
517 | void MetadataStreamerMsgPackV4::begin(const Module &Mod, |
518 | const IsaInfo::AMDGPUTargetID &TargetID) { |
519 | emitVersion(); |
520 | emitTargetID(TargetID); |
521 | emitPrintf(Mod); |
522 | getRootMetadata(Key: "amdhsa.kernels" ) = HSAMetadataDoc->getArrayNode(); |
523 | } |
524 | |
525 | void MetadataStreamerMsgPackV4::end() { |
526 | std::string HSAMetadataString; |
527 | raw_string_ostream StrOS(HSAMetadataString); |
528 | HSAMetadataDoc->toYAML(OS&: StrOS); |
529 | |
530 | if (DumpHSAMetadata) |
531 | dump(HSAMetadataString: StrOS.str()); |
532 | if (VerifyHSAMetadata) |
533 | verify(HSAMetadataString: StrOS.str()); |
534 | } |
535 | |
536 | void MetadataStreamerMsgPackV4::emitKernel(const MachineFunction &MF, |
537 | const SIProgramInfo &ProgramInfo) { |
538 | auto &Func = MF.getFunction(); |
539 | if (Func.getCallingConv() != CallingConv::AMDGPU_KERNEL && |
540 | Func.getCallingConv() != CallingConv::SPIR_KERNEL) |
541 | return; |
542 | |
543 | auto CodeObjectVersion = |
544 | AMDGPU::getAMDHSACodeObjectVersion(M: *Func.getParent()); |
545 | auto Kern = getHSAKernelProps(MF, ProgramInfo, CodeObjectVersion); |
546 | |
547 | auto Kernels = |
548 | getRootMetadata(Key: "amdhsa.kernels" ).getArray(/*Convert=*/true); |
549 | |
550 | { |
551 | Kern[".name" ] = Kern.getDocument()->getNode(V: Func.getName()); |
552 | Kern[".symbol" ] = Kern.getDocument()->getNode( |
553 | V: (Twine(Func.getName()) + Twine(".kd" )).str(), /*Copy=*/true); |
554 | emitKernelLanguage(Func, Kern); |
555 | emitKernelAttrs(Func, Kern); |
556 | emitKernelArgs(MF, Kern); |
557 | } |
558 | |
559 | Kernels.push_back(N: Kern); |
560 | } |
561 | |
562 | //===----------------------------------------------------------------------===// |
563 | // HSAMetadataStreamerV5 |
564 | //===----------------------------------------------------------------------===// |
565 | |
566 | void MetadataStreamerMsgPackV5::emitVersion() { |
567 | auto Version = HSAMetadataDoc->getArrayNode(); |
568 | Version.push_back(N: Version.getDocument()->getNode(V: VersionMajorV5)); |
569 | Version.push_back(N: Version.getDocument()->getNode(V: VersionMinorV5)); |
570 | getRootMetadata(Key: "amdhsa.version" ) = Version; |
571 | } |
572 | |
573 | void MetadataStreamerMsgPackV5::emitHiddenKernelArgs( |
574 | const MachineFunction &MF, unsigned &Offset, msgpack::ArrayDocNode Args) { |
575 | auto &Func = MF.getFunction(); |
576 | const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); |
577 | |
578 | // No implicit kernel argument is used. |
579 | if (ST.getImplicitArgNumBytes(Func) == 0) |
580 | return; |
581 | |
582 | const Module *M = Func.getParent(); |
583 | auto &DL = M->getDataLayout(); |
584 | const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>(); |
585 | |
586 | auto Int64Ty = Type::getInt64Ty(C&: Func.getContext()); |
587 | auto Int32Ty = Type::getInt32Ty(C&: Func.getContext()); |
588 | auto Int16Ty = Type::getInt16Ty(C&: Func.getContext()); |
589 | |
590 | Offset = alignTo(Offset, ST.getAlignmentForImplicitArgPtr()); |
591 | emitKernelArg(DL, Ty: Int32Ty, Alignment: Align(4), ValueKind: "hidden_block_count_x" , Offset, Args); |
592 | emitKernelArg(DL, Ty: Int32Ty, Alignment: Align(4), ValueKind: "hidden_block_count_y" , Offset, Args); |
593 | emitKernelArg(DL, Ty: Int32Ty, Alignment: Align(4), ValueKind: "hidden_block_count_z" , Offset, Args); |
594 | |
595 | emitKernelArg(DL, Ty: Int16Ty, Alignment: Align(2), ValueKind: "hidden_group_size_x" , Offset, Args); |
596 | emitKernelArg(DL, Ty: Int16Ty, Alignment: Align(2), ValueKind: "hidden_group_size_y" , Offset, Args); |
597 | emitKernelArg(DL, Ty: Int16Ty, Alignment: Align(2), ValueKind: "hidden_group_size_z" , Offset, Args); |
598 | |
599 | emitKernelArg(DL, Ty: Int16Ty, Alignment: Align(2), ValueKind: "hidden_remainder_x" , Offset, Args); |
600 | emitKernelArg(DL, Ty: Int16Ty, Alignment: Align(2), ValueKind: "hidden_remainder_y" , Offset, Args); |
601 | emitKernelArg(DL, Ty: Int16Ty, Alignment: Align(2), ValueKind: "hidden_remainder_z" , Offset, Args); |
602 | |
603 | // Reserved for hidden_tool_correlation_id. |
604 | Offset += 8; |
605 | |
606 | Offset += 8; // Reserved. |
607 | |
608 | emitKernelArg(DL, Ty: Int64Ty, Alignment: Align(8), ValueKind: "hidden_global_offset_x" , Offset, Args); |
609 | emitKernelArg(DL, Ty: Int64Ty, Alignment: Align(8), ValueKind: "hidden_global_offset_y" , Offset, Args); |
610 | emitKernelArg(DL, Ty: Int64Ty, Alignment: Align(8), ValueKind: "hidden_global_offset_z" , Offset, Args); |
611 | |
612 | emitKernelArg(DL, Ty: Int16Ty, Alignment: Align(2), ValueKind: "hidden_grid_dims" , Offset, Args); |
613 | |
614 | Offset += 6; // Reserved. |
615 | auto Int8PtrTy = |
616 | PointerType::get(C&: Func.getContext(), AddressSpace: AMDGPUAS::GLOBAL_ADDRESS); |
617 | |
618 | if (M->getNamedMetadata(Name: "llvm.printf.fmts" )) { |
619 | emitKernelArg(DL, Ty: Int8PtrTy, Alignment: Align(8), ValueKind: "hidden_printf_buffer" , Offset, |
620 | Args); |
621 | } else { |
622 | Offset += 8; // Skipped. |
623 | } |
624 | |
625 | if (!Func.hasFnAttribute(Kind: "amdgpu-no-hostcall-ptr" )) { |
626 | emitKernelArg(DL, Ty: Int8PtrTy, Alignment: Align(8), ValueKind: "hidden_hostcall_buffer" , Offset, |
627 | Args); |
628 | } else { |
629 | Offset += 8; // Skipped. |
630 | } |
631 | |
632 | if (!Func.hasFnAttribute(Kind: "amdgpu-no-multigrid-sync-arg" )) { |
633 | emitKernelArg(DL, Ty: Int8PtrTy, Alignment: Align(8), ValueKind: "hidden_multigrid_sync_arg" , Offset, |
634 | Args); |
635 | } else { |
636 | Offset += 8; // Skipped. |
637 | } |
638 | |
639 | if (!Func.hasFnAttribute(Kind: "amdgpu-no-heap-ptr" )) |
640 | emitKernelArg(DL, Ty: Int8PtrTy, Alignment: Align(8), ValueKind: "hidden_heap_v1" , Offset, Args); |
641 | else |
642 | Offset += 8; // Skipped. |
643 | |
644 | if (!Func.hasFnAttribute(Kind: "amdgpu-no-default-queue" )) { |
645 | emitKernelArg(DL, Ty: Int8PtrTy, Alignment: Align(8), ValueKind: "hidden_default_queue" , Offset, |
646 | Args); |
647 | } else { |
648 | Offset += 8; // Skipped. |
649 | } |
650 | |
651 | if (!Func.hasFnAttribute(Kind: "amdgpu-no-completion-action" )) { |
652 | emitKernelArg(DL, Ty: Int8PtrTy, Alignment: Align(8), ValueKind: "hidden_completion_action" , Offset, |
653 | Args); |
654 | } else { |
655 | Offset += 8; // Skipped. |
656 | } |
657 | |
658 | // Emit argument for hidden dynamic lds size |
659 | if (MFI.isDynamicLDSUsed()) { |
660 | emitKernelArg(DL, Ty: Int32Ty, Alignment: Align(4), ValueKind: "hidden_dynamic_lds_size" , Offset, |
661 | Args); |
662 | } else { |
663 | Offset += 4; // skipped |
664 | } |
665 | |
666 | Offset += 68; // Reserved. |
667 | |
668 | // hidden_private_base and hidden_shared_base are only when the subtarget has |
669 | // ApertureRegs. |
670 | if (!ST.hasApertureRegs()) { |
671 | emitKernelArg(DL, Ty: Int32Ty, Alignment: Align(4), ValueKind: "hidden_private_base" , Offset, Args); |
672 | emitKernelArg(DL, Ty: Int32Ty, Alignment: Align(4), ValueKind: "hidden_shared_base" , Offset, Args); |
673 | } else { |
674 | Offset += 8; // Skipped. |
675 | } |
676 | |
677 | if (MFI.getUserSGPRInfo().hasQueuePtr()) |
678 | emitKernelArg(DL, Ty: Int8PtrTy, Alignment: Align(8), ValueKind: "hidden_queue_ptr" , Offset, Args); |
679 | } |
680 | |
681 | void MetadataStreamerMsgPackV5::emitKernelAttrs(const Function &Func, |
682 | msgpack::MapDocNode Kern) { |
683 | MetadataStreamerMsgPackV4::emitKernelAttrs(Func, Kern); |
684 | |
685 | if (Func.getFnAttribute(Kind: "uniform-work-group-size" ).getValueAsBool()) |
686 | Kern[".uniform_work_group_size" ] = Kern.getDocument()->getNode(V: 1); |
687 | } |
688 | |
689 | //===----------------------------------------------------------------------===// |
690 | // HSAMetadataStreamerV6 |
691 | //===----------------------------------------------------------------------===// |
692 | |
693 | void MetadataStreamerMsgPackV6::emitVersion() { |
694 | auto Version = HSAMetadataDoc->getArrayNode(); |
695 | Version.push_back(N: Version.getDocument()->getNode(V: VersionMajorV6)); |
696 | Version.push_back(N: Version.getDocument()->getNode(V: VersionMinorV6)); |
697 | getRootMetadata(Key: "amdhsa.version" ) = Version; |
698 | } |
699 | |
700 | } // end namespace HSAMD |
701 | } // end namespace AMDGPU |
702 | } // end namespace llvm |
703 | |