//===- AMDGPUBaseInfo.cpp - AMDGPU Base encoding information --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDGPUBaseInfo.h"
#include "AMDGPU.h"
#include "AMDGPUAsmUtils.h"
#include "AMDKernelCodeT.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/TargetParser/TargetParser.h"
#include <optional>

#define GET_INSTRINFO_NAMED_OPS
#define GET_INSTRMAP_INFO
#include "AMDGPUGenInstrInfo.inc"

static llvm::cl::opt<unsigned> DefaultAMDHSACodeObjectVersion(
    "amdhsa-code-object-version", llvm::cl::Hidden,
    llvm::cl::init(llvm::AMDGPU::AMDHSA_COV5),
    llvm::cl::desc("Set default AMDHSA Code Object Version (module flag "
                   "or asm directive still take priority if present)"));

namespace {

/// \returns Bit mask for given bit \p Shift and bit \p Width.
unsigned getBitMask(unsigned Shift, unsigned Width) {
  return ((1 << Width) - 1) << Shift;
}
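// For illustration: getBitMask(4, 3) evaluates to ((1 << 3) - 1) << 4 == 0x70,
// i.e. bits [6:4] set.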

/// Packs \p Src into \p Dst for given bit \p Shift and bit \p Width.
///
/// \returns Packed \p Dst.
unsigned packBits(unsigned Src, unsigned Dst, unsigned Shift, unsigned Width) {
  unsigned Mask = getBitMask(Shift, Width);
  return ((Src << Shift) & Mask) | (Dst & ~Mask);
}

/// Unpacks bits from \p Src for given bit \p Shift and bit \p Width.
///
/// \returns Unpacked bits.
unsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) {
  return (Src & getBitMask(Shift, Width)) >> Shift;
}
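// A quick sanity check of the pair above: packBits(0x3, 0x0, 4, 3) == 0x30 and
// unpackBits(0x30, 4, 3) == 0x3, so unpackBits inverts packBits over a single
// bit field.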

/// \returns Vmcnt bit shift (lower bits).
unsigned getVmcntBitShiftLo(unsigned VersionMajor) {
  return VersionMajor >= 11 ? 10 : 0;
}

/// \returns Vmcnt bit width (lower bits).
unsigned getVmcntBitWidthLo(unsigned VersionMajor) {
  return VersionMajor >= 11 ? 6 : 4;
}

/// \returns Expcnt bit shift.
unsigned getExpcntBitShift(unsigned VersionMajor) {
  return VersionMajor >= 11 ? 0 : 4;
}

/// \returns Expcnt bit width.
unsigned getExpcntBitWidth(unsigned VersionMajor) { return 3; }

/// \returns Lgkmcnt bit shift.
unsigned getLgkmcntBitShift(unsigned VersionMajor) {
  return VersionMajor >= 11 ? 4 : 8;
}

/// \returns Lgkmcnt bit width.
unsigned getLgkmcntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 10 ? 6 : 4;
}

/// \returns Vmcnt bit shift (higher bits).
unsigned getVmcntBitShiftHi(unsigned VersionMajor) { return 14; }

/// \returns Vmcnt bit width (higher bits).
unsigned getVmcntBitWidthHi(unsigned VersionMajor) {
  return (VersionMajor == 9 || VersionMajor == 10) ? 2 : 0;
}
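// Taken together, the helpers above describe the pre-GFX11 layout in which the
// 6-bit vmcnt is split: bits [3:0] sit at the bottom of the waitcnt encoding
// while bits [5:4] sit at bit 14 on GFX9 and GFX10.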

/// \returns Loadcnt bit width.
unsigned getLoadcntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 12 ? 6 : 0;
}

/// \returns Samplecnt bit width.
unsigned getSamplecntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 12 ? 6 : 0;
}

/// \returns Bvhcnt bit width.
unsigned getBvhcntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 12 ? 3 : 0;
}

/// \returns Dscnt bit width.
unsigned getDscntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 12 ? 6 : 0;
}

/// \returns Dscnt bit shift in combined S_WAIT instructions.
unsigned getDscntBitShift(unsigned VersionMajor) { return 0; }

/// \returns Storecnt or Vscnt bit width, depending on VersionMajor.
unsigned getStorecntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 10 ? 6 : 0;
}

/// \returns Kmcnt bit width.
unsigned getKmcntBitWidth(unsigned VersionMajor) {
  return VersionMajor >= 12 ? 5 : 0;
}

/// \returns Shift for Loadcnt/Storecnt in combined S_WAIT instructions.
unsigned getLoadcntStorecntBitShift(unsigned VersionMajor) {
  return VersionMajor >= 12 ? 8 : 0;
}

/// \returns VmVsrc bit width.
inline unsigned getVmVsrcBitWidth() { return 3; }

/// \returns VmVsrc bit shift.
inline unsigned getVmVsrcBitShift() { return 2; }

/// \returns VaVdst bit width.
inline unsigned getVaVdstBitWidth() { return 4; }

/// \returns VaVdst bit shift.
inline unsigned getVaVdstBitShift() { return 12; }

/// \returns SaSdst bit width.
inline unsigned getSaSdstBitWidth() { return 1; }

/// \returns SaSdst bit shift.
inline unsigned getSaSdstBitShift() { return 0; }

} // end anonymous namespace

namespace llvm {

namespace AMDGPU {

/// \returns True if \p STI is AMDHSA.
bool isHsaAbi(const MCSubtargetInfo &STI) {
  return STI.getTargetTriple().getOS() == Triple::AMDHSA;
}

unsigned getAMDHSACodeObjectVersion(const Module &M) {
  if (auto Ver = mdconst::extract_or_null<ConstantInt>(
          M.getModuleFlag("amdhsa_code_object_version"))) {
    return (unsigned)Ver->getZExtValue() / 100;
  }

  return getDefaultAMDHSACodeObjectVersion();
}
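// Note: the "amdhsa_code_object_version" module flag stores the version scaled
// by 100, so a flag value of 500, for example, maps to code object version 5.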

unsigned getDefaultAMDHSACodeObjectVersion() {
  return DefaultAMDHSACodeObjectVersion;
}

unsigned getAMDHSACodeObjectVersion(unsigned ABIVersion) {
  switch (ABIVersion) {
  case ELF::ELFABIVERSION_AMDGPU_HSA_V4:
    return 4;
  case ELF::ELFABIVERSION_AMDGPU_HSA_V5:
    return 5;
  case ELF::ELFABIVERSION_AMDGPU_HSA_V6:
    return 6;
  default:
    return getDefaultAMDHSACodeObjectVersion();
  }
}

uint8_t getELFABIVersion(const Triple &T, unsigned CodeObjectVersion) {
  if (T.getOS() != Triple::AMDHSA)
    return 0;

  switch (CodeObjectVersion) {
  case 4:
    return ELF::ELFABIVERSION_AMDGPU_HSA_V4;
  case 5:
    return ELF::ELFABIVERSION_AMDGPU_HSA_V5;
  case 6:
    return ELF::ELFABIVERSION_AMDGPU_HSA_V6;
  default:
    report_fatal_error("Unsupported AMDHSA Code Object Version " +
                       Twine(CodeObjectVersion));
  }
}

unsigned getMultigridSyncArgImplicitArgPosition(unsigned CodeObjectVersion) {
  switch (CodeObjectVersion) {
  case AMDHSA_COV4:
    return 48;
  case AMDHSA_COV5:
  case AMDHSA_COV6:
  default:
    return AMDGPU::ImplicitArg::MULTIGRID_SYNC_ARG_OFFSET;
  }
}

// FIXME: All such magic numbers about the ABI should be in a
// central TD file.
unsigned getHostcallImplicitArgPosition(unsigned CodeObjectVersion) {
  switch (CodeObjectVersion) {
  case AMDHSA_COV4:
    return 24;
  case AMDHSA_COV5:
  case AMDHSA_COV6:
  default:
    return AMDGPU::ImplicitArg::HOSTCALL_PTR_OFFSET;
  }
}

unsigned getDefaultQueueImplicitArgPosition(unsigned CodeObjectVersion) {
  switch (CodeObjectVersion) {
  case AMDHSA_COV4:
    return 32;
  case AMDHSA_COV5:
  case AMDHSA_COV6:
  default:
    return AMDGPU::ImplicitArg::DEFAULT_QUEUE_OFFSET;
  }
}

unsigned getCompletionActionImplicitArgPosition(unsigned CodeObjectVersion) {
  switch (CodeObjectVersion) {
  case AMDHSA_COV4:
    return 40;
  case AMDHSA_COV5:
  case AMDHSA_COV6:
  default:
    return AMDGPU::ImplicitArg::COMPLETION_ACTION_OFFSET;
  }
}

#define GET_MIMGBaseOpcodesTable_IMPL
#define GET_MIMGDimInfoTable_IMPL
#define GET_MIMGInfoTable_IMPL
#define GET_MIMGLZMappingTable_IMPL
#define GET_MIMGMIPMappingTable_IMPL
#define GET_MIMGBiasMappingTable_IMPL
#define GET_MIMGOffsetMappingTable_IMPL
#define GET_MIMGG16MappingTable_IMPL
#define GET_MAIInstInfoTable_IMPL
#include "AMDGPUGenSearchableTables.inc"

int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
                  unsigned VDataDwords, unsigned VAddrDwords) {
  const MIMGInfo *Info =
      getMIMGOpcodeHelper(BaseOpcode, MIMGEncoding, VDataDwords, VAddrDwords);
  return Info ? Info->Opcode : -1;
}

const MIMGBaseOpcodeInfo *getMIMGBaseOpcode(unsigned Opc) {
  const MIMGInfo *Info = getMIMGInfo(Opc);
  return Info ? getMIMGBaseOpcodeInfo(Info->BaseOpcode) : nullptr;
}

int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels) {
  const MIMGInfo *OrigInfo = getMIMGInfo(Opc);
  const MIMGInfo *NewInfo =
      getMIMGOpcodeHelper(OrigInfo->BaseOpcode, OrigInfo->MIMGEncoding,
                          NewChannels, OrigInfo->VAddrDwords);
  return NewInfo ? NewInfo->Opcode : -1;
}

unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode,
                           const MIMGDimInfo *Dim, bool IsA16,
                           bool IsG16Supported) {
  unsigned AddrWords = BaseOpcode->NumExtraArgs;
  unsigned AddrComponents = (BaseOpcode->Coordinates ? Dim->NumCoords : 0) +
                            (BaseOpcode->LodOrClampOrMip ? 1 : 0);
  if (IsA16)
    AddrWords += divideCeil(AddrComponents, 2);
  else
    AddrWords += AddrComponents;

  // Note: For subtargets that support A16 but not G16, enabling A16 also
  // enables 16 bit gradients.
  // For subtargets that support A16 (operand) and G16 (done with a different
  // instruction encoding), they are independent.

  if (BaseOpcode->Gradients) {
    if ((IsA16 && !IsG16Supported) || BaseOpcode->G16)
      // There are two gradients per coordinate, we pack them separately.
      // For the 3d case,
      // we get (dy/du, dx/du) (-, dz/du) (dy/dv, dx/dv) (-, dz/dv)
      AddrWords += alignTo<2>(Dim->NumGradients / 2);
    else
      AddrWords += Dim->NumGradients;
  }
  return AddrWords;
}
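// A worked example of the packing rules above (assuming a base opcode with
// Coordinates set and no extra arguments or LOD/clamp/mip): a 2D sample needs
// two address words, but with A16 the two 16-bit coordinates share a single
// dword, i.e. divideCeil(2, 2) == 1.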

struct MUBUFInfo {
  uint16_t Opcode;
  uint16_t BaseOpcode;
  uint8_t elements;
  bool has_vaddr;
  bool has_srsrc;
  bool has_soffset;
  bool IsBufferInv;
  bool tfe;
};

struct MTBUFInfo {
  uint16_t Opcode;
  uint16_t BaseOpcode;
  uint8_t elements;
  bool has_vaddr;
  bool has_srsrc;
  bool has_soffset;
};

struct SMInfo {
  uint16_t Opcode;
  bool IsBuffer;
};

struct VOPInfo {
  uint16_t Opcode;
  bool IsSingle;
};

struct VOPC64DPPInfo {
  uint16_t Opcode;
};

struct VOPCDPPAsmOnlyInfo {
  uint16_t Opcode;
};

struct VOP3CDPPAsmOnlyInfo {
  uint16_t Opcode;
};

struct VOPDComponentInfo {
  uint16_t BaseVOP;
  uint16_t VOPDOp;
  bool CanBeVOPDX;
};

struct VOPDInfo {
  uint16_t Opcode;
  uint16_t OpX;
  uint16_t OpY;
  uint16_t Subtarget;
};

struct VOPTrue16Info {
  uint16_t Opcode;
  bool IsTrue16;
};

#define GET_MTBUFInfoTable_DECL
#define GET_MTBUFInfoTable_IMPL
#define GET_MUBUFInfoTable_DECL
#define GET_MUBUFInfoTable_IMPL
#define GET_SMInfoTable_DECL
#define GET_SMInfoTable_IMPL
#define GET_VOP1InfoTable_DECL
#define GET_VOP1InfoTable_IMPL
#define GET_VOP2InfoTable_DECL
#define GET_VOP2InfoTable_IMPL
#define GET_VOP3InfoTable_DECL
#define GET_VOP3InfoTable_IMPL
#define GET_VOPC64DPPTable_DECL
#define GET_VOPC64DPPTable_IMPL
#define GET_VOPC64DPP8Table_DECL
#define GET_VOPC64DPP8Table_IMPL
#define GET_VOPCAsmOnlyInfoTable_DECL
#define GET_VOPCAsmOnlyInfoTable_IMPL
#define GET_VOP3CAsmOnlyInfoTable_DECL
#define GET_VOP3CAsmOnlyInfoTable_IMPL
#define GET_VOPDComponentTable_DECL
#define GET_VOPDComponentTable_IMPL
#define GET_VOPDPairs_DECL
#define GET_VOPDPairs_IMPL
#define GET_VOPTrue16Table_DECL
#define GET_VOPTrue16Table_IMPL
#define GET_WMMAOpcode2AddrMappingTable_DECL
#define GET_WMMAOpcode2AddrMappingTable_IMPL
#define GET_WMMAOpcode3AddrMappingTable_DECL
#define GET_WMMAOpcode3AddrMappingTable_IMPL
#include "AMDGPUGenSearchableTables.inc"

int getMTBUFBaseOpcode(unsigned Opc) {
  const MTBUFInfo *Info = getMTBUFInfoFromOpcode(Opc);
  return Info ? Info->BaseOpcode : -1;
}

int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements) {
  const MTBUFInfo *Info =
      getMTBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements);
  return Info ? Info->Opcode : -1;
}

int getMTBUFElements(unsigned Opc) {
  const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
  return Info ? Info->elements : 0;
}

bool getMTBUFHasVAddr(unsigned Opc) {
  const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
  return Info ? Info->has_vaddr : false;
}

bool getMTBUFHasSrsrc(unsigned Opc) {
  const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
  return Info ? Info->has_srsrc : false;
}

bool getMTBUFHasSoffset(unsigned Opc) {
  const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
  return Info ? Info->has_soffset : false;
}

int getMUBUFBaseOpcode(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFInfoFromOpcode(Opc);
  return Info ? Info->BaseOpcode : -1;
}

int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements) {
  const MUBUFInfo *Info =
      getMUBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements);
  return Info ? Info->Opcode : -1;
}

int getMUBUFElements(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
  return Info ? Info->elements : 0;
}

bool getMUBUFHasVAddr(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
  return Info ? Info->has_vaddr : false;
}

bool getMUBUFHasSrsrc(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
  return Info ? Info->has_srsrc : false;
}

bool getMUBUFHasSoffset(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
  return Info ? Info->has_soffset : false;
}

bool getMUBUFIsBufferInv(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
  return Info ? Info->IsBufferInv : false;
}

bool getMUBUFTfe(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
  return Info ? Info->tfe : false;
}

bool getSMEMIsBuffer(unsigned Opc) {
  const SMInfo *Info = getSMEMOpcodeHelper(Opc);
  return Info ? Info->IsBuffer : false;
}

bool getVOP1IsSingle(unsigned Opc) {
  const VOPInfo *Info = getVOP1OpcodeHelper(Opc);
  return Info ? Info->IsSingle : false;
}

bool getVOP2IsSingle(unsigned Opc) {
  const VOPInfo *Info = getVOP2OpcodeHelper(Opc);
  return Info ? Info->IsSingle : false;
}

bool getVOP3IsSingle(unsigned Opc) {
  const VOPInfo *Info = getVOP3OpcodeHelper(Opc);
  return Info ? Info->IsSingle : false;
}

bool isVOPC64DPP(unsigned Opc) {
  return isVOPC64DPPOpcodeHelper(Opc) || isVOPC64DPP8OpcodeHelper(Opc);
}

bool isVOPCAsmOnly(unsigned Opc) { return isVOPCAsmOnlyOpcodeHelper(Opc); }

bool getMAIIsDGEMM(unsigned Opc) {
  const MAIInstInfo *Info = getMAIInstInfoHelper(Opc);
  return Info ? Info->is_dgemm : false;
}

bool getMAIIsGFX940XDL(unsigned Opc) {
  const MAIInstInfo *Info = getMAIInstInfoHelper(Opc);
  return Info ? Info->is_gfx940_xdl : false;
}

unsigned getVOPDEncodingFamily(const MCSubtargetInfo &ST) {
  if (ST.hasFeature(AMDGPU::FeatureGFX12Insts))
    return SIEncodingFamily::GFX12;
  if (ST.hasFeature(AMDGPU::FeatureGFX11Insts))
    return SIEncodingFamily::GFX11;
  llvm_unreachable("Subtarget generation does not support VOPD!");
}

CanBeVOPD getCanBeVOPD(unsigned Opc) {
  const VOPDComponentInfo *Info = getVOPDComponentHelper(Opc);
  if (Info)
    return {Info->CanBeVOPDX, true};
  else
    return {false, false};
}

unsigned getVOPDOpcode(unsigned Opc) {
  const VOPDComponentInfo *Info = getVOPDComponentHelper(Opc);
  return Info ? Info->VOPDOp : ~0u;
}

bool isVOPD(unsigned Opc) {
  return AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0X);
}

bool isMAC(unsigned Opc) {
  return Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
         Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
         Opc == AMDGPU::V_MAC_F32_e64_vi ||
         Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
         Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
         Opc == AMDGPU::V_MAC_F16_e64_vi ||
         Opc == AMDGPU::V_FMAC_F64_e64_gfx90a ||
         Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
         Opc == AMDGPU::V_FMAC_F32_e64_gfx11 ||
         Opc == AMDGPU::V_FMAC_F32_e64_gfx12 ||
         Opc == AMDGPU::V_FMAC_F32_e64_vi ||
         Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
         Opc == AMDGPU::V_FMAC_DX9_ZERO_F32_e64_gfx11 ||
         Opc == AMDGPU::V_FMAC_F16_e64_gfx10 ||
         Opc == AMDGPU::V_FMAC_F16_t16_e64_gfx11 ||
         Opc == AMDGPU::V_FMAC_F16_t16_e64_gfx12 ||
         Opc == AMDGPU::V_DOT2C_F32_F16_e64_vi ||
         Opc == AMDGPU::V_DOT2C_I32_I16_e64_vi ||
         Opc == AMDGPU::V_DOT4C_I32_I8_e64_vi ||
         Opc == AMDGPU::V_DOT8C_I32_I4_e64_vi;
}

bool isPermlane16(unsigned Opc) {
  return Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
         Opc == AMDGPU::V_PERMLANEX16_B32_gfx10 ||
         Opc == AMDGPU::V_PERMLANE16_B32_e64_gfx11 ||
         Opc == AMDGPU::V_PERMLANEX16_B32_e64_gfx11 ||
         Opc == AMDGPU::V_PERMLANE16_B32_e64_gfx12 ||
         Opc == AMDGPU::V_PERMLANEX16_B32_e64_gfx12 ||
         Opc == AMDGPU::V_PERMLANE16_VAR_B32_e64_gfx12 ||
         Opc == AMDGPU::V_PERMLANEX16_VAR_B32_e64_gfx12;
}

bool isCvt_F32_Fp8_Bf8_e64(unsigned Opc) {
  return Opc == AMDGPU::V_CVT_F32_BF8_e64_gfx12 ||
         Opc == AMDGPU::V_CVT_F32_FP8_e64_gfx12 ||
         Opc == AMDGPU::V_CVT_F32_BF8_e64_dpp_gfx12 ||
         Opc == AMDGPU::V_CVT_F32_FP8_e64_dpp_gfx12 ||
         Opc == AMDGPU::V_CVT_F32_BF8_e64_dpp8_gfx12 ||
         Opc == AMDGPU::V_CVT_F32_FP8_e64_dpp8_gfx12 ||
         Opc == AMDGPU::V_CVT_PK_F32_BF8_e64_gfx12 ||
         Opc == AMDGPU::V_CVT_PK_F32_FP8_e64_gfx12;
}

bool isGenericAtomic(unsigned Opc) {
  return Opc == AMDGPU::G_AMDGPU_ATOMIC_FMIN ||
         Opc == AMDGPU::G_AMDGPU_ATOMIC_FMAX ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SWAP ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_ADD ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SUB ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMIN ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMIN ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMAX ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMAX ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_AND ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_OR ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_XOR ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_INC ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_DEC ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FADD ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMIN ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMAX ||
         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_CMPSWAP ||
         Opc == AMDGPU::G_AMDGPU_ATOMIC_CMPXCHG;
}

bool isTrue16Inst(unsigned Opc) {
  const VOPTrue16Info *Info = getTrue16OpcodeHelper(Opc);
  return Info ? Info->IsTrue16 : false;
}

unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc) {
  const WMMAOpcodeMappingInfo *Info = getWMMAMappingInfoFrom2AddrOpcode(Opc);
  return Info ? Info->Opcode3Addr : ~0u;
}

unsigned mapWMMA3AddrTo2AddrOpcode(unsigned Opc) {
  const WMMAOpcodeMappingInfo *Info = getWMMAMappingInfoFrom3AddrOpcode(Opc);
  return Info ? Info->Opcode2Addr : ~0u;
}

// Wrapper for Tablegen'd function. enum Subtarget is not defined in any
// header files, so we need to wrap it in a function that takes unsigned
// instead.
int getMCOpcode(uint16_t Opcode, unsigned Gen) {
  return getMCOpcodeGen(Opcode, static_cast<Subtarget>(Gen));
}

int getVOPDFull(unsigned OpX, unsigned OpY, unsigned EncodingFamily) {
  const VOPDInfo *Info =
      getVOPDInfoFromComponentOpcodes(OpX, OpY, EncodingFamily);
  return Info ? Info->Opcode : -1;
}

std::pair<unsigned, unsigned> getVOPDComponents(unsigned VOPDOpcode) {
  const VOPDInfo *Info = getVOPDOpcodeHelper(VOPDOpcode);
  assert(Info);
  auto OpX = getVOPDBaseFromComponent(Info->OpX);
  auto OpY = getVOPDBaseFromComponent(Info->OpY);
  assert(OpX && OpY);
  return {OpX->BaseVOP, OpY->BaseVOP};
}
namespace VOPD {

ComponentProps::ComponentProps(const MCInstrDesc &OpDesc) {
  assert(OpDesc.getNumDefs() == Component::DST_NUM);

  assert(OpDesc.getOperandConstraint(Component::SRC0, MCOI::TIED_TO) == -1);
  assert(OpDesc.getOperandConstraint(Component::SRC1, MCOI::TIED_TO) == -1);
  auto TiedIdx = OpDesc.getOperandConstraint(Component::SRC2, MCOI::TIED_TO);
  assert(TiedIdx == -1 || TiedIdx == Component::DST);
  HasSrc2Acc = TiedIdx != -1;

  SrcOperandsNum = OpDesc.getNumOperands() - OpDesc.getNumDefs();
  assert(SrcOperandsNum <= Component::MAX_SRC_NUM);

  auto OperandsNum = OpDesc.getNumOperands();
  unsigned CompOprIdx;
  for (CompOprIdx = Component::SRC1; CompOprIdx < OperandsNum; ++CompOprIdx) {
    if (OpDesc.operands()[CompOprIdx].OperandType == AMDGPU::OPERAND_KIMM32) {
      MandatoryLiteralIdx = CompOprIdx;
      break;
    }
  }
}

unsigned ComponentInfo::getIndexInParsedOperands(unsigned CompOprIdx) const {
  assert(CompOprIdx < Component::MAX_OPR_NUM);

  if (CompOprIdx == Component::DST)
    return getIndexOfDstInParsedOperands();

  auto CompSrcIdx = CompOprIdx - Component::DST_NUM;
  if (CompSrcIdx < getCompParsedSrcOperandsNum())
    return getIndexOfSrcInParsedOperands(CompSrcIdx);

  // The specified operand does not exist.
  return 0;
}

std::optional<unsigned> InstInfo::getInvalidCompOperandIndex(
    std::function<unsigned(unsigned, unsigned)> GetRegIdx, bool SkipSrc) const {

  auto OpXRegs = getRegIndices(ComponentIndex::X, GetRegIdx);
  auto OpYRegs = getRegIndices(ComponentIndex::Y, GetRegIdx);

  const unsigned CompOprNum =
      SkipSrc ? Component::DST_NUM : Component::MAX_OPR_NUM;
  unsigned CompOprIdx;
  for (CompOprIdx = 0; CompOprIdx < CompOprNum; ++CompOprIdx) {
    unsigned BanksMasks = VOPD_VGPR_BANK_MASKS[CompOprIdx];
    if (OpXRegs[CompOprIdx] && OpYRegs[CompOprIdx] &&
        ((OpXRegs[CompOprIdx] & BanksMasks) ==
         (OpYRegs[CompOprIdx] & BanksMasks)))
      return CompOprIdx;
  }

  return {};
}

// Return an array of VGPR registers [DST,SRC0,SRC1,SRC2] used
// by the specified component. If an operand is unused
// or is not a VGPR, the corresponding value is 0.
//
// GetRegIdx(Component, MCOperandIdx) must return a VGPR register index
// for the specified component and MC operand. The callback must return 0
// if the operand is not a register or not a VGPR.
InstInfo::RegIndices InstInfo::getRegIndices(
    unsigned CompIdx,
    std::function<unsigned(unsigned, unsigned)> GetRegIdx) const {
  assert(CompIdx < COMPONENTS_NUM);

  const auto &Comp = CompInfo[CompIdx];
  InstInfo::RegIndices RegIndices;

  RegIndices[DST] = GetRegIdx(CompIdx, Comp.getIndexOfDstInMCOperands());

  for (unsigned CompOprIdx : {SRC0, SRC1, SRC2}) {
    unsigned CompSrcIdx = CompOprIdx - DST_NUM;
    RegIndices[CompOprIdx] =
        Comp.hasRegSrcOperand(CompSrcIdx)
            ? GetRegIdx(CompIdx, Comp.getIndexOfSrcInMCOperands(CompSrcIdx))
            : 0;
  }
  return RegIndices;
}

} // namespace VOPD

VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY) {
  return VOPD::InstInfo(OpX, OpY);
}

VOPD::InstInfo getVOPDInstInfo(unsigned VOPDOpcode,
                               const MCInstrInfo *InstrInfo) {
  auto [OpX, OpY] = getVOPDComponents(VOPDOpcode);
  const auto &OpXDesc = InstrInfo->get(OpX);
  const auto &OpYDesc = InstrInfo->get(OpY);
  VOPD::ComponentInfo OpXInfo(OpXDesc, VOPD::ComponentKind::COMPONENT_X);
  VOPD::ComponentInfo OpYInfo(OpYDesc, OpXInfo);
  return VOPD::InstInfo(OpXInfo, OpYInfo);
}

namespace IsaInfo {

AMDGPUTargetID::AMDGPUTargetID(const MCSubtargetInfo &STI)
    : STI(STI), XnackSetting(TargetIDSetting::Any),
      SramEccSetting(TargetIDSetting::Any) {
  if (!STI.getFeatureBits().test(FeatureSupportsXNACK))
    XnackSetting = TargetIDSetting::Unsupported;
  if (!STI.getFeatureBits().test(FeatureSupportsSRAMECC))
    SramEccSetting = TargetIDSetting::Unsupported;
}

void AMDGPUTargetID::setTargetIDFromFeaturesString(StringRef FS) {
  // Check if xnack or sramecc is explicitly enabled or disabled. In the
  // absence of the target features, we assume we must generate code that can
  // run in any environment.
  SubtargetFeatures Features(FS);
  std::optional<bool> XnackRequested;
  std::optional<bool> SramEccRequested;

  for (const std::string &Feature : Features.getFeatures()) {
    if (Feature == "+xnack")
      XnackRequested = true;
    else if (Feature == "-xnack")
      XnackRequested = false;
    else if (Feature == "+sramecc")
      SramEccRequested = true;
    else if (Feature == "-sramecc")
      SramEccRequested = false;
  }

  bool XnackSupported = isXnackSupported();
  bool SramEccSupported = isSramEccSupported();

  if (XnackRequested) {
    if (XnackSupported) {
      XnackSetting =
          *XnackRequested ? TargetIDSetting::On : TargetIDSetting::Off;
    } else {
      // If a specific xnack setting was requested and this GPU does not
      // support xnack, emit a warning. The setting will remain "Unsupported".
      if (*XnackRequested) {
        errs() << "warning: xnack 'On' was requested for a processor that "
                  "does not support it!\n";
      } else {
        errs() << "warning: xnack 'Off' was requested for a processor that "
                  "does not support it!\n";
      }
    }
  }

  if (SramEccRequested) {
    if (SramEccSupported) {
      SramEccSetting =
          *SramEccRequested ? TargetIDSetting::On : TargetIDSetting::Off;
    } else {
      // If a specific sramecc setting was requested and this GPU does not
      // support sramecc, emit a warning. The setting will remain
      // "Unsupported".
      if (*SramEccRequested) {
        errs() << "warning: sramecc 'On' was requested for a processor that "
                  "does not support it!\n";
      } else {
        errs() << "warning: sramecc 'Off' was requested for a processor that "
                  "does not support it!\n";
      }
    }
  }
}

static TargetIDSetting
getTargetIDSettingFromFeatureString(StringRef FeatureString) {
  if (FeatureString.ends_with("-"))
    return TargetIDSetting::Off;
  if (FeatureString.ends_with("+"))
    return TargetIDSetting::On;

  llvm_unreachable("Malformed feature string");
}

void AMDGPUTargetID::setTargetIDFromTargetIDStream(StringRef TargetID) {
  SmallVector<StringRef, 3> TargetIDSplit;
  TargetID.split(TargetIDSplit, ':');

  for (const auto &FeatureString : TargetIDSplit) {
    if (FeatureString.starts_with("xnack"))
      XnackSetting = getTargetIDSettingFromFeatureString(FeatureString);
    if (FeatureString.starts_with("sramecc"))
      SramEccSetting = getTargetIDSettingFromFeatureString(FeatureString);
  }
}

std::string AMDGPUTargetID::toString() const {
  std::string StringRep;
  raw_string_ostream StreamRep(StringRep);

  auto TargetTriple = STI.getTargetTriple();
  auto Version = getIsaVersion(STI.getCPU());

  StreamRep << TargetTriple.getArchName() << '-'
            << TargetTriple.getVendorName() << '-'
            << TargetTriple.getOSName() << '-'
            << TargetTriple.getEnvironmentName() << '-';

  std::string Processor;
  // TODO: Following else statement is present here because we used various
  // alias names for GPUs up until GFX9 (e.g. 'fiji' is same as 'gfx803').
  // Remove once all aliases are removed from GCNProcessors.td.
  if (Version.Major >= 9)
    Processor = STI.getCPU().str();
  else
    Processor = (Twine("gfx") + Twine(Version.Major) + Twine(Version.Minor) +
                 Twine(Version.Stepping))
                    .str();

  std::string Features;
  if (STI.getTargetTriple().getOS() == Triple::AMDHSA) {
    // sramecc.
    if (getSramEccSetting() == TargetIDSetting::Off)
      Features += ":sramecc-";
    else if (getSramEccSetting() == TargetIDSetting::On)
      Features += ":sramecc+";
    // xnack.
    if (getXnackSetting() == TargetIDSetting::Off)
      Features += ":xnack-";
    else if (getXnackSetting() == TargetIDSetting::On)
      Features += ":xnack+";
  }

  StreamRep << Processor << Features;

  StreamRep.flush();
  return StringRep;
}

unsigned getWavefrontSize(const MCSubtargetInfo *STI) {
  if (STI->getFeatureBits().test(FeatureWavefrontSize16))
    return 16;
  if (STI->getFeatureBits().test(FeatureWavefrontSize32))
    return 32;

  return 64;
}

unsigned getLocalMemorySize(const MCSubtargetInfo *STI) {
  unsigned BytesPerCU = 0;
  if (STI->getFeatureBits().test(FeatureLocalMemorySize32768))
    BytesPerCU = 32768;
  if (STI->getFeatureBits().test(FeatureLocalMemorySize65536))
    BytesPerCU = 65536;

  // "Per CU" really means "per whatever functional block the waves of a
  // workgroup must share". So the effective local memory size is doubled in
  // WGP mode on gfx10.
  if (isGFX10Plus(*STI) && !STI->getFeatureBits().test(FeatureCuMode))
    BytesPerCU *= 2;

  return BytesPerCU;
}

unsigned getAddressableLocalMemorySize(const MCSubtargetInfo *STI) {
  if (STI->getFeatureBits().test(FeatureLocalMemorySize32768))
    return 32768;
  if (STI->getFeatureBits().test(FeatureLocalMemorySize65536))
    return 65536;
  return 0;
}

unsigned getEUsPerCU(const MCSubtargetInfo *STI) {
  // "Per CU" really means "per whatever functional block the waves of a
  // workgroup must share". For gfx10 in CU mode this is the CU, which contains
  // two SIMDs.
  if (isGFX10Plus(*STI) && STI->getFeatureBits().test(FeatureCuMode))
    return 2;
  // Pre-gfx10 a CU contains four SIMDs. For gfx10 in WGP mode the WGP contains
  // two CUs, so a total of four SIMDs.
  return 4;
}

unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
                               unsigned FlatWorkGroupSize) {
  assert(FlatWorkGroupSize != 0);
  if (STI->getTargetTriple().getArch() != Triple::amdgcn)
    return 8;
  unsigned MaxWaves = getMaxWavesPerEU(STI) * getEUsPerCU(STI);
  unsigned N = getWavesPerWorkGroup(STI, FlatWorkGroupSize);
  if (N == 1) {
    // Single-wave workgroups don't consume barrier resources.
    return MaxWaves;
  }

  unsigned MaxBarriers = 16;
  if (isGFX10Plus(*STI) && !STI->getFeatureBits().test(FeatureCuMode))
    MaxBarriers = 32;

  return std::min(MaxWaves / N, MaxBarriers);
}

unsigned getMinWavesPerEU(const MCSubtargetInfo *STI) {
  return 1;
}

unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI) {
  // FIXME: Need to take scratch memory into account.
  if (isGFX90A(*STI))
    return 8;
  if (!isGFX10Plus(*STI))
    return 10;
  return hasGFX10_3Insts(*STI) ? 16 : 20;
}

unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI,
                                   unsigned FlatWorkGroupSize) {
  return divideCeil(getWavesPerWorkGroup(STI, FlatWorkGroupSize),
                    getEUsPerCU(STI));
}

unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI) {
  return 1;
}

unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI) {
  // Some subtargets allow encoding 2048, but this isn't tested or supported.
  return 1024;
}

unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
                              unsigned FlatWorkGroupSize) {
  return divideCeil(FlatWorkGroupSize, getWavefrontSize(STI));
}

unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI) {
  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 10)
    return getAddressableNumSGPRs(STI);
  if (Version.Major >= 8)
    return 16;
  return 8;
}

unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI) {
  return 8;
}

unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI) {
  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 8)
    return 800;
  return 512;
}

unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI) {
  if (STI->getFeatureBits().test(FeatureSGPRInitBug))
    return FIXED_NUM_SGPRS_FOR_INIT_BUG;

  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 10)
    return 106;
  if (Version.Major >= 8)
    return 102;
  return 104;
}

unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 10)
    return 0;

  if (WavesPerEU >= getMaxWavesPerEU(STI))
    return 0;

  unsigned MinNumSGPRs = getTotalNumSGPRs(STI) / (WavesPerEU + 1);
  if (STI->getFeatureBits().test(FeatureTrapHandler))
    MinNumSGPRs -= std::min(MinNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
  MinNumSGPRs = alignDown(MinNumSGPRs, getSGPRAllocGranule(STI)) + 1;
  return std::min(MinNumSGPRs, getAddressableNumSGPRs(STI));
}

unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
                        bool Addressable) {
  assert(WavesPerEU != 0);

  unsigned AddressableNumSGPRs = getAddressableNumSGPRs(STI);
  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 10)
    return Addressable ? AddressableNumSGPRs : 108;
  if (Version.Major >= 8 && !Addressable)
    AddressableNumSGPRs = 112;
  unsigned MaxNumSGPRs = getTotalNumSGPRs(STI) / WavesPerEU;
  if (STI->getFeatureBits().test(FeatureTrapHandler))
    MaxNumSGPRs -= std::min(MaxNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
  MaxNumSGPRs = alignDown(MaxNumSGPRs, getSGPRAllocGranule(STI));
  return std::min(MaxNumSGPRs, AddressableNumSGPRs);
}

unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed) {
  unsigned ExtraSGPRs = 0;
  if (VCCUsed)
    ExtraSGPRs = 2;

  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 10)
    return ExtraSGPRs;

  if (Version.Major < 8) {
    if (FlatScrUsed)
      ExtraSGPRs = 4;
  } else {
    if (XNACKUsed)
      ExtraSGPRs = 4;

    if (FlatScrUsed ||
        STI->getFeatureBits().test(AMDGPU::FeatureArchitectedFlatScratch))
      ExtraSGPRs = 6;
  }

  return ExtraSGPRs;
}

unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
                          bool FlatScrUsed) {
  return getNumExtraSGPRs(STI, VCCUsed, FlatScrUsed,
                          STI->getFeatureBits().test(AMDGPU::FeatureXNACK));
}

static unsigned getGranulatedNumRegisterBlocks(unsigned NumRegs,
                                               unsigned Granule) {
  return divideCeil(std::max(1u, NumRegs), Granule);
}
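// Note the std::max above: even a kernel that uses zero registers is charged
// one granule, e.g. getGranulatedNumRegisterBlocks(0, 4) == 1.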

unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs) {
  // SGPRBlocks is actual number of SGPR blocks minus 1.
  return getGranulatedNumRegisterBlocks(NumSGPRs,
                                        getSGPREncodingGranule(STI)) -
         1;
}

unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI,
                             std::optional<bool> EnableWavefrontSize32) {
  if (STI->getFeatureBits().test(FeatureGFX90AInsts))
    return 8;

  bool IsWave32 = EnableWavefrontSize32
                      ? *EnableWavefrontSize32
                      : STI->getFeatureBits().test(FeatureWavefrontSize32);

  if (STI->getFeatureBits().test(Feature1_5xVGPRs))
    return IsWave32 ? 24 : 12;

  if (hasGFX10_3Insts(*STI))
    return IsWave32 ? 16 : 8;

  return IsWave32 ? 8 : 4;
}

unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI,
                                std::optional<bool> EnableWavefrontSize32) {
  if (STI->getFeatureBits().test(FeatureGFX90AInsts))
    return 8;

  bool IsWave32 = EnableWavefrontSize32
                      ? *EnableWavefrontSize32
                      : STI->getFeatureBits().test(FeatureWavefrontSize32);

  return IsWave32 ? 8 : 4;
}

unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI) {
  if (STI->getFeatureBits().test(FeatureGFX90AInsts))
    return 512;
  if (!isGFX10Plus(*STI))
    return 256;
  bool IsWave32 = STI->getFeatureBits().test(FeatureWavefrontSize32);
  if (STI->getFeatureBits().test(Feature1_5xVGPRs))
    return IsWave32 ? 1536 : 768;
  return IsWave32 ? 1024 : 512;
}

unsigned getAddressableNumArchVGPRs(const MCSubtargetInfo *STI) { return 256; }

unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI) {
  if (STI->getFeatureBits().test(FeatureGFX90AInsts))
    return 512;
  return getAddressableNumArchVGPRs(STI);
}

unsigned getNumWavesPerEUWithNumVGPRs(const MCSubtargetInfo *STI,
                                      unsigned NumVGPRs) {
  unsigned MaxWaves = getMaxWavesPerEU(STI);
  unsigned Granule = getVGPRAllocGranule(STI);
  if (NumVGPRs < Granule)
    return MaxWaves;
  unsigned RoundedRegs = alignTo(NumVGPRs, Granule);
  return std::min(std::max(getTotalNumVGPRs(STI) / RoundedRegs, 1u), MaxWaves);
}

unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  unsigned MaxWavesPerEU = getMaxWavesPerEU(STI);
  if (WavesPerEU >= MaxWavesPerEU)
    return 0;

  unsigned TotNumVGPRs = getTotalNumVGPRs(STI);
  unsigned AddrsableNumVGPRs = getAddressableNumVGPRs(STI);
  unsigned Granule = getVGPRAllocGranule(STI);
  unsigned MaxNumVGPRs = alignDown(TotNumVGPRs / WavesPerEU, Granule);

  if (MaxNumVGPRs == alignDown(TotNumVGPRs / MaxWavesPerEU, Granule))
    return 0;

  unsigned MinWavesPerEU = getNumWavesPerEUWithNumVGPRs(STI, AddrsableNumVGPRs);
  if (WavesPerEU < MinWavesPerEU)
    return getMinNumVGPRs(STI, MinWavesPerEU);

  unsigned MaxNumVGPRsNext = alignDown(TotNumVGPRs / (WavesPerEU + 1), Granule);
  unsigned MinNumVGPRs = 1 + std::min(MaxNumVGPRs - Granule, MaxNumVGPRsNext);
  return std::min(MinNumVGPRs, AddrsableNumVGPRs);
}

unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  unsigned MaxNumVGPRs =
      alignDown(getTotalNumVGPRs(STI) / WavesPerEU, getVGPRAllocGranule(STI));
  unsigned AddressableNumVGPRs = getAddressableNumVGPRs(STI);
  return std::min(MaxNumVGPRs, AddressableNumVGPRs);
}

unsigned getEncodedNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs,
                                 std::optional<bool> EnableWavefrontSize32) {
  return getGranulatedNumRegisterBlocks(
             NumVGPRs, getVGPREncodingGranule(STI, EnableWavefrontSize32)) -
         1;
}

unsigned getAllocatedNumVGPRBlocks(const MCSubtargetInfo *STI,
                                   unsigned NumVGPRs,
                                   std::optional<bool> EnableWavefrontSize32) {
  return getGranulatedNumRegisterBlocks(
      NumVGPRs, getVGPRAllocGranule(STI, EnableWavefrontSize32));
}
} // end namespace IsaInfo

void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
                               const MCSubtargetInfo *STI) {
  IsaVersion Version = getIsaVersion(STI->getCPU());

  memset(&Header, 0, sizeof(Header));

  Header.amd_kernel_code_version_major = 1;
  Header.amd_kernel_code_version_minor = 2;
  Header.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
  Header.amd_machine_version_major = Version.Major;
  Header.amd_machine_version_minor = Version.Minor;
  Header.amd_machine_version_stepping = Version.Stepping;
  Header.kernel_code_entry_byte_offset = sizeof(Header);
  Header.wavefront_size = 6;

  // If the code object does not support indirect functions, then the value
  // must be 0xffffffff.
  Header.call_convention = -1;

  // These alignment values are specified in powers of two, so alignment =
  // 2^n. The minimum alignment is 2^4 = 16.
  Header.kernarg_segment_alignment = 4;
  Header.group_segment_alignment = 4;
  Header.private_segment_alignment = 4;

  if (Version.Major >= 10) {
    if (STI->getFeatureBits().test(FeatureWavefrontSize32)) {
      Header.wavefront_size = 5;
      Header.code_properties |= AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32;
    }
    Header.compute_pgm_resource_registers |=
        S_00B848_WGP_MODE(STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1) |
        S_00B848_MEM_ORDERED(1);
  }
}

bool isGroupSegment(const GlobalValue *GV) {
  return GV->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
}

bool isGlobalSegment(const GlobalValue *GV) {
  return GV->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
}

bool isReadOnlySegment(const GlobalValue *GV) {
  unsigned AS = GV->getAddressSpace();
  return AS == AMDGPUAS::CONSTANT_ADDRESS ||
         AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
}

bool shouldEmitConstantsToTextSection(const Triple &TT) {
  return TT.getArch() == Triple::r600;
}

std::pair<unsigned, unsigned>
getIntegerPairAttribute(const Function &F, StringRef Name,
                        std::pair<unsigned, unsigned> Default,
                        bool OnlyFirstRequired) {
  Attribute A = F.getFnAttribute(Name);
  if (!A.isStringAttribute())
    return Default;

  LLVMContext &Ctx = F.getContext();
  std::pair<unsigned, unsigned> Ints = Default;
  std::pair<StringRef, StringRef> Strs = A.getValueAsString().split(',');
  if (Strs.first.trim().getAsInteger(0, Ints.first)) {
    Ctx.emitError("can't parse first integer attribute " + Name);
    return Default;
  }
  if (Strs.second.trim().getAsInteger(0, Ints.second)) {
    if (!OnlyFirstRequired || !Strs.second.trim().empty()) {
      Ctx.emitError("can't parse second integer attribute " + Name);
      return Default;
    }
  }

  return Ints;
}

SmallVector<unsigned> getIntegerVecAttribute(const Function &F, StringRef Name,
                                             unsigned Size) {
  assert(Size > 2);
  SmallVector<unsigned> Default(Size, 0);

  Attribute A = F.getFnAttribute(Name);
  if (!A.isStringAttribute())
    return Default;

  SmallVector<unsigned> Vals(Size, 0);

  LLVMContext &Ctx = F.getContext();

  StringRef S = A.getValueAsString();
  unsigned i = 0;
  for (; !S.empty() && i < Size; i++) {
    std::pair<StringRef, StringRef> Strs = S.split(',');
    unsigned IntVal;
    if (Strs.first.trim().getAsInteger(0, IntVal)) {
      Ctx.emitError("can't parse integer attribute " + Strs.first + " in " +
                    Name);
      return Default;
    }
    Vals[i] = IntVal;
    S = Strs.second;
  }

  if (!S.empty() || i < Size) {
    Ctx.emitError("attribute " + Name +
                  " has incorrect number of integers; expected " +
                  llvm::utostr(Size));
    return Default;
  }
  return Vals;
}

unsigned getVmcntBitMask(const IsaVersion &Version) {
  return (1 << (getVmcntBitWidthLo(Version.Major) +
                getVmcntBitWidthHi(Version.Major))) -
         1;
}
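// For example, on GFX9 this evaluates to (1 << (4 + 2)) - 1 == 63, covering
// the full split 6-bit vmcnt field.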

unsigned getLoadcntBitMask(const IsaVersion &Version) {
  return (1 << getLoadcntBitWidth(Version.Major)) - 1;
}

unsigned getSamplecntBitMask(const IsaVersion &Version) {
  return (1 << getSamplecntBitWidth(Version.Major)) - 1;
}

unsigned getBvhcntBitMask(const IsaVersion &Version) {
  return (1 << getBvhcntBitWidth(Version.Major)) - 1;
}

unsigned getExpcntBitMask(const IsaVersion &Version) {
  return (1 << getExpcntBitWidth(Version.Major)) - 1;
}

unsigned getLgkmcntBitMask(const IsaVersion &Version) {
  return (1 << getLgkmcntBitWidth(Version.Major)) - 1;
}

unsigned getDscntBitMask(const IsaVersion &Version) {
  return (1 << getDscntBitWidth(Version.Major)) - 1;
}

unsigned getKmcntBitMask(const IsaVersion &Version) {
  return (1 << getKmcntBitWidth(Version.Major)) - 1;
}

unsigned getStorecntBitMask(const IsaVersion &Version) {
  return (1 << getStorecntBitWidth(Version.Major)) - 1;
}

unsigned getWaitcntBitMask(const IsaVersion &Version) {
  unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(Version.Major),
                                getVmcntBitWidthLo(Version.Major));
  unsigned Expcnt = getBitMask(getExpcntBitShift(Version.Major),
                               getExpcntBitWidth(Version.Major));
  unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(Version.Major),
                                getLgkmcntBitWidth(Version.Major));
  unsigned VmcntHi = getBitMask(getVmcntBitShiftHi(Version.Major),
                                getVmcntBitWidthHi(Version.Major));
  return VmcntLo | Expcnt | Lgkmcnt | VmcntHi;
}

unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt) {
  unsigned VmcntLo = unpackBits(Waitcnt, getVmcntBitShiftLo(Version.Major),
                                getVmcntBitWidthLo(Version.Major));
  unsigned VmcntHi = unpackBits(Waitcnt, getVmcntBitShiftHi(Version.Major),
                                getVmcntBitWidthHi(Version.Major));
  return VmcntLo | VmcntHi << getVmcntBitWidthLo(Version.Major);
}

unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt) {
  return unpackBits(Waitcnt, getExpcntBitShift(Version.Major),
                    getExpcntBitWidth(Version.Major));
}

unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt) {
  return unpackBits(Waitcnt, getLgkmcntBitShift(Version.Major),
                    getLgkmcntBitWidth(Version.Major));
}

void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt,
                   unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt) {
  Vmcnt = decodeVmcnt(Version, Waitcnt);
  Expcnt = decodeExpcnt(Version, Waitcnt);
  Lgkmcnt = decodeLgkmcnt(Version, Waitcnt);
}

Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded) {
  Waitcnt Decoded;
  Decoded.LoadCnt = decodeVmcnt(Version, Encoded);
  Decoded.ExpCnt = decodeExpcnt(Version, Encoded);
  Decoded.DsCnt = decodeLgkmcnt(Version, Encoded);
  return Decoded;
}

unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
                     unsigned Vmcnt) {
  Waitcnt = packBits(Vmcnt, Waitcnt, getVmcntBitShiftLo(Version.Major),
                     getVmcntBitWidthLo(Version.Major));
  return packBits(Vmcnt >> getVmcntBitWidthLo(Version.Major), Waitcnt,
                  getVmcntBitShiftHi(Version.Major),
                  getVmcntBitWidthHi(Version.Major));
}

unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
                      unsigned Expcnt) {
  return packBits(Expcnt, Waitcnt, getExpcntBitShift(Version.Major),
                  getExpcntBitWidth(Version.Major));
}

unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
                       unsigned Lgkmcnt) {
  return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(Version.Major),
                  getLgkmcntBitWidth(Version.Major));
}

unsigned encodeWaitcnt(const IsaVersion &Version,
                       unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt) {
  unsigned Waitcnt = getWaitcntBitMask(Version);
  Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt);
  Waitcnt = encodeExpcnt(Version, Waitcnt, Expcnt);
  Waitcnt = encodeLgkmcnt(Version, Waitcnt, Lgkmcnt);
  return Waitcnt;
}
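// The encoding starts from getWaitcntBitMask(Version), i.e. every counter
// field at its maximum "no wait" value, and then packs each requested count
// over it, so any field left untouched still reads as "no wait".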
1415 | |
1416 | unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded) { |
1417 | return encodeWaitcnt(Version, Vmcnt: Decoded.LoadCnt, Expcnt: Decoded.ExpCnt, Lgkmcnt: Decoded.DsCnt); |
1418 | } |
1419 | |
1420 | static unsigned getCombinedCountBitMask(const IsaVersion &Version, |
1421 | bool IsStore) { |
1422 | unsigned Dscnt = getBitMask(Shift: getDscntBitShift(VersionMajor: Version.Major), |
1423 | Width: getDscntBitWidth(VersionMajor: Version.Major)); |
1424 | if (IsStore) { |
1425 | unsigned Storecnt = getBitMask(Shift: getLoadcntStorecntBitShift(VersionMajor: Version.Major), |
1426 | Width: getStorecntBitWidth(VersionMajor: Version.Major)); |
1427 | return Dscnt | Storecnt; |
1428 | } else { |
1429 | unsigned Loadcnt = getBitMask(Shift: getLoadcntStorecntBitShift(VersionMajor: Version.Major), |
1430 | Width: getLoadcntBitWidth(VersionMajor: Version.Major)); |
1431 | return Dscnt | Loadcnt; |
1432 | } |
1433 | } |
1434 | |
1435 | Waitcnt decodeLoadcntDscnt(const IsaVersion &Version, unsigned LoadcntDscnt) { |
1436 | Waitcnt Decoded; |
1437 | Decoded.LoadCnt = |
1438 | unpackBits(Src: LoadcntDscnt, Shift: getLoadcntStorecntBitShift(VersionMajor: Version.Major), |
1439 | Width: getLoadcntBitWidth(VersionMajor: Version.Major)); |
1440 | Decoded.DsCnt = unpackBits(Src: LoadcntDscnt, Shift: getDscntBitShift(VersionMajor: Version.Major), |
1441 | Width: getDscntBitWidth(VersionMajor: Version.Major)); |
1442 | return Decoded; |
1443 | } |
1444 | |
1445 | Waitcnt decodeStorecntDscnt(const IsaVersion &Version, unsigned StorecntDscnt) { |
1446 | Waitcnt Decoded; |
1447 | Decoded.StoreCnt = |
1448 | unpackBits(Src: StorecntDscnt, Shift: getLoadcntStorecntBitShift(VersionMajor: Version.Major), |
1449 | Width: getStorecntBitWidth(VersionMajor: Version.Major)); |
1450 | Decoded.DsCnt = unpackBits(Src: StorecntDscnt, Shift: getDscntBitShift(VersionMajor: Version.Major), |
1451 | Width: getDscntBitWidth(VersionMajor: Version.Major)); |
1452 | return Decoded; |
1453 | } |
1454 | |
1455 | static unsigned encodeLoadcnt(const IsaVersion &Version, unsigned Waitcnt, |
1456 | unsigned Loadcnt) { |
1457 | return packBits(Src: Loadcnt, Dst: Waitcnt, Shift: getLoadcntStorecntBitShift(VersionMajor: Version.Major), |
1458 | Width: getLoadcntBitWidth(VersionMajor: Version.Major)); |
1459 | } |
1460 | |
1461 | static unsigned encodeStorecnt(const IsaVersion &Version, unsigned Waitcnt, |
1462 | unsigned Storecnt) { |
1463 | return packBits(Src: Storecnt, Dst: Waitcnt, Shift: getLoadcntStorecntBitShift(VersionMajor: Version.Major), |
1464 | Width: getStorecntBitWidth(VersionMajor: Version.Major)); |
1465 | } |
1466 | |
1467 | static unsigned encodeDscnt(const IsaVersion &Version, unsigned Waitcnt, |
1468 | unsigned Dscnt) { |
  return packBits(Dscnt, Waitcnt, getDscntBitShift(Version.Major),
                  getDscntBitWidth(Version.Major));
1471 | } |
1472 | |
1473 | static unsigned encodeLoadcntDscnt(const IsaVersion &Version, unsigned Loadcnt, |
1474 | unsigned Dscnt) { |
  unsigned Waitcnt = getCombinedCountBitMask(Version, /*IsStore=*/false);
1476 | Waitcnt = encodeLoadcnt(Version, Waitcnt, Loadcnt); |
1477 | Waitcnt = encodeDscnt(Version, Waitcnt, Dscnt); |
1478 | return Waitcnt; |
1479 | } |
1480 | |
1481 | unsigned encodeLoadcntDscnt(const IsaVersion &Version, const Waitcnt &Decoded) { |
  return encodeLoadcntDscnt(Version, Decoded.LoadCnt, Decoded.DsCnt);
1483 | } |
1484 | |
1485 | static unsigned encodeStorecntDscnt(const IsaVersion &Version, |
1486 | unsigned Storecnt, unsigned Dscnt) { |
  unsigned Waitcnt = getCombinedCountBitMask(Version, /*IsStore=*/true);
1488 | Waitcnt = encodeStorecnt(Version, Waitcnt, Storecnt); |
1489 | Waitcnt = encodeDscnt(Version, Waitcnt, Dscnt); |
1490 | return Waitcnt; |
1491 | } |
1492 | |
1493 | unsigned encodeStorecntDscnt(const IsaVersion &Version, |
1494 | const Waitcnt &Decoded) { |
  return encodeStorecntDscnt(Version, Decoded.StoreCnt, Decoded.DsCnt);
1496 | } |
1497 | |
1498 | //===----------------------------------------------------------------------===// |
1499 | // Custom Operands. |
1500 | // |
// A table of custom operands shall describe "primary" operand names
// first, followed by aliases, if any. It is not required, but recommended,
// to arrange operands so that each operand's encoding matches its position
// in the table; this makes disassembly a bit more efficient.
// Unused slots in the table shall have an empty name.
1506 | // |
1507 | //===----------------------------------------------------------------------===// |
1508 | |
1509 | template <class T> |
1510 | static bool isValidOpr(int Idx, const CustomOperand<T> OpInfo[], int OpInfoSize, |
1511 | T Context) { |
1512 | return 0 <= Idx && Idx < OpInfoSize && !OpInfo[Idx].Name.empty() && |
1513 | (!OpInfo[Idx].Cond || OpInfo[Idx].Cond(Context)); |
1514 | } |
1515 | |
1516 | template <class T> |
1517 | static int getOprIdx(std::function<bool(const CustomOperand<T> &)> Test, |
1518 | const CustomOperand<T> OpInfo[], int OpInfoSize, |
1519 | T Context) { |
1520 | int InvalidIdx = OPR_ID_UNKNOWN; |
1521 | for (int Idx = 0; Idx < OpInfoSize; ++Idx) { |
1522 | if (Test(OpInfo[Idx])) { |
1523 | if (!OpInfo[Idx].Cond || OpInfo[Idx].Cond(Context)) |
1524 | return Idx; |
1525 | InvalidIdx = OPR_ID_UNSUPPORTED; |
1526 | } |
1527 | } |
1528 | return InvalidIdx; |
1529 | } |
1530 | |
1531 | template <class T> |
1532 | static int getOprIdx(const StringRef Name, const CustomOperand<T> OpInfo[], |
1533 | int OpInfoSize, T Context) { |
1534 | auto Test = [=](const CustomOperand<T> &Op) { return Op.Name == Name; }; |
1535 | return getOprIdx<T>(Test, OpInfo, OpInfoSize, Context); |
1536 | } |
1537 | |
1538 | template <class T> |
1539 | static int getOprIdx(int Id, const CustomOperand<T> OpInfo[], int OpInfoSize, |
1540 | T Context, bool QuickCheck = true) { |
1541 | auto Test = [=](const CustomOperand<T> &Op) { |
1542 | return Op.Encoding == Id && !Op.Name.empty(); |
1543 | }; |
1544 | // This is an optimization that should work in most cases. |
1545 | // As a side effect, it may cause selection of an alias |
1546 | // instead of a primary operand name in case of sparse tables. |
1547 | if (QuickCheck && isValidOpr<T>(Id, OpInfo, OpInfoSize, Context) && |
1548 | OpInfo[Id].Encoding == Id) { |
1549 | return Id; |
1550 | } |
1551 | return getOprIdx<T>(Test, OpInfo, OpInfoSize, Context); |
1552 | } |
1553 | |
1554 | //===----------------------------------------------------------------------===// |
1555 | // Custom Operand Values |
1556 | //===----------------------------------------------------------------------===// |
1557 | |
1558 | static unsigned getDefaultCustomOperandEncoding(const CustomOperandVal *Opr, |
1559 | int Size, |
1560 | const MCSubtargetInfo &STI) { |
1561 | unsigned Enc = 0; |
1562 | for (int Idx = 0; Idx < Size; ++Idx) { |
1563 | const auto &Op = Opr[Idx]; |
1564 | if (Op.isSupported(STI)) |
      Enc |= Op.encode(Op.Default);
1566 | } |
1567 | return Enc; |
1568 | } |
1569 | |
1570 | static bool isSymbolicCustomOperandEncoding(const CustomOperandVal *Opr, |
1571 | int Size, unsigned Code, |
1572 | bool &HasNonDefaultVal, |
1573 | const MCSubtargetInfo &STI) { |
1574 | unsigned UsedOprMask = 0; |
1575 | HasNonDefaultVal = false; |
1576 | for (int Idx = 0; Idx < Size; ++Idx) { |
1577 | const auto &Op = Opr[Idx]; |
1578 | if (!Op.isSupported(STI)) |
1579 | continue; |
1580 | UsedOprMask |= Op.getMask(); |
1581 | unsigned Val = Op.decode(Code); |
1582 | if (!Op.isValid(Val)) |
1583 | return false; |
1584 | HasNonDefaultVal |= (Val != Op.Default); |
1585 | } |
1586 | return (Code & ~UsedOprMask) == 0; |
1587 | } |
1588 | |
1589 | static bool decodeCustomOperand(const CustomOperandVal *Opr, int Size, |
1590 | unsigned Code, int &Idx, StringRef &Name, |
1591 | unsigned &Val, bool &IsDefault, |
1592 | const MCSubtargetInfo &STI) { |
1593 | while (Idx < Size) { |
1594 | const auto &Op = Opr[Idx++]; |
1595 | if (Op.isSupported(STI)) { |
1596 | Name = Op.Name; |
1597 | Val = Op.decode(Code); |
1598 | IsDefault = (Val == Op.Default); |
1599 | return true; |
1600 | } |
1601 | } |
1602 | |
1603 | return false; |
1604 | } |
1605 | |
1606 | static int encodeCustomOperandVal(const CustomOperandVal &Op, |
1607 | int64_t InputVal) { |
1608 | if (InputVal < 0 || InputVal > Op.Max) |
1609 | return OPR_VAL_INVALID; |
  return Op.encode(InputVal);
1611 | } |
1612 | |
1613 | static int encodeCustomOperand(const CustomOperandVal *Opr, int Size, |
1614 | const StringRef Name, int64_t InputVal, |
1615 | unsigned &UsedOprMask, |
1616 | const MCSubtargetInfo &STI) { |
1617 | int InvalidId = OPR_ID_UNKNOWN; |
1618 | for (int Idx = 0; Idx < Size; ++Idx) { |
1619 | const auto &Op = Opr[Idx]; |
1620 | if (Op.Name == Name) { |
1621 | if (!Op.isSupported(STI)) { |
1622 | InvalidId = OPR_ID_UNSUPPORTED; |
1623 | continue; |
1624 | } |
1625 | auto OprMask = Op.getMask(); |
1626 | if (OprMask & UsedOprMask) |
1627 | return OPR_ID_DUPLICATE; |
1628 | UsedOprMask |= OprMask; |
1629 | return encodeCustomOperandVal(Op, InputVal); |
1630 | } |
1631 | } |
1632 | return InvalidId; |
1633 | } |
1634 | |
1635 | //===----------------------------------------------------------------------===// |
1636 | // DepCtr |
1637 | //===----------------------------------------------------------------------===// |
1638 | |
1639 | namespace DepCtr { |
1640 | |
1641 | int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI) { |
1642 | static int Default = -1; |
1643 | if (Default == -1) |
    Default = getDefaultCustomOperandEncoding(DepCtrInfo, DEP_CTR_SIZE, STI);
1645 | return Default; |
1646 | } |
1647 | |
1648 | bool isSymbolicDepCtrEncoding(unsigned Code, bool &HasNonDefaultVal, |
1649 | const MCSubtargetInfo &STI) { |
  return isSymbolicCustomOperandEncoding(DepCtrInfo, DEP_CTR_SIZE, Code,
                                         HasNonDefaultVal, STI);
1652 | } |
1653 | |
1654 | bool decodeDepCtr(unsigned Code, int &Id, StringRef &Name, unsigned &Val, |
1655 | bool &IsDefault, const MCSubtargetInfo &STI) { |
  return decodeCustomOperand(DepCtrInfo, DEP_CTR_SIZE, Code, Id, Name, Val,
                             IsDefault, STI);
1658 | } |
1659 | |
1660 | int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask, |
1661 | const MCSubtargetInfo &STI) { |
  return encodeCustomOperand(DepCtrInfo, DEP_CTR_SIZE, Name, Val, UsedOprMask,
                             STI);
1664 | } |
1665 | |
1666 | unsigned decodeFieldVmVsrc(unsigned Encoded) { |
  return unpackBits(Encoded, getVmVsrcBitShift(), getVmVsrcBitWidth());
1668 | } |
1669 | |
1670 | unsigned decodeFieldVaVdst(unsigned Encoded) { |
  return unpackBits(Encoded, getVaVdstBitShift(), getVaVdstBitWidth());
1672 | } |
1673 | |
1674 | unsigned decodeFieldSaSdst(unsigned Encoded) { |
  return unpackBits(Encoded, getSaSdstBitShift(), getSaSdstBitWidth());
1676 | } |
1677 | |
1678 | unsigned encodeFieldVmVsrc(unsigned Encoded, unsigned VmVsrc) { |
  return packBits(VmVsrc, Encoded, getVmVsrcBitShift(), getVmVsrcBitWidth());
1680 | } |
1681 | |
// The single-argument overloads start from 0xffff, i.e. with every field at
// its all-ones default, so that only the named field adds a constraint.
unsigned encodeFieldVmVsrc(unsigned VmVsrc) {
  return encodeFieldVmVsrc(0xffff, VmVsrc);
}
1685 | |
1686 | unsigned encodeFieldVaVdst(unsigned Encoded, unsigned VaVdst) { |
  return packBits(VaVdst, Encoded, getVaVdstBitShift(), getVaVdstBitWidth());
1688 | } |
1689 | |
1690 | unsigned encodeFieldVaVdst(unsigned VaVdst) { |
  return encodeFieldVaVdst(0xffff, VaVdst);
1692 | } |
1693 | |
1694 | unsigned encodeFieldSaSdst(unsigned Encoded, unsigned SaSdst) { |
  return packBits(SaSdst, Encoded, getSaSdstBitShift(), getSaSdstBitWidth());
1696 | } |
1697 | |
1698 | unsigned encodeFieldSaSdst(unsigned SaSdst) { |
  return encodeFieldSaSdst(0xffff, SaSdst);
1700 | } |
1701 | |
1702 | } // namespace DepCtr |
1703 | |
1704 | //===----------------------------------------------------------------------===// |
1705 | // hwreg |
1706 | //===----------------------------------------------------------------------===// |
1707 | |
1708 | namespace Hwreg { |
1709 | |
1710 | int64_t getHwregId(const StringRef Name, const MCSubtargetInfo &STI) { |
1711 | int Idx = getOprIdx<const MCSubtargetInfo &>(Name, Opr, OPR_SIZE, STI); |
1712 | return (Idx < 0) ? Idx : Opr[Idx].Encoding; |
1713 | } |
1714 | |
1715 | StringRef getHwreg(unsigned Id, const MCSubtargetInfo &STI) { |
1716 | int Idx = getOprIdx<const MCSubtargetInfo &>(Id, Opr, OPR_SIZE, STI); |
1717 | return (Idx < 0) ? "" : Opr[Idx].Name; |
1718 | } |
1719 | |
1720 | } // namespace Hwreg |
1721 | |
1722 | //===----------------------------------------------------------------------===// |
1723 | // exp tgt |
1724 | //===----------------------------------------------------------------------===// |
1725 | |
1726 | namespace Exp { |
1727 | |
1728 | struct ExpTgt { |
1729 | StringLiteral Name; |
1730 | unsigned Tgt; |
1731 | unsigned MaxIndex; |
1732 | }; |
1733 | |
1734 | static constexpr ExpTgt ExpTgtInfo[] = { |
1735 | {{"null" }, .Tgt: ET_NULL, .MaxIndex: ET_NULL_MAX_IDX}, |
1736 | {{"mrtz" }, .Tgt: ET_MRTZ, .MaxIndex: ET_MRTZ_MAX_IDX}, |
1737 | {{"prim" }, .Tgt: ET_PRIM, .MaxIndex: ET_PRIM_MAX_IDX}, |
1738 | {{"mrt" }, .Tgt: ET_MRT0, .MaxIndex: ET_MRT_MAX_IDX}, |
1739 | {{"pos" }, .Tgt: ET_POS0, .MaxIndex: ET_POS_MAX_IDX}, |
1740 | {{"dual_src_blend" }, .Tgt: ET_DUAL_SRC_BLEND0, .MaxIndex: ET_DUAL_SRC_BLEND_MAX_IDX}, |
1741 | {{"param" }, .Tgt: ET_PARAM0, .MaxIndex: ET_PARAM_MAX_IDX}, |
1742 | }; |
1743 | |
1744 | bool getTgtName(unsigned Id, StringRef &Name, int &Index) { |
1745 | for (const ExpTgt &Val : ExpTgtInfo) { |
1746 | if (Val.Tgt <= Id && Id <= Val.Tgt + Val.MaxIndex) { |
1747 | Index = (Val.MaxIndex == 0) ? -1 : (Id - Val.Tgt); |
1748 | Name = Val.Name; |
1749 | return true; |
1750 | } |
1751 | } |
1752 | return false; |
1753 | } |
1754 | |
1755 | unsigned getTgtId(const StringRef Name) { |
1756 | |
1757 | for (const ExpTgt &Val : ExpTgtInfo) { |
1758 | if (Val.MaxIndex == 0 && Name == Val.Name) |
1759 | return Val.Tgt; |
1760 | |
1761 | if (Val.MaxIndex > 0 && Name.starts_with(Val.Name)) { |
1762 | StringRef Suffix = Name.drop_front(Val.Name.size()); |
1763 | |
1764 | unsigned Id; |
1765 | if (Suffix.getAsInteger(10, Id) || Id > Val.MaxIndex) |
1766 | return ET_INVALID; |
1767 | |
      // Reject leading zeroes.
1769 | if (Suffix.size() > 1 && Suffix[0] == '0') |
1770 | return ET_INVALID; |
1771 | |
1772 | return Val.Tgt + Id; |
1773 | } |
1774 | } |
1775 | return ET_INVALID; |
1776 | } |
1777 | |
1778 | bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI) { |
1779 | switch (Id) { |
1780 | case ET_NULL: |
1781 | return !isGFX11Plus(STI); |
1782 | case ET_POS4: |
1783 | case ET_PRIM: |
1784 | return isGFX10Plus(STI); |
1785 | case ET_DUAL_SRC_BLEND0: |
1786 | case ET_DUAL_SRC_BLEND1: |
1787 | return isGFX11Plus(STI); |
1788 | default: |
1789 | if (Id >= ET_PARAM0 && Id <= ET_PARAM31) |
1790 | return !isGFX11Plus(STI); |
1791 | return true; |
1792 | } |
1793 | } |
1794 | |
1795 | } // namespace Exp |
1796 | |
1797 | //===----------------------------------------------------------------------===// |
1798 | // MTBUF Format |
1799 | //===----------------------------------------------------------------------===// |
1800 | |
1801 | namespace MTBUFFormat { |
1802 | |
1803 | int64_t getDfmt(const StringRef Name) { |
1804 | for (int Id = DFMT_MIN; Id <= DFMT_MAX; ++Id) { |
1805 | if (Name == DfmtSymbolic[Id]) |
1806 | return Id; |
1807 | } |
1808 | return DFMT_UNDEF; |
1809 | } |
1810 | |
1811 | StringRef getDfmtName(unsigned Id) { |
1812 | assert(Id <= DFMT_MAX); |
1813 | return DfmtSymbolic[Id]; |
1814 | } |
1815 | |
1816 | static StringLiteral const *getNfmtLookupTable(const MCSubtargetInfo &STI) { |
1817 | if (isSI(STI) || isCI(STI)) |
1818 | return NfmtSymbolicSICI; |
1819 | if (isVI(STI) || isGFX9(STI)) |
1820 | return NfmtSymbolicVI; |
1821 | return NfmtSymbolicGFX10; |
1822 | } |
1823 | |
1824 | int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI) { |
1825 | auto lookupTable = getNfmtLookupTable(STI); |
1826 | for (int Id = NFMT_MIN; Id <= NFMT_MAX; ++Id) { |
1827 | if (Name == lookupTable[Id]) |
1828 | return Id; |
1829 | } |
1830 | return NFMT_UNDEF; |
1831 | } |
1832 | |
1833 | StringRef getNfmtName(unsigned Id, const MCSubtargetInfo &STI) { |
1834 | assert(Id <= NFMT_MAX); |
1835 | return getNfmtLookupTable(STI)[Id]; |
1836 | } |
1837 | |
1838 | bool isValidDfmtNfmt(unsigned Id, const MCSubtargetInfo &STI) { |
1839 | unsigned Dfmt; |
1840 | unsigned Nfmt; |
  decodeDfmtNfmt(Id, Dfmt, Nfmt);
  return isValidNfmt(Nfmt, STI);
1843 | } |
1844 | |
1845 | bool isValidNfmt(unsigned Id, const MCSubtargetInfo &STI) { |
1846 | return !getNfmtName(Id, STI).empty(); |
1847 | } |
1848 | |
1849 | int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt) { |
1850 | return (Dfmt << DFMT_SHIFT) | (Nfmt << NFMT_SHIFT); |
1851 | } |
1852 | |
1853 | void decodeDfmtNfmt(unsigned Format, unsigned &Dfmt, unsigned &Nfmt) { |
1854 | Dfmt = (Format >> DFMT_SHIFT) & DFMT_MASK; |
1855 | Nfmt = (Format >> NFMT_SHIFT) & NFMT_MASK; |
1856 | } |
1857 | |
1858 | int64_t getUnifiedFormat(const StringRef Name, const MCSubtargetInfo &STI) { |
1859 | if (isGFX11Plus(STI)) { |
1860 | for (int Id = UfmtGFX11::UFMT_FIRST; Id <= UfmtGFX11::UFMT_LAST; ++Id) { |
1861 | if (Name == UfmtSymbolicGFX11[Id]) |
1862 | return Id; |
1863 | } |
1864 | } else { |
1865 | for (int Id = UfmtGFX10::UFMT_FIRST; Id <= UfmtGFX10::UFMT_LAST; ++Id) { |
1866 | if (Name == UfmtSymbolicGFX10[Id]) |
1867 | return Id; |
1868 | } |
1869 | } |
1870 | return UFMT_UNDEF; |
1871 | } |
1872 | |
1873 | StringRef getUnifiedFormatName(unsigned Id, const MCSubtargetInfo &STI) { |
  if (isValidUnifiedFormat(Id, STI))
    return isGFX10(STI) ? UfmtSymbolicGFX10[Id] : UfmtSymbolicGFX11[Id];
  return "";
1877 | } |
1878 | |
1879 | bool isValidUnifiedFormat(unsigned Id, const MCSubtargetInfo &STI) { |
1880 | return isGFX10(STI) ? Id <= UfmtGFX10::UFMT_LAST : Id <= UfmtGFX11::UFMT_LAST; |
1881 | } |
1882 | |
1883 | int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt, |
1884 | const MCSubtargetInfo &STI) { |
1885 | int64_t Fmt = encodeDfmtNfmt(Dfmt, Nfmt); |
1886 | if (isGFX11Plus(STI)) { |
1887 | for (int Id = UfmtGFX11::UFMT_FIRST; Id <= UfmtGFX11::UFMT_LAST; ++Id) { |
1888 | if (Fmt == DfmtNfmt2UFmtGFX11[Id]) |
1889 | return Id; |
1890 | } |
1891 | } else { |
1892 | for (int Id = UfmtGFX10::UFMT_FIRST; Id <= UfmtGFX10::UFMT_LAST; ++Id) { |
1893 | if (Fmt == DfmtNfmt2UFmtGFX10[Id]) |
1894 | return Id; |
1895 | } |
1896 | } |
1897 | return UFMT_UNDEF; |
1898 | } |
1899 | |
1900 | bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI) { |
1901 | return isGFX10Plus(STI) ? (Val <= UFMT_MAX) : (Val <= DFMT_NFMT_MAX); |
1902 | } |
1903 | |
1904 | unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI) { |
1905 | if (isGFX10Plus(STI)) |
1906 | return UFMT_DEFAULT; |
1907 | return DFMT_NFMT_DEFAULT; |
1908 | } |
1909 | |
1910 | } // namespace MTBUFFormat |
1911 | |
1912 | //===----------------------------------------------------------------------===// |
1913 | // SendMsg |
1914 | //===----------------------------------------------------------------------===// |
1915 | |
1916 | namespace SendMsg { |
1917 | |
1918 | static uint64_t getMsgIdMask(const MCSubtargetInfo &STI) { |
1919 | return isGFX11Plus(STI) ? ID_MASK_GFX11Plus_ : ID_MASK_PreGFX11_; |
1920 | } |
1921 | |
1922 | int64_t getMsgId(const StringRef Name, const MCSubtargetInfo &STI) { |
1923 | int Idx = getOprIdx<const MCSubtargetInfo &>(Name, Msg, MSG_SIZE, STI); |
1924 | return (Idx < 0) ? Idx : Msg[Idx].Encoding; |
1925 | } |
1926 | |
1927 | bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI) { |
1928 | return (MsgId & ~(getMsgIdMask(STI))) == 0; |
1929 | } |
1930 | |
1931 | StringRef getMsgName(int64_t MsgId, const MCSubtargetInfo &STI) { |
1932 | int Idx = getOprIdx<const MCSubtargetInfo &>(MsgId, Msg, MSG_SIZE, STI); |
1933 | return (Idx < 0) ? "" : Msg[Idx].Name; |
1934 | } |
1935 | |
1936 | int64_t getMsgOpId(int64_t MsgId, const StringRef Name) { |
1937 | const char* const *S = (MsgId == ID_SYSMSG) ? OpSysSymbolic : OpGsSymbolic; |
1938 | const int F = (MsgId == ID_SYSMSG) ? OP_SYS_FIRST_ : OP_GS_FIRST_; |
1939 | const int L = (MsgId == ID_SYSMSG) ? OP_SYS_LAST_ : OP_GS_LAST_; |
1940 | for (int i = F; i < L; ++i) { |
1941 | if (Name == S[i]) { |
1942 | return i; |
1943 | } |
1944 | } |
1945 | return OP_UNKNOWN_; |
1946 | } |
1947 | |
1948 | bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI, |
1949 | bool Strict) { |
1950 | assert(isValidMsgId(MsgId, STI)); |
1951 | |
1952 | if (!Strict) |
1953 | return 0 <= OpId && isUInt<OP_WIDTH_>(OpId); |
1954 | |
1955 | if (MsgId == ID_SYSMSG) |
1956 | return OP_SYS_FIRST_ <= OpId && OpId < OP_SYS_LAST_; |
1957 | if (!isGFX11Plus(STI)) { |
1958 | switch (MsgId) { |
1959 | case ID_GS_PreGFX11: |
1960 | return (OP_GS_FIRST_ <= OpId && OpId < OP_GS_LAST_) && OpId != OP_GS_NOP; |
1961 | case ID_GS_DONE_PreGFX11: |
1962 | return OP_GS_FIRST_ <= OpId && OpId < OP_GS_LAST_; |
1963 | } |
1964 | } |
1965 | return OpId == OP_NONE_; |
1966 | } |
1967 | |
1968 | StringRef getMsgOpName(int64_t MsgId, int64_t OpId, |
1969 | const MCSubtargetInfo &STI) { |
1970 | assert(msgRequiresOp(MsgId, STI)); |
  return (MsgId == ID_SYSMSG) ? OpSysSymbolic[OpId] : OpGsSymbolic[OpId];
1972 | } |
1973 | |
1974 | bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId, |
1975 | const MCSubtargetInfo &STI, bool Strict) { |
1976 | assert(isValidMsgOp(MsgId, OpId, STI, Strict)); |
1977 | |
1978 | if (!Strict) |
1979 | return 0 <= StreamId && isUInt<STREAM_ID_WIDTH_>(StreamId); |
1980 | |
1981 | if (!isGFX11Plus(STI)) { |
1982 | switch (MsgId) { |
1983 | case ID_GS_PreGFX11: |
1984 | return STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_; |
1985 | case ID_GS_DONE_PreGFX11: |
1986 | return (OpId == OP_GS_NOP) ? |
1987 | (StreamId == STREAM_ID_NONE_) : |
1988 | (STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_); |
1989 | } |
1990 | } |
1991 | return StreamId == STREAM_ID_NONE_; |
1992 | } |
1993 | |
1994 | bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI) { |
1995 | return MsgId == ID_SYSMSG || |
1996 | (!isGFX11Plus(STI) && |
1997 | (MsgId == ID_GS_PreGFX11 || MsgId == ID_GS_DONE_PreGFX11)); |
1998 | } |
1999 | |
2000 | bool msgSupportsStream(int64_t MsgId, int64_t OpId, |
2001 | const MCSubtargetInfo &STI) { |
2002 | return !isGFX11Plus(STI) && |
2003 | (MsgId == ID_GS_PreGFX11 || MsgId == ID_GS_DONE_PreGFX11) && |
2004 | OpId != OP_GS_NOP; |
2005 | } |
2006 | |
2007 | void decodeMsg(unsigned Val, uint16_t &MsgId, uint16_t &OpId, |
2008 | uint16_t &StreamId, const MCSubtargetInfo &STI) { |
2009 | MsgId = Val & getMsgIdMask(STI); |
2010 | if (isGFX11Plus(STI)) { |
2011 | OpId = 0; |
2012 | StreamId = 0; |
2013 | } else { |
2014 | OpId = (Val & OP_MASK_) >> OP_SHIFT_; |
2015 | StreamId = (Val & STREAM_ID_MASK_) >> STREAM_ID_SHIFT_; |
2016 | } |
2017 | } |
2018 | |
2019 | uint64_t encodeMsg(uint64_t MsgId, |
2020 | uint64_t OpId, |
2021 | uint64_t StreamId) { |
2022 | return MsgId | (OpId << OP_SHIFT_) | (StreamId << STREAM_ID_SHIFT_); |
2023 | } |
2024 | |
2025 | } // namespace SendMsg |
2026 | |
2027 | //===----------------------------------------------------------------------===// |
2028 | // |
2029 | //===----------------------------------------------------------------------===// |
2030 | |
2031 | unsigned getInitialPSInputAddr(const Function &F) { |
  return F.getFnAttributeAsParsedInteger("InitialPSInputAddr", 0);
2033 | } |
2034 | |
2035 | bool getHasColorExport(const Function &F) { |
2036 | // As a safe default always respond as if PS has color exports. |
  return F.getFnAttributeAsParsedInteger(
             "amdgpu-color-export",
             F.getCallingConv() == CallingConv::AMDGPU_PS ? 1 : 0) != 0;
2040 | } |
2041 | |
2042 | bool getHasDepthExport(const Function &F) { |
  return F.getFnAttributeAsParsedInteger("amdgpu-depth-export", 0) != 0;
2044 | } |
2045 | |
2046 | bool isShader(CallingConv::ID cc) { |
2047 | switch(cc) { |
2048 | case CallingConv::AMDGPU_VS: |
2049 | case CallingConv::AMDGPU_LS: |
2050 | case CallingConv::AMDGPU_HS: |
2051 | case CallingConv::AMDGPU_ES: |
2052 | case CallingConv::AMDGPU_GS: |
2053 | case CallingConv::AMDGPU_PS: |
2054 | case CallingConv::AMDGPU_CS_Chain: |
2055 | case CallingConv::AMDGPU_CS_ChainPreserve: |
2056 | case CallingConv::AMDGPU_CS: |
2057 | return true; |
2058 | default: |
2059 | return false; |
2060 | } |
2061 | } |
2062 | |
2063 | bool isGraphics(CallingConv::ID cc) { |
2064 | return isShader(cc) || cc == CallingConv::AMDGPU_Gfx; |
2065 | } |
2066 | |
2067 | bool isCompute(CallingConv::ID cc) { |
2068 | return !isGraphics(cc) || cc == CallingConv::AMDGPU_CS; |
2069 | } |
2070 | |
2071 | bool isEntryFunctionCC(CallingConv::ID CC) { |
2072 | switch (CC) { |
2073 | case CallingConv::AMDGPU_KERNEL: |
2074 | case CallingConv::SPIR_KERNEL: |
2075 | case CallingConv::AMDGPU_VS: |
2076 | case CallingConv::AMDGPU_GS: |
2077 | case CallingConv::AMDGPU_PS: |
2078 | case CallingConv::AMDGPU_CS: |
2079 | case CallingConv::AMDGPU_ES: |
2080 | case CallingConv::AMDGPU_HS: |
2081 | case CallingConv::AMDGPU_LS: |
2082 | return true; |
2083 | default: |
2084 | return false; |
2085 | } |
2086 | } |
2087 | |
2088 | bool isModuleEntryFunctionCC(CallingConv::ID CC) { |
2089 | switch (CC) { |
2090 | case CallingConv::AMDGPU_Gfx: |
2091 | return true; |
2092 | default: |
2093 | return isEntryFunctionCC(CC) || isChainCC(CC); |
2094 | } |
2095 | } |
2096 | |
2097 | bool isChainCC(CallingConv::ID CC) { |
2098 | switch (CC) { |
2099 | case CallingConv::AMDGPU_CS_Chain: |
2100 | case CallingConv::AMDGPU_CS_ChainPreserve: |
2101 | return true; |
2102 | default: |
2103 | return false; |
2104 | } |
2105 | } |
2106 | |
2107 | bool isKernelCC(const Function *Func) { |
  return AMDGPU::isModuleEntryFunctionCC(Func->getCallingConv());
2109 | } |
2110 | |
2111 | bool hasXNACK(const MCSubtargetInfo &STI) { |
2112 | return STI.hasFeature(AMDGPU::FeatureXNACK); |
2113 | } |
2114 | |
2115 | bool hasSRAMECC(const MCSubtargetInfo &STI) { |
2116 | return STI.hasFeature(AMDGPU::FeatureSRAMECC); |
2117 | } |
2118 | |
2119 | bool hasMIMG_R128(const MCSubtargetInfo &STI) { |
2120 | return STI.hasFeature(AMDGPU::FeatureMIMG_R128) && !STI.hasFeature(AMDGPU::FeatureR128A16); |
2121 | } |
2122 | |
2123 | bool hasA16(const MCSubtargetInfo &STI) { |
2124 | return STI.hasFeature(AMDGPU::FeatureA16); |
2125 | } |
2126 | |
2127 | bool hasG16(const MCSubtargetInfo &STI) { |
2128 | return STI.hasFeature(AMDGPU::FeatureG16); |
2129 | } |
2130 | |
2131 | bool hasPackedD16(const MCSubtargetInfo &STI) { |
2132 | return !STI.hasFeature(AMDGPU::FeatureUnpackedD16VMem) && !isCI(STI) && |
2133 | !isSI(STI); |
2134 | } |
2135 | |
2136 | bool hasGDS(const MCSubtargetInfo &STI) { |
2137 | return STI.hasFeature(AMDGPU::FeatureGDS); |
2138 | } |
2139 | |
2140 | unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler) { |
  auto Version = getIsaVersion(STI.getCPU());
2142 | if (Version.Major == 10) |
2143 | return Version.Minor >= 3 ? 13 : 5; |
2144 | if (Version.Major == 11) |
2145 | return 5; |
2146 | if (Version.Major >= 12) |
2147 | return HasSampler ? 4 : 5; |
2148 | return 0; |
2149 | } |
2150 | |
2151 | unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI) { return 16; } |
2152 | |
2153 | bool isSI(const MCSubtargetInfo &STI) { |
2154 | return STI.hasFeature(AMDGPU::FeatureSouthernIslands); |
2155 | } |
2156 | |
2157 | bool isCI(const MCSubtargetInfo &STI) { |
2158 | return STI.hasFeature(AMDGPU::FeatureSeaIslands); |
2159 | } |
2160 | |
2161 | bool isVI(const MCSubtargetInfo &STI) { |
2162 | return STI.hasFeature(AMDGPU::FeatureVolcanicIslands); |
2163 | } |
2164 | |
2165 | bool isGFX9(const MCSubtargetInfo &STI) { |
2166 | return STI.hasFeature(AMDGPU::FeatureGFX9); |
2167 | } |
2168 | |
2169 | bool isGFX9_GFX10(const MCSubtargetInfo &STI) { |
2170 | return isGFX9(STI) || isGFX10(STI); |
2171 | } |
2172 | |
2173 | bool isGFX9_GFX10_GFX11(const MCSubtargetInfo &STI) { |
2174 | return isGFX9(STI) || isGFX10(STI) || isGFX11(STI); |
2175 | } |
2176 | |
2177 | bool isGFX8_GFX9_GFX10(const MCSubtargetInfo &STI) { |
2178 | return isVI(STI) || isGFX9(STI) || isGFX10(STI); |
2179 | } |
2180 | |
2181 | bool isGFX8Plus(const MCSubtargetInfo &STI) { |
2182 | return isVI(STI) || isGFX9Plus(STI); |
2183 | } |
2184 | |
2185 | bool isGFX9Plus(const MCSubtargetInfo &STI) { |
2186 | return isGFX9(STI) || isGFX10Plus(STI); |
2187 | } |
2188 | |
2189 | bool isGFX10(const MCSubtargetInfo &STI) { |
2190 | return STI.hasFeature(AMDGPU::FeatureGFX10); |
2191 | } |
2192 | |
2193 | bool isGFX10_GFX11(const MCSubtargetInfo &STI) { |
2194 | return isGFX10(STI) || isGFX11(STI); |
2195 | } |
2196 | |
2197 | bool isGFX10Plus(const MCSubtargetInfo &STI) { |
2198 | return isGFX10(STI) || isGFX11Plus(STI); |
2199 | } |
2200 | |
2201 | bool isGFX11(const MCSubtargetInfo &STI) { |
2202 | return STI.hasFeature(AMDGPU::FeatureGFX11); |
2203 | } |
2204 | |
2205 | bool isGFX11Plus(const MCSubtargetInfo &STI) { |
2206 | return isGFX11(STI) || isGFX12Plus(STI); |
2207 | } |
2208 | |
2209 | bool isGFX12(const MCSubtargetInfo &STI) { |
2210 | return STI.getFeatureBits()[AMDGPU::FeatureGFX12]; |
2211 | } |
2212 | |
2213 | bool isGFX12Plus(const MCSubtargetInfo &STI) { return isGFX12(STI); } |
2214 | |
2215 | bool isNotGFX12Plus(const MCSubtargetInfo &STI) { return !isGFX12Plus(STI); } |
2216 | |
2217 | bool isNotGFX11Plus(const MCSubtargetInfo &STI) { |
2218 | return !isGFX11Plus(STI); |
2219 | } |
2220 | |
2221 | bool isNotGFX10Plus(const MCSubtargetInfo &STI) { |
2222 | return isSI(STI) || isCI(STI) || isVI(STI) || isGFX9(STI); |
2223 | } |
2224 | |
2225 | bool isGFX10Before1030(const MCSubtargetInfo &STI) { |
2226 | return isGFX10(STI) && !AMDGPU::isGFX10_BEncoding(STI); |
2227 | } |
2228 | |
2229 | bool isGCN3Encoding(const MCSubtargetInfo &STI) { |
2230 | return STI.hasFeature(AMDGPU::FeatureGCN3Encoding); |
2231 | } |
2232 | |
2233 | bool isGFX10_AEncoding(const MCSubtargetInfo &STI) { |
2234 | return STI.hasFeature(AMDGPU::FeatureGFX10_AEncoding); |
2235 | } |
2236 | |
2237 | bool isGFX10_BEncoding(const MCSubtargetInfo &STI) { |
2238 | return STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding); |
2239 | } |
2240 | |
2241 | bool hasGFX10_3Insts(const MCSubtargetInfo &STI) { |
2242 | return STI.hasFeature(AMDGPU::FeatureGFX10_3Insts); |
2243 | } |
2244 | |
2245 | bool isGFX10_3_GFX11(const MCSubtargetInfo &STI) { |
2246 | return isGFX10_BEncoding(STI) && !isGFX12Plus(STI); |
2247 | } |
2248 | |
2249 | bool isGFX90A(const MCSubtargetInfo &STI) { |
2250 | return STI.hasFeature(AMDGPU::FeatureGFX90AInsts); |
2251 | } |
2252 | |
2253 | bool isGFX940(const MCSubtargetInfo &STI) { |
2254 | return STI.hasFeature(AMDGPU::FeatureGFX940Insts); |
2255 | } |
2256 | |
2257 | bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI) { |
2258 | return STI.hasFeature(AMDGPU::FeatureArchitectedFlatScratch); |
2259 | } |
2260 | |
2261 | bool hasMAIInsts(const MCSubtargetInfo &STI) { |
2262 | return STI.hasFeature(AMDGPU::FeatureMAIInsts); |
2263 | } |
2264 | |
2265 | bool hasVOPD(const MCSubtargetInfo &STI) { |
2266 | return STI.hasFeature(AMDGPU::FeatureVOPD); |
2267 | } |
2268 | |
2269 | bool hasDPPSrc1SGPR(const MCSubtargetInfo &STI) { |
2270 | return STI.hasFeature(AMDGPU::FeatureDPPSrc1SGPR); |
2271 | } |
2272 | |
2273 | unsigned hasKernargPreload(const MCSubtargetInfo &STI) { |
2274 | return STI.hasFeature(AMDGPU::FeatureKernargPreload); |
2275 | } |
2276 | |
2277 | int32_t getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, |
2278 | int32_t ArgNumVGPR) { |
2279 | if (has90AInsts && ArgNumAGPR) |
    return alignTo(ArgNumVGPR, 4) + ArgNumAGPR;
2281 | return std::max(ArgNumVGPR, ArgNumAGPR); |
2282 | } |
2283 | |
2284 | bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI) { |
2285 | const MCRegisterClass SGPRClass = TRI->getRegClass(AMDGPU::SReg_32RegClassID); |
2286 | const unsigned FirstSubReg = TRI->getSubReg(Reg, AMDGPU::sub0); |
2287 | return SGPRClass.contains(FirstSubReg != 0 ? FirstSubReg : Reg) || |
2288 | Reg == AMDGPU::SCC; |
2289 | } |
2290 | |
2291 | bool isHi(unsigned Reg, const MCRegisterInfo &MRI) { |
  return MRI.getEncodingValue(Reg) & AMDGPU::HWEncoding::IS_HI;
2293 | } |
2294 | |
2295 | #define MAP_REG2REG \ |
2296 | using namespace AMDGPU; \ |
2297 | switch(Reg) { \ |
2298 | default: return Reg; \ |
2299 | CASE_CI_VI(FLAT_SCR) \ |
2300 | CASE_CI_VI(FLAT_SCR_LO) \ |
2301 | CASE_CI_VI(FLAT_SCR_HI) \ |
2302 | CASE_VI_GFX9PLUS(TTMP0) \ |
2303 | CASE_VI_GFX9PLUS(TTMP1) \ |
2304 | CASE_VI_GFX9PLUS(TTMP2) \ |
2305 | CASE_VI_GFX9PLUS(TTMP3) \ |
2306 | CASE_VI_GFX9PLUS(TTMP4) \ |
2307 | CASE_VI_GFX9PLUS(TTMP5) \ |
2308 | CASE_VI_GFX9PLUS(TTMP6) \ |
2309 | CASE_VI_GFX9PLUS(TTMP7) \ |
2310 | CASE_VI_GFX9PLUS(TTMP8) \ |
2311 | CASE_VI_GFX9PLUS(TTMP9) \ |
2312 | CASE_VI_GFX9PLUS(TTMP10) \ |
2313 | CASE_VI_GFX9PLUS(TTMP11) \ |
2314 | CASE_VI_GFX9PLUS(TTMP12) \ |
2315 | CASE_VI_GFX9PLUS(TTMP13) \ |
2316 | CASE_VI_GFX9PLUS(TTMP14) \ |
2317 | CASE_VI_GFX9PLUS(TTMP15) \ |
2318 | CASE_VI_GFX9PLUS(TTMP0_TTMP1) \ |
2319 | CASE_VI_GFX9PLUS(TTMP2_TTMP3) \ |
2320 | CASE_VI_GFX9PLUS(TTMP4_TTMP5) \ |
2321 | CASE_VI_GFX9PLUS(TTMP6_TTMP7) \ |
2322 | CASE_VI_GFX9PLUS(TTMP8_TTMP9) \ |
2323 | CASE_VI_GFX9PLUS(TTMP10_TTMP11) \ |
2324 | CASE_VI_GFX9PLUS(TTMP12_TTMP13) \ |
2325 | CASE_VI_GFX9PLUS(TTMP14_TTMP15) \ |
2326 | CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3) \ |
2327 | CASE_VI_GFX9PLUS(TTMP4_TTMP5_TTMP6_TTMP7) \ |
2328 | CASE_VI_GFX9PLUS(TTMP8_TTMP9_TTMP10_TTMP11) \ |
2329 | CASE_VI_GFX9PLUS(TTMP12_TTMP13_TTMP14_TTMP15) \ |
2330 | CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7) \ |
2331 | CASE_VI_GFX9PLUS(TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11) \ |
2332 | CASE_VI_GFX9PLUS(TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \ |
2333 | CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \ |
2334 | CASE_GFXPRE11_GFX11PLUS(M0) \ |
2335 | CASE_GFXPRE11_GFX11PLUS(SGPR_NULL) \ |
2336 | CASE_GFXPRE11_GFX11PLUS_TO(SGPR_NULL64, SGPR_NULL) \ |
2337 | } |
2338 | |
2339 | #define CASE_CI_VI(node) \ |
2340 | assert(!isSI(STI)); \ |
2341 | case node: return isCI(STI) ? node##_ci : node##_vi; |
2342 | |
2343 | #define CASE_VI_GFX9PLUS(node) \ |
2344 | case node: return isGFX9Plus(STI) ? node##_gfx9plus : node##_vi; |
2345 | |
2346 | #define CASE_GFXPRE11_GFX11PLUS(node) \ |
2347 | case node: return isGFX11Plus(STI) ? node##_gfx11plus : node##_gfxpre11; |
2348 | |
2349 | #define CASE_GFXPRE11_GFX11PLUS_TO(node, result) \ |
2350 | case node: return isGFX11Plus(STI) ? result##_gfx11plus : result##_gfxpre11; |
2351 | |
2352 | unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) { |
2353 | if (STI.getTargetTriple().getArch() == Triple::r600) |
2354 | return Reg; |
2355 | MAP_REG2REG |
2356 | } |
2357 | |
2358 | #undef CASE_CI_VI |
2359 | #undef CASE_VI_GFX9PLUS |
2360 | #undef CASE_GFXPRE11_GFX11PLUS |
2361 | #undef CASE_GFXPRE11_GFX11PLUS_TO |
2362 | |
2363 | #define CASE_CI_VI(node) case node##_ci: case node##_vi: return node; |
2364 | #define CASE_VI_GFX9PLUS(node) case node##_vi: case node##_gfx9plus: return node; |
2365 | #define CASE_GFXPRE11_GFX11PLUS(node) case node##_gfx11plus: case node##_gfxpre11: return node; |
2366 | #define CASE_GFXPRE11_GFX11PLUS_TO(node, result) |
2367 | |
2368 | unsigned mc2PseudoReg(unsigned Reg) { |
2369 | MAP_REG2REG |
2370 | } |
2371 | |
2372 | bool isInlineValue(unsigned Reg) { |
2373 | switch (Reg) { |
2374 | case AMDGPU::SRC_SHARED_BASE_LO: |
2375 | case AMDGPU::SRC_SHARED_BASE: |
2376 | case AMDGPU::SRC_SHARED_LIMIT_LO: |
2377 | case AMDGPU::SRC_SHARED_LIMIT: |
2378 | case AMDGPU::SRC_PRIVATE_BASE_LO: |
2379 | case AMDGPU::SRC_PRIVATE_BASE: |
2380 | case AMDGPU::SRC_PRIVATE_LIMIT_LO: |
2381 | case AMDGPU::SRC_PRIVATE_LIMIT: |
2382 | case AMDGPU::SRC_POPS_EXITING_WAVE_ID: |
2383 | return true; |
2384 | case AMDGPU::SRC_VCCZ: |
2385 | case AMDGPU::SRC_EXECZ: |
2386 | case AMDGPU::SRC_SCC: |
2387 | return true; |
2388 | case AMDGPU::SGPR_NULL: |
2389 | return true; |
2390 | default: |
2391 | return false; |
2392 | } |
2393 | } |
2394 | |
2395 | #undef CASE_CI_VI |
2396 | #undef CASE_VI_GFX9PLUS |
2397 | #undef CASE_GFXPRE11_GFX11PLUS |
2398 | #undef CASE_GFXPRE11_GFX11PLUS_TO |
2399 | #undef MAP_REG2REG |
2400 | |
2401 | bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) { |
2402 | assert(OpNo < Desc.NumOperands); |
2403 | unsigned OpType = Desc.operands()[OpNo].OperandType; |
2404 | return OpType >= AMDGPU::OPERAND_SRC_FIRST && |
2405 | OpType <= AMDGPU::OPERAND_SRC_LAST; |
2406 | } |
2407 | |
2408 | bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo) { |
2409 | assert(OpNo < Desc.NumOperands); |
2410 | unsigned OpType = Desc.operands()[OpNo].OperandType; |
2411 | return OpType >= AMDGPU::OPERAND_KIMM_FIRST && |
2412 | OpType <= AMDGPU::OPERAND_KIMM_LAST; |
2413 | } |
2414 | |
2415 | bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) { |
2416 | assert(OpNo < Desc.NumOperands); |
2417 | unsigned OpType = Desc.operands()[OpNo].OperandType; |
2418 | switch (OpType) { |
2419 | case AMDGPU::OPERAND_REG_IMM_FP32: |
2420 | case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: |
2421 | case AMDGPU::OPERAND_REG_IMM_FP64: |
2422 | case AMDGPU::OPERAND_REG_IMM_FP16: |
2423 | case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: |
2424 | case AMDGPU::OPERAND_REG_IMM_V2FP16: |
2425 | case AMDGPU::OPERAND_REG_INLINE_C_FP32: |
2426 | case AMDGPU::OPERAND_REG_INLINE_C_FP64: |
2427 | case AMDGPU::OPERAND_REG_INLINE_C_FP16: |
2428 | case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: |
2429 | case AMDGPU::OPERAND_REG_INLINE_AC_FP32: |
2430 | case AMDGPU::OPERAND_REG_INLINE_AC_FP16: |
2431 | case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: |
2432 | case AMDGPU::OPERAND_REG_IMM_V2FP32: |
2433 | case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: |
2434 | case AMDGPU::OPERAND_REG_INLINE_AC_FP64: |
2435 | return true; |
2436 | default: |
2437 | return false; |
2438 | } |
2439 | } |
2440 | |
2441 | bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo) { |
2442 | assert(OpNo < Desc.NumOperands); |
2443 | unsigned OpType = Desc.operands()[OpNo].OperandType; |
2444 | return (OpType >= AMDGPU::OPERAND_REG_INLINE_C_FIRST && |
2445 | OpType <= AMDGPU::OPERAND_REG_INLINE_C_LAST) || |
2446 | (OpType >= AMDGPU::OPERAND_REG_INLINE_AC_FIRST && |
2447 | OpType <= AMDGPU::OPERAND_REG_INLINE_AC_LAST); |
2448 | } |
2449 | |
2450 | // Avoid using MCRegisterClass::getSize, since that function will go away |
2451 | // (move from MC* level to Target* level). Return size in bits. |
2452 | unsigned getRegBitWidth(unsigned RCID) { |
2453 | switch (RCID) { |
2454 | case AMDGPU::SGPR_LO16RegClassID: |
2455 | case AMDGPU::AGPR_LO16RegClassID: |
2456 | return 16; |
2457 | case AMDGPU::SGPR_32RegClassID: |
2458 | case AMDGPU::VGPR_32RegClassID: |
2459 | case AMDGPU::VRegOrLds_32RegClassID: |
2460 | case AMDGPU::AGPR_32RegClassID: |
2461 | case AMDGPU::VS_32RegClassID: |
2462 | case AMDGPU::AV_32RegClassID: |
2463 | case AMDGPU::SReg_32RegClassID: |
2464 | case AMDGPU::SReg_32_XM0RegClassID: |
2465 | case AMDGPU::SRegOrLds_32RegClassID: |
2466 | return 32; |
2467 | case AMDGPU::SGPR_64RegClassID: |
2468 | case AMDGPU::VS_64RegClassID: |
2469 | case AMDGPU::SReg_64RegClassID: |
2470 | case AMDGPU::VReg_64RegClassID: |
2471 | case AMDGPU::AReg_64RegClassID: |
2472 | case AMDGPU::SReg_64_XEXECRegClassID: |
2473 | case AMDGPU::VReg_64_Align2RegClassID: |
2474 | case AMDGPU::AReg_64_Align2RegClassID: |
2475 | case AMDGPU::AV_64RegClassID: |
2476 | case AMDGPU::AV_64_Align2RegClassID: |
2477 | return 64; |
2478 | case AMDGPU::SGPR_96RegClassID: |
2479 | case AMDGPU::SReg_96RegClassID: |
2480 | case AMDGPU::VReg_96RegClassID: |
2481 | case AMDGPU::AReg_96RegClassID: |
2482 | case AMDGPU::VReg_96_Align2RegClassID: |
2483 | case AMDGPU::AReg_96_Align2RegClassID: |
2484 | case AMDGPU::AV_96RegClassID: |
2485 | case AMDGPU::AV_96_Align2RegClassID: |
2486 | return 96; |
2487 | case AMDGPU::SGPR_128RegClassID: |
2488 | case AMDGPU::SReg_128RegClassID: |
2489 | case AMDGPU::VReg_128RegClassID: |
2490 | case AMDGPU::AReg_128RegClassID: |
2491 | case AMDGPU::VReg_128_Align2RegClassID: |
2492 | case AMDGPU::AReg_128_Align2RegClassID: |
2493 | case AMDGPU::AV_128RegClassID: |
2494 | case AMDGPU::AV_128_Align2RegClassID: |
2495 | return 128; |
2496 | case AMDGPU::SGPR_160RegClassID: |
2497 | case AMDGPU::SReg_160RegClassID: |
2498 | case AMDGPU::VReg_160RegClassID: |
2499 | case AMDGPU::AReg_160RegClassID: |
2500 | case AMDGPU::VReg_160_Align2RegClassID: |
2501 | case AMDGPU::AReg_160_Align2RegClassID: |
2502 | case AMDGPU::AV_160RegClassID: |
2503 | case AMDGPU::AV_160_Align2RegClassID: |
2504 | return 160; |
2505 | case AMDGPU::SGPR_192RegClassID: |
2506 | case AMDGPU::SReg_192RegClassID: |
2507 | case AMDGPU::VReg_192RegClassID: |
2508 | case AMDGPU::AReg_192RegClassID: |
2509 | case AMDGPU::VReg_192_Align2RegClassID: |
2510 | case AMDGPU::AReg_192_Align2RegClassID: |
2511 | case AMDGPU::AV_192RegClassID: |
2512 | case AMDGPU::AV_192_Align2RegClassID: |
2513 | return 192; |
2514 | case AMDGPU::SGPR_224RegClassID: |
2515 | case AMDGPU::SReg_224RegClassID: |
2516 | case AMDGPU::VReg_224RegClassID: |
2517 | case AMDGPU::AReg_224RegClassID: |
2518 | case AMDGPU::VReg_224_Align2RegClassID: |
2519 | case AMDGPU::AReg_224_Align2RegClassID: |
2520 | case AMDGPU::AV_224RegClassID: |
2521 | case AMDGPU::AV_224_Align2RegClassID: |
2522 | return 224; |
2523 | case AMDGPU::SGPR_256RegClassID: |
2524 | case AMDGPU::SReg_256RegClassID: |
2525 | case AMDGPU::VReg_256RegClassID: |
2526 | case AMDGPU::AReg_256RegClassID: |
2527 | case AMDGPU::VReg_256_Align2RegClassID: |
2528 | case AMDGPU::AReg_256_Align2RegClassID: |
2529 | case AMDGPU::AV_256RegClassID: |
2530 | case AMDGPU::AV_256_Align2RegClassID: |
2531 | return 256; |
2532 | case AMDGPU::SGPR_288RegClassID: |
2533 | case AMDGPU::SReg_288RegClassID: |
2534 | case AMDGPU::VReg_288RegClassID: |
2535 | case AMDGPU::AReg_288RegClassID: |
2536 | case AMDGPU::VReg_288_Align2RegClassID: |
2537 | case AMDGPU::AReg_288_Align2RegClassID: |
2538 | case AMDGPU::AV_288RegClassID: |
2539 | case AMDGPU::AV_288_Align2RegClassID: |
2540 | return 288; |
2541 | case AMDGPU::SGPR_320RegClassID: |
2542 | case AMDGPU::SReg_320RegClassID: |
2543 | case AMDGPU::VReg_320RegClassID: |
2544 | case AMDGPU::AReg_320RegClassID: |
2545 | case AMDGPU::VReg_320_Align2RegClassID: |
2546 | case AMDGPU::AReg_320_Align2RegClassID: |
2547 | case AMDGPU::AV_320RegClassID: |
2548 | case AMDGPU::AV_320_Align2RegClassID: |
2549 | return 320; |
2550 | case AMDGPU::SGPR_352RegClassID: |
2551 | case AMDGPU::SReg_352RegClassID: |
2552 | case AMDGPU::VReg_352RegClassID: |
2553 | case AMDGPU::AReg_352RegClassID: |
2554 | case AMDGPU::VReg_352_Align2RegClassID: |
2555 | case AMDGPU::AReg_352_Align2RegClassID: |
2556 | case AMDGPU::AV_352RegClassID: |
2557 | case AMDGPU::AV_352_Align2RegClassID: |
2558 | return 352; |
2559 | case AMDGPU::SGPR_384RegClassID: |
2560 | case AMDGPU::SReg_384RegClassID: |
2561 | case AMDGPU::VReg_384RegClassID: |
2562 | case AMDGPU::AReg_384RegClassID: |
2563 | case AMDGPU::VReg_384_Align2RegClassID: |
2564 | case AMDGPU::AReg_384_Align2RegClassID: |
2565 | case AMDGPU::AV_384RegClassID: |
2566 | case AMDGPU::AV_384_Align2RegClassID: |
2567 | return 384; |
2568 | case AMDGPU::SGPR_512RegClassID: |
2569 | case AMDGPU::SReg_512RegClassID: |
2570 | case AMDGPU::VReg_512RegClassID: |
2571 | case AMDGPU::AReg_512RegClassID: |
2572 | case AMDGPU::VReg_512_Align2RegClassID: |
2573 | case AMDGPU::AReg_512_Align2RegClassID: |
2574 | case AMDGPU::AV_512RegClassID: |
2575 | case AMDGPU::AV_512_Align2RegClassID: |
2576 | return 512; |
2577 | case AMDGPU::SGPR_1024RegClassID: |
2578 | case AMDGPU::SReg_1024RegClassID: |
2579 | case AMDGPU::VReg_1024RegClassID: |
2580 | case AMDGPU::AReg_1024RegClassID: |
2581 | case AMDGPU::VReg_1024_Align2RegClassID: |
2582 | case AMDGPU::AReg_1024_Align2RegClassID: |
2583 | case AMDGPU::AV_1024RegClassID: |
2584 | case AMDGPU::AV_1024_Align2RegClassID: |
2585 | return 1024; |
2586 | default: |
2587 | llvm_unreachable("Unexpected register class" ); |
2588 | } |
2589 | } |
2590 | |
2591 | unsigned getRegBitWidth(const MCRegisterClass &RC) { |
  return getRegBitWidth(RC.getID());
2593 | } |
2594 | |
2595 | unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc, |
2596 | unsigned OpNo) { |
2597 | assert(OpNo < Desc.NumOperands); |
2598 | unsigned RCID = Desc.operands()[OpNo].RegClass; |
2599 | return getRegBitWidth(RCID) / 8; |
2600 | } |
2601 | |
2602 | bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi) { |
2603 | if (isInlinableIntLiteral(Literal)) |
2604 | return true; |
2605 | |
2606 | uint64_t Val = static_cast<uint64_t>(Literal); |
2607 | return (Val == llvm::bit_cast<uint64_t>(0.0)) || |
2608 | (Val == llvm::bit_cast<uint64_t>(1.0)) || |
2609 | (Val == llvm::bit_cast<uint64_t>(-1.0)) || |
2610 | (Val == llvm::bit_cast<uint64_t>(0.5)) || |
2611 | (Val == llvm::bit_cast<uint64_t>(-0.5)) || |
2612 | (Val == llvm::bit_cast<uint64_t>(2.0)) || |
2613 | (Val == llvm::bit_cast<uint64_t>(-2.0)) || |
2614 | (Val == llvm::bit_cast<uint64_t>(4.0)) || |
2615 | (Val == llvm::bit_cast<uint64_t>(-4.0)) || |
         (Val == 0x3fc45f306dc9c882 && HasInv2Pi); // 1.0 / (2.0 * pi)
2617 | } |
2618 | |
2619 | bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi) { |
2620 | if (isInlinableIntLiteral(Literal)) |
2621 | return true; |
2622 | |
2623 | // The actual type of the operand does not seem to matter as long |
2624 | // as the bits match one of the inline immediate values. For example: |
2625 | // |
2626 | // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal, |
2627 | // so it is a legal inline immediate. |
2628 | // |
2629 | // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in |
2630 | // floating-point, so it is a legal inline immediate. |
2631 | |
2632 | uint32_t Val = static_cast<uint32_t>(Literal); |
2633 | return (Val == llvm::bit_cast<uint32_t>(0.0f)) || |
2634 | (Val == llvm::bit_cast<uint32_t>(1.0f)) || |
2635 | (Val == llvm::bit_cast<uint32_t>(-1.0f)) || |
2636 | (Val == llvm::bit_cast<uint32_t>(0.5f)) || |
2637 | (Val == llvm::bit_cast<uint32_t>(-0.5f)) || |
2638 | (Val == llvm::bit_cast<uint32_t>(2.0f)) || |
2639 | (Val == llvm::bit_cast<uint32_t>(-2.0f)) || |
2640 | (Val == llvm::bit_cast<uint32_t>(4.0f)) || |
2641 | (Val == llvm::bit_cast<uint32_t>(-4.0f)) || |
         (Val == 0x3e22f983 && HasInv2Pi); // 1.0 / (2.0 * pi)
2643 | } |
2644 | |
2645 | bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi) { |
2646 | if (!HasInv2Pi) |
2647 | return false; |
2648 | if (isInlinableIntLiteral(Literal)) |
2649 | return true; |
2650 | uint16_t Val = static_cast<uint16_t>(Literal); |
2651 | return Val == 0x3F00 || // 0.5 |
2652 | Val == 0xBF00 || // -0.5 |
2653 | Val == 0x3F80 || // 1.0 |
2654 | Val == 0xBF80 || // -1.0 |
2655 | Val == 0x4000 || // 2.0 |
2656 | Val == 0xC000 || // -2.0 |
2657 | Val == 0x4080 || // 4.0 |
2658 | Val == 0xC080 || // -4.0 |
2659 | Val == 0x3E22; // 1.0 / (2.0 * pi) |
2660 | } |
2661 | |
2662 | bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi) { |
2663 | return isInlinableLiteral32(Literal, HasInv2Pi); |
2664 | } |
2665 | |
2666 | bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi) { |
2667 | if (!HasInv2Pi) |
2668 | return false; |
2669 | if (isInlinableIntLiteral(Literal)) |
2670 | return true; |
2671 | uint16_t Val = static_cast<uint16_t>(Literal); |
2672 | return Val == 0x3C00 || // 1.0 |
2673 | Val == 0xBC00 || // -1.0 |
2674 | Val == 0x3800 || // 0.5 |
2675 | Val == 0xB800 || // -0.5 |
2676 | Val == 0x4000 || // 2.0 |
2677 | Val == 0xC000 || // -2.0 |
2678 | Val == 0x4400 || // 4.0 |
2679 | Val == 0xC400 || // -4.0 |
2680 | Val == 0x3118; // 1/2pi |
2681 | } |
2682 | |
2683 | std::optional<unsigned> getInlineEncodingV216(bool IsFloat, uint32_t Literal) { |
2684 | // Unfortunately, the Instruction Set Architecture Reference Guide is |
2685 | // misleading about how the inline operands work for (packed) 16-bit |
2686 | // instructions. In a nutshell, the actual HW behavior is: |
2687 | // |
2688 | // - integer encodings (-16 .. 64) are always produced as sign-extended |
2689 | // 32-bit values |
2690 | // - float encodings are produced as: |
2691 | // - for F16 instructions: corresponding half-precision float values in |
2692 | // the LSBs, 0 in the MSBs |
2693 | // - for UI16 instructions: corresponding single-precision float value |
2694 | int32_t Signed = static_cast<int32_t>(Literal); |
2695 | if (Signed >= 0 && Signed <= 64) |
2696 | return 128 + Signed; |
2697 | |
2698 | if (Signed >= -16 && Signed <= -1) |
    return 192 + std::abs(Signed);
2700 | |
2701 | if (IsFloat) { |
2702 | // clang-format off |
2703 | switch (Literal) { |
2704 | case 0x3800: return 240; // 0.5 |
2705 | case 0xB800: return 241; // -0.5 |
2706 | case 0x3C00: return 242; // 1.0 |
2707 | case 0xBC00: return 243; // -1.0 |
2708 | case 0x4000: return 244; // 2.0 |
2709 | case 0xC000: return 245; // -2.0 |
2710 | case 0x4400: return 246; // 4.0 |
2711 | case 0xC400: return 247; // -4.0 |
2712 | case 0x3118: return 248; // 1.0 / (2.0 * pi) |
2713 | default: break; |
2714 | } |
2715 | // clang-format on |
2716 | } else { |
2717 | // clang-format off |
2718 | switch (Literal) { |
2719 | case 0x3F000000: return 240; // 0.5 |
2720 | case 0xBF000000: return 241; // -0.5 |
2721 | case 0x3F800000: return 242; // 1.0 |
2722 | case 0xBF800000: return 243; // -1.0 |
2723 | case 0x40000000: return 244; // 2.0 |
2724 | case 0xC0000000: return 245; // -2.0 |
2725 | case 0x40800000: return 246; // 4.0 |
2726 | case 0xC0800000: return 247; // -4.0 |
2727 | case 0x3E22F983: return 248; // 1.0 / (2.0 * pi) |
2728 | default: break; |
2729 | } |
2730 | // clang-format on |
2731 | } |
2732 | |
2733 | return {}; |
2734 | } |
2735 | |
2736 | // Encoding of the literal as an inline constant for a V_PK_*_IU16 instruction |
2737 | // or nullopt. |
2738 | std::optional<unsigned> getInlineEncodingV2I16(uint32_t Literal) { |
  return getInlineEncodingV216(/*IsFloat=*/false, Literal);
2740 | } |
2741 | |
2742 | // Encoding of the literal as an inline constant for a V_PK_*_BF16 instruction |
2743 | // or nullopt. |
2744 | std::optional<unsigned> getInlineEncodingV2BF16(uint32_t Literal) { |
2745 | int32_t Signed = static_cast<int32_t>(Literal); |
2746 | if (Signed >= 0 && Signed <= 64) |
2747 | return 128 + Signed; |
2748 | |
2749 | if (Signed >= -16 && Signed <= -1) |
    return 192 + std::abs(Signed);
2751 | |
2752 | // clang-format off |
2753 | switch (Literal) { |
2754 | case 0x3F00: return 240; // 0.5 |
2755 | case 0xBF00: return 241; // -0.5 |
2756 | case 0x3F80: return 242; // 1.0 |
2757 | case 0xBF80: return 243; // -1.0 |
2758 | case 0x4000: return 244; // 2.0 |
2759 | case 0xC000: return 245; // -2.0 |
2760 | case 0x4080: return 246; // 4.0 |
2761 | case 0xC080: return 247; // -4.0 |
2762 | case 0x3E22: return 248; // 1.0 / (2.0 * pi) |
2763 | default: break; |
2764 | } |
2765 | // clang-format on |
2766 | |
2767 | return std::nullopt; |
2768 | } |
2769 | |
2770 | // Encoding of the literal as an inline constant for a V_PK_*_F16 instruction |
2771 | // or nullopt. |
2772 | std::optional<unsigned> getInlineEncodingV2F16(uint32_t Literal) { |
  return getInlineEncodingV216(/*IsFloat=*/true, Literal);
2774 | } |
2775 | |
2776 | // Whether the given literal can be inlined for a V_PK_* instruction. |
2777 | bool isInlinableLiteralV216(uint32_t Literal, uint8_t OpType) { |
2778 | switch (OpType) { |
2779 | case AMDGPU::OPERAND_REG_IMM_V2INT16: |
2780 | case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: |
2781 | case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: |
    return getInlineEncodingV216(/*IsFloat=*/false, Literal).has_value();
2783 | case AMDGPU::OPERAND_REG_IMM_V2FP16: |
2784 | case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: |
2785 | case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: |
    return getInlineEncodingV216(/*IsFloat=*/true, Literal).has_value();
2787 | case AMDGPU::OPERAND_REG_IMM_V2BF16: |
2788 | case AMDGPU::OPERAND_REG_INLINE_C_V2BF16: |
2789 | case AMDGPU::OPERAND_REG_INLINE_AC_V2BF16: |
2790 | return isInlinableLiteralV2BF16(Literal); |
2791 | default: |
2792 | llvm_unreachable("bad packed operand type" ); |
2793 | } |
2794 | } |
2795 | |
2796 | // Whether the given literal can be inlined for a V_PK_*_IU16 instruction. |
2797 | bool isInlinableLiteralV2I16(uint32_t Literal) { |
2798 | return getInlineEncodingV2I16(Literal).has_value(); |
2799 | } |
2800 | |
2801 | // Whether the given literal can be inlined for a V_PK_*_BF16 instruction. |
2802 | bool isInlinableLiteralV2BF16(uint32_t Literal) { |
2803 | return getInlineEncodingV2BF16(Literal).has_value(); |
2804 | } |
2805 | |
2806 | // Whether the given literal can be inlined for a V_PK_*_F16 instruction. |
2807 | bool isInlinableLiteralV2F16(uint32_t Literal) { |
2808 | return getInlineEncodingV2F16(Literal).has_value(); |
2809 | } |
2810 | |
2811 | bool isValid32BitLiteral(uint64_t Val, bool IsFP64) { |
2812 | if (IsFP64) |
2813 | return !(Val & 0xffffffffu); |
2814 | |
2815 | return isUInt<32>(Val) || isInt<32>(Val); |
2816 | } |
2817 | |
2818 | bool isArgPassedInSGPR(const Argument *A) { |
2819 | const Function *F = A->getParent(); |
2820 | |
2821 | // Arguments to compute shaders are never a source of divergence. |
2822 | CallingConv::ID CC = F->getCallingConv(); |
2823 | switch (CC) { |
2824 | case CallingConv::AMDGPU_KERNEL: |
2825 | case CallingConv::SPIR_KERNEL: |
2826 | return true; |
2827 | case CallingConv::AMDGPU_VS: |
2828 | case CallingConv::AMDGPU_LS: |
2829 | case CallingConv::AMDGPU_HS: |
2830 | case CallingConv::AMDGPU_ES: |
2831 | case CallingConv::AMDGPU_GS: |
2832 | case CallingConv::AMDGPU_PS: |
2833 | case CallingConv::AMDGPU_CS: |
2834 | case CallingConv::AMDGPU_Gfx: |
2835 | case CallingConv::AMDGPU_CS_Chain: |
2836 | case CallingConv::AMDGPU_CS_ChainPreserve: |
2837 | // For non-compute shaders, SGPR inputs are marked with either inreg or |
2838 | // byval. Everything else is in VGPRs. |
2839 | return A->hasAttribute(Attribute::InReg) || |
2840 | A->hasAttribute(Attribute::ByVal); |
2841 | default: |
2842 | // TODO: treat i1 as divergent? |
2843 | return A->hasAttribute(Attribute::InReg); |
2844 | } |
2845 | } |
2846 | |
2847 | bool isArgPassedInSGPR(const CallBase *CB, unsigned ArgNo) { |
2848 | // Arguments to compute shaders are never a source of divergence. |
2849 | CallingConv::ID CC = CB->getCallingConv(); |
2850 | switch (CC) { |
2851 | case CallingConv::AMDGPU_KERNEL: |
2852 | case CallingConv::SPIR_KERNEL: |
2853 | return true; |
2854 | case CallingConv::AMDGPU_VS: |
2855 | case CallingConv::AMDGPU_LS: |
2856 | case CallingConv::AMDGPU_HS: |
2857 | case CallingConv::AMDGPU_ES: |
2858 | case CallingConv::AMDGPU_GS: |
2859 | case CallingConv::AMDGPU_PS: |
2860 | case CallingConv::AMDGPU_CS: |
2861 | case CallingConv::AMDGPU_Gfx: |
2862 | case CallingConv::AMDGPU_CS_Chain: |
2863 | case CallingConv::AMDGPU_CS_ChainPreserve: |
2864 | // For non-compute shaders, SGPR inputs are marked with either inreg or |
2865 | // byval. Everything else is in VGPRs. |
2866 | return CB->paramHasAttr(ArgNo, Attribute::InReg) || |
2867 | CB->paramHasAttr(ArgNo, Attribute::ByVal); |
2868 | default: |
2869 | return CB->paramHasAttr(ArgNo, Attribute::InReg); |
2870 | } |
2871 | } |
2872 | |
2873 | static bool hasSMEMByteOffset(const MCSubtargetInfo &ST) { |
  return isGCN3Encoding(ST) || isGFX10Plus(ST);
2875 | } |
2876 | |
2877 | static bool hasSMRDSignedImmOffset(const MCSubtargetInfo &ST) { |
  return isGFX9Plus(ST);
2879 | } |
2880 | |
2881 | bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST, |
2882 | int64_t EncodedOffset) { |
  if (isGFX12Plus(ST))
2884 | return isUInt<23>(EncodedOffset); |
2885 | |
2886 | return hasSMEMByteOffset(ST) ? isUInt<20>(EncodedOffset) |
2887 | : isUInt<8>(EncodedOffset); |
2888 | } |
2889 | |
2890 | bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST, |
2891 | int64_t EncodedOffset, |
2892 | bool IsBuffer) { |
  if (isGFX12Plus(ST))
2894 | return isInt<24>(EncodedOffset); |
2895 | |
  return !IsBuffer && hasSMRDSignedImmOffset(ST) && isInt<21>(EncodedOffset);
2899 | } |
2900 | |
2901 | static bool isDwordAligned(uint64_t ByteOffset) { |
2902 | return (ByteOffset & 3) == 0; |
2903 | } |
2904 | |
2905 | uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST, |
2906 | uint64_t ByteOffset) { |
2907 | if (hasSMEMByteOffset(ST)) |
2908 | return ByteOffset; |
2909 | |
2910 | assert(isDwordAligned(ByteOffset)); |
2911 | return ByteOffset >> 2; |
2912 | } |
2913 | |
2914 | std::optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST, |
2915 | int64_t ByteOffset, bool IsBuffer) { |
  if (isGFX12Plus(ST)) // 24 bit signed offsets
2917 | return isInt<24>(ByteOffset) ? std::optional<int64_t>(ByteOffset) |
2918 | : std::nullopt; |
2919 | |
2920 | // The signed version is always a byte offset. |
2921 | if (!IsBuffer && hasSMRDSignedImmOffset(ST)) { |
2922 | assert(hasSMEMByteOffset(ST)); |
2923 | return isInt<20>(ByteOffset) ? std::optional<int64_t>(ByteOffset) |
2924 | : std::nullopt; |
2925 | } |
2926 | |
2927 | if (!isDwordAligned(ByteOffset) && !hasSMEMByteOffset(ST)) |
2928 | return std::nullopt; |
2929 | |
2930 | int64_t EncodedOffset = convertSMRDOffsetUnits(ST, ByteOffset); |
2931 | return isLegalSMRDEncodedUnsignedOffset(ST, EncodedOffset) |
2932 | ? std::optional<int64_t>(EncodedOffset) |
2933 | : std::nullopt; |
2934 | } |
2935 | |
2936 | std::optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST, |
2937 | int64_t ByteOffset) { |
  if (!isCI(ST) || !isDwordAligned(ByteOffset))
2939 | return std::nullopt; |
2940 | |
2941 | int64_t EncodedOffset = convertSMRDOffsetUnits(ST, ByteOffset); |
2942 | return isUInt<32>(EncodedOffset) ? std::optional<int64_t>(EncodedOffset) |
2943 | : std::nullopt; |
2944 | } |
2945 | |
2946 | unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST) { |
  if (AMDGPU::isGFX10(ST))
2948 | return 12; |
2949 | |
  if (AMDGPU::isGFX12(ST))
2951 | return 24; |
2952 | return 13; |
2953 | } |
2954 | |
2955 | namespace { |
2956 | |
2957 | struct SourceOfDivergence { |
2958 | unsigned Intr; |
2959 | }; |
2960 | const SourceOfDivergence *lookupSourceOfDivergence(unsigned Intr); |
2961 | |
2962 | struct AlwaysUniform { |
2963 | unsigned Intr; |
2964 | }; |
2965 | const AlwaysUniform *lookupAlwaysUniform(unsigned Intr); |
2966 | |
2967 | #define GET_SourcesOfDivergence_IMPL |
2968 | #define GET_UniformIntrinsics_IMPL |
2969 | #define GET_Gfx9BufferFormat_IMPL |
2970 | #define GET_Gfx10BufferFormat_IMPL |
2971 | #define GET_Gfx11PlusBufferFormat_IMPL |
2972 | #include "AMDGPUGenSearchableTables.inc" |
2973 | |
2974 | } // end anonymous namespace |
2975 | |
2976 | bool isIntrinsicSourceOfDivergence(unsigned IntrID) { |
  return lookupSourceOfDivergence(IntrID);
2978 | } |
2979 | |
2980 | bool isIntrinsicAlwaysUniform(unsigned IntrID) { |
  return lookupAlwaysUniform(IntrID);
2982 | } |
2983 | |
2984 | const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp, |
2985 | uint8_t NumComponents, |
2986 | uint8_t NumFormat, |
2987 | const MCSubtargetInfo &STI) { |
2988 | return isGFX11Plus(STI) |
2989 | ? getGfx11PlusBufferFormatInfo(BitsPerComp, NumComponents, |
2990 | NumFormat) |
2991 | : isGFX10(STI) ? getGfx10BufferFormatInfo(BitsPerComp, |
2992 | NumComponents, NumFormat) |
2993 | : getGfx9BufferFormatInfo(BitsPerComp, |
2994 | NumComponents, NumFormat); |
2995 | } |
2996 | |
2997 | const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t Format, |
2998 | const MCSubtargetInfo &STI) { |
2999 | return isGFX11Plus(STI) ? getGfx11PlusBufferFormatInfo(Format) |
3000 | : isGFX10(STI) ? getGfx10BufferFormatInfo(Format) |
3001 | : getGfx9BufferFormatInfo(Format); |
3002 | } |
3003 | |
3004 | bool hasAny64BitVGPROperands(const MCInstrDesc &OpDesc) { |
3005 | for (auto OpName : { OpName::vdst, OpName::src0, OpName::src1, |
3006 | OpName::src2 }) { |
3007 | int Idx = getNamedOperandIdx(OpDesc.getOpcode(), OpName); |
3008 | if (Idx == -1) |
3009 | continue; |
3010 | |
3011 | if (OpDesc.operands()[Idx].RegClass == AMDGPU::VReg_64RegClassID || |
3012 | OpDesc.operands()[Idx].RegClass == AMDGPU::VReg_64_Align2RegClassID) |
3013 | return true; |
3014 | } |
3015 | |
3016 | return false; |
3017 | } |
3018 | |
3019 | bool isDPALU_DPP(const MCInstrDesc &OpDesc) { |
3020 | return hasAny64BitVGPROperands(OpDesc); |
3021 | } |
3022 | |
3023 | unsigned getLdsDwGranularity(const MCSubtargetInfo &ST) { |
3024 | // Currently this is 128 for all subtargets |
3025 | return 128; |
3026 | } |
3027 | |
3028 | } // namespace AMDGPU |
3029 | |
3030 | raw_ostream &operator<<(raw_ostream &OS, |
3031 | const AMDGPU::IsaInfo::TargetIDSetting S) { |
3032 | switch (S) { |
  case (AMDGPU::IsaInfo::TargetIDSetting::Unsupported):
    OS << "Unsupported";
    break;
  case (AMDGPU::IsaInfo::TargetIDSetting::Any):
    OS << "Any";
    break;
  case (AMDGPU::IsaInfo::TargetIDSetting::Off):
    OS << "Off";
    break;
  case (AMDGPU::IsaInfo::TargetIDSetting::On):
    OS << "On";
    break;
3045 | } |
3046 | return OS; |
3047 | } |
3048 | |
3049 | } // namespace llvm |
3050 | |