1//===- AMDGPUBaseInfo.h - Top level definitions for AMDGPU ------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
10#define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
11
12#include "SIDefines.h"
13#include "llvm/IR/CallingConv.h"
14#include "llvm/IR/InstrTypes.h"
15#include "llvm/IR/Module.h"
16#include "llvm/Support/Alignment.h"
17#include <array>
18#include <functional>
19#include <utility>
20
21struct amd_kernel_code_t;
22
23namespace llvm {
24
25struct Align;
26class Argument;
27class Function;
28class GlobalValue;
29class MCInstrInfo;
30class MCRegisterClass;
31class MCRegisterInfo;
32class MCSubtargetInfo;
33class StringRef;
34class Triple;
35class raw_ostream;
36
37namespace amdhsa {
38struct kernel_descriptor_t;
39}
40
41namespace AMDGPU {
42
43struct IsaVersion;
44
45/// Generic target versions emitted by this version of LLVM.
46///
/// These numbers are incremented every time a codegen-breaking change occurs
48/// within a generic family.
49namespace GenericVersion {
50static constexpr unsigned GFX9 = 1;
51static constexpr unsigned GFX10_1 = 1;
52static constexpr unsigned GFX10_3 = 1;
53static constexpr unsigned GFX11 = 1;
54} // namespace GenericVersion
55
56enum { AMDHSA_COV4 = 4, AMDHSA_COV5 = 5, AMDHSA_COV6 = 6 };
57
58/// \returns True if \p STI is AMDHSA.
59bool isHsaAbi(const MCSubtargetInfo &STI);
60
61/// \returns Code object version from the IR module flag.
62unsigned getAMDHSACodeObjectVersion(const Module &M);
63
64/// \returns Code object version from ELF's e_ident[EI_ABIVERSION].
65unsigned getAMDHSACodeObjectVersion(unsigned ABIVersion);
66
67/// \returns The default HSA code object version. This should only be used when
68/// we lack a more accurate CodeObjectVersion value (e.g. from the IR module
/// flag or a .amdhsa_code_object_version directive).
70unsigned getDefaultAMDHSACodeObjectVersion();
71
/// \returns ABIVersion suitable for use in ELF's e_ident[EI_ABIVERSION].
/// \param CodeObjectVersion is a value returned by getAMDHSACodeObjectVersion().
74uint8_t getELFABIVersion(const Triple &OS, unsigned CodeObjectVersion);
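
// A minimal usage sketch of the code object version queries above; the Module
// M and Triple TT are assumed to be provided by the caller:
//   unsigned COV = getAMDHSACodeObjectVersion(M);   // from the IR module flag
//   uint8_t ABIVer = getELFABIVersion(TT, COV);     // for e_ident[EI_ABIVERSION]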
75
76/// \returns The offset of the multigrid_sync_arg argument from implicitarg_ptr
77unsigned getMultigridSyncArgImplicitArgPosition(unsigned COV);
78
79/// \returns The offset of the hostcall pointer argument from implicitarg_ptr
80unsigned getHostcallImplicitArgPosition(unsigned COV);
81
82unsigned getDefaultQueueImplicitArgPosition(unsigned COV);
83unsigned getCompletionActionImplicitArgPosition(unsigned COV);
84
85struct GcnBufferFormatInfo {
86 unsigned Format;
87 unsigned BitsPerComp;
88 unsigned NumComponents;
89 unsigned NumFormat;
90 unsigned DataFormat;
91};
92
93struct MAIInstInfo {
94 uint16_t Opcode;
95 bool is_dgemm;
96 bool is_gfx940_xdl;
97};
98
99#define GET_MIMGBaseOpcode_DECL
100#define GET_MIMGDim_DECL
101#define GET_MIMGEncoding_DECL
102#define GET_MIMGLZMapping_DECL
103#define GET_MIMGMIPMapping_DECL
104#define GET_MIMGBiASMapping_DECL
105#define GET_MAIInstInfoTable_DECL
106#include "AMDGPUGenSearchableTables.inc"
107
108namespace IsaInfo {
109
110enum {
111 // The closed Vulkan driver sets 96, which limits the wave count to 8 but
112 // doesn't spill SGPRs as much as when 80 is set.
113 FIXED_NUM_SGPRS_FOR_INIT_BUG = 96,
114 TRAP_NUM_SGPRS = 16
115};
116
117enum class TargetIDSetting {
118 Unsupported,
119 Any,
120 Off,
121 On
122};
123
124class AMDGPUTargetID {
125private:
126 const MCSubtargetInfo &STI;
127 TargetIDSetting XnackSetting;
128 TargetIDSetting SramEccSetting;
129
130public:
131 explicit AMDGPUTargetID(const MCSubtargetInfo &STI);
132 ~AMDGPUTargetID() = default;
133
134 /// \return True if the current xnack setting is not "Unsupported".
135 bool isXnackSupported() const {
136 return XnackSetting != TargetIDSetting::Unsupported;
137 }
138
139 /// \returns True if the current xnack setting is "On" or "Any".
140 bool isXnackOnOrAny() const {
141 return XnackSetting == TargetIDSetting::On ||
142 XnackSetting == TargetIDSetting::Any;
143 }
144
145 /// \returns True if current xnack setting is "On" or "Off",
146 /// false otherwise.
147 bool isXnackOnOrOff() const {
148 return getXnackSetting() == TargetIDSetting::On ||
149 getXnackSetting() == TargetIDSetting::Off;
150 }
151
152 /// \returns The current xnack TargetIDSetting, possible options are
153 /// "Unsupported", "Any", "Off", and "On".
154 TargetIDSetting getXnackSetting() const {
155 return XnackSetting;
156 }
157
158 /// Sets xnack setting to \p NewXnackSetting.
159 void setXnackSetting(TargetIDSetting NewXnackSetting) {
160 XnackSetting = NewXnackSetting;
161 }
162
163 /// \return True if the current sramecc setting is not "Unsupported".
164 bool isSramEccSupported() const {
165 return SramEccSetting != TargetIDSetting::Unsupported;
166 }
167
168 /// \returns True if the current sramecc setting is "On" or "Any".
169 bool isSramEccOnOrAny() const {
170 return SramEccSetting == TargetIDSetting::On ||
171 SramEccSetting == TargetIDSetting::Any;
172 }
173
174 /// \returns True if current sramecc setting is "On" or "Off",
175 /// false otherwise.
176 bool isSramEccOnOrOff() const {
177 return getSramEccSetting() == TargetIDSetting::On ||
178 getSramEccSetting() == TargetIDSetting::Off;
179 }
180
181 /// \returns The current sramecc TargetIDSetting, possible options are
182 /// "Unsupported", "Any", "Off", and "On".
183 TargetIDSetting getSramEccSetting() const {
184 return SramEccSetting;
185 }
186
187 /// Sets sramecc setting to \p NewSramEccSetting.
188 void setSramEccSetting(TargetIDSetting NewSramEccSetting) {
189 SramEccSetting = NewSramEccSetting;
190 }
191
192 void setTargetIDFromFeaturesString(StringRef FS);
193 void setTargetIDFromTargetIDStream(StringRef TargetID);
194
  /// \returns String representation of this object.
196 std::string toString() const;
197};
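
// Illustrative sketch of building and querying a target ID; STI is assumed to
// be an MCSubtargetInfo obtained elsewhere (e.g. from the target registry):
//   AMDGPUTargetID TID(STI);
//   TID.setTargetIDFromFeaturesString(STI.getFeatureString());
//   if (TID.isXnackSupported() && TID.getXnackSetting() == TargetIDSetting::On)
//     OS << TID.toString();   // e.g. "gfx906:xnack+"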
198
199/// \returns Wavefront size for given subtarget \p STI.
200unsigned getWavefrontSize(const MCSubtargetInfo *STI);
201
202/// \returns Local memory size in bytes for given subtarget \p STI.
203unsigned getLocalMemorySize(const MCSubtargetInfo *STI);
204
205/// \returns Maximum addressable local memory size in bytes for given subtarget
206/// \p STI.
207unsigned getAddressableLocalMemorySize(const MCSubtargetInfo *STI);
208
209/// \returns Number of execution units per compute unit for given subtarget \p
210/// STI.
211unsigned getEUsPerCU(const MCSubtargetInfo *STI);
212
213/// \returns Maximum number of work groups per compute unit for given subtarget
214/// \p STI and limited by given \p FlatWorkGroupSize.
215unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
216 unsigned FlatWorkGroupSize);
217
218/// \returns Minimum number of waves per execution unit for given subtarget \p
219/// STI.
220unsigned getMinWavesPerEU(const MCSubtargetInfo *STI);
221
222/// \returns Maximum number of waves per execution unit for given subtarget \p
223/// STI without any kind of limitation.
224unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI);
225
226/// \returns Number of waves per execution unit required to support the given \p
227/// FlatWorkGroupSize.
228unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI,
229 unsigned FlatWorkGroupSize);
230
231/// \returns Minimum flat work group size for given subtarget \p STI.
232unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI);
233
234/// \returns Maximum flat work group size for given subtarget \p STI.
235unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI);
236
237/// \returns Number of waves per work group for given subtarget \p STI and
238/// \p FlatWorkGroupSize.
239unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
240 unsigned FlatWorkGroupSize);
241
242/// \returns SGPR allocation granularity for given subtarget \p STI.
243unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI);
244
245/// \returns SGPR encoding granularity for given subtarget \p STI.
246unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI);
247
248/// \returns Total number of SGPRs for given subtarget \p STI.
249unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI);
250
251/// \returns Addressable number of SGPRs for given subtarget \p STI.
252unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI);
253
254/// \returns Minimum number of SGPRs that meets the given number of waves per
255/// execution unit requirement for given subtarget \p STI.
256unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
257
258/// \returns Maximum number of SGPRs that meets the given number of waves per
259/// execution unit requirement for given subtarget \p STI.
260unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
261 bool Addressable);
262
263/// \returns Number of extra SGPRs implicitly required by given subtarget \p
264/// STI when the given special registers are used.
265unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
266 bool FlatScrUsed, bool XNACKUsed);
267
268/// \returns Number of extra SGPRs implicitly required by given subtarget \p
269/// STI when the given special registers are used. XNACK is inferred from
270/// \p STI.
271unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
272 bool FlatScrUsed);
273
274/// \returns Number of SGPR blocks needed for given subtarget \p STI when
275/// \p NumSGPRs are used. \p NumSGPRs should already include any special
276/// register counts.
277unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs);
278
279/// \returns VGPR allocation granularity for given subtarget \p STI.
280///
281/// For subtargets which support it, \p EnableWavefrontSize32 should match
282/// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
283unsigned
284getVGPRAllocGranule(const MCSubtargetInfo *STI,
285 std::optional<bool> EnableWavefrontSize32 = std::nullopt);
286
287/// \returns VGPR encoding granularity for given subtarget \p STI.
288///
289/// For subtargets which support it, \p EnableWavefrontSize32 should match
290/// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
291unsigned getVGPREncodingGranule(
292 const MCSubtargetInfo *STI,
293 std::optional<bool> EnableWavefrontSize32 = std::nullopt);
294
295/// \returns Total number of VGPRs for given subtarget \p STI.
296unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI);
297
298/// \returns Addressable number of VGPRs for given subtarget \p STI.
299unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI);
300
301/// \returns Minimum number of VGPRs that meets given number of waves per
302/// execution unit requirement for given subtarget \p STI.
303unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
304
305/// \returns Maximum number of VGPRs that meets given number of waves per
306/// execution unit requirement for given subtarget \p STI.
307unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
308
309/// \returns Number of waves reachable for a given \p NumVGPRs usage for given
310/// subtarget \p STI.
311unsigned getNumWavesPerEUWithNumVGPRs(const MCSubtargetInfo *STI,
312 unsigned NumVGPRs);
313
314/// \returns Number of VGPR blocks needed for given subtarget \p STI when
315/// \p NumVGPRs are used.
316///
317/// For subtargets which support it, \p EnableWavefrontSize32 should match the
318/// ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
319unsigned
getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs,
321 std::optional<bool> EnableWavefrontSize32 = std::nullopt);
322
323} // end namespace IsaInfo
324
325LLVM_READONLY
326int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx);
327
328LLVM_READONLY
329inline bool hasNamedOperand(uint64_t Opcode, uint64_t NamedIdx) {
330 return getNamedOperandIdx(Opcode, NamedIdx) != -1;
331}
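
// Typical lookup sketch: map an opcode plus a named operand (an
// AMDGPU::OpName value from the generated tables) to its MC operand index.
// MI is assumed to be an MCInst provided by the caller:
//   if (hasNamedOperand(MI.getOpcode(), OpName::vdst)) {
//     int Idx = getNamedOperandIdx(MI.getOpcode(), OpName::vdst);
//     const MCOperand &Dst = MI.getOperand(Idx);
//     // ...
//   }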
332
333LLVM_READONLY
334int getSOPPWithRelaxation(uint16_t Opcode);
335
336struct MIMGBaseOpcodeInfo {
337 MIMGBaseOpcode BaseOpcode;
338 bool Store;
339 bool Atomic;
340 bool AtomicX2;
341 bool Sampler;
342 bool Gather4;
343
344 uint8_t NumExtraArgs;
345 bool Gradients;
346 bool G16;
347 bool Coordinates;
348 bool LodOrClampOrMip;
349 bool HasD16;
350 bool MSAA;
351 bool BVH;
352 bool A16;
353};
354
355LLVM_READONLY
356const MIMGBaseOpcodeInfo *getMIMGBaseOpcode(unsigned Opc);
357
358LLVM_READONLY
359const MIMGBaseOpcodeInfo *getMIMGBaseOpcodeInfo(unsigned BaseOpcode);
360
361struct MIMGDimInfo {
362 MIMGDim Dim;
363 uint8_t NumCoords;
364 uint8_t NumGradients;
365 bool MSAA;
366 bool DA;
367 uint8_t Encoding;
368 const char *AsmSuffix;
369};
370
371LLVM_READONLY
372const MIMGDimInfo *getMIMGDimInfo(unsigned DimEnum);
373
374LLVM_READONLY
375const MIMGDimInfo *getMIMGDimInfoByEncoding(uint8_t DimEnc);
376
377LLVM_READONLY
378const MIMGDimInfo *getMIMGDimInfoByAsmSuffix(StringRef AsmSuffix);
379
380struct MIMGLZMappingInfo {
381 MIMGBaseOpcode L;
382 MIMGBaseOpcode LZ;
383};
384
385struct MIMGMIPMappingInfo {
386 MIMGBaseOpcode MIP;
387 MIMGBaseOpcode NONMIP;
388};
389
390struct MIMGBiasMappingInfo {
391 MIMGBaseOpcode Bias;
392 MIMGBaseOpcode NoBias;
393};
394
395struct MIMGOffsetMappingInfo {
396 MIMGBaseOpcode Offset;
397 MIMGBaseOpcode NoOffset;
398};
399
400struct MIMGG16MappingInfo {
401 MIMGBaseOpcode G;
402 MIMGBaseOpcode G16;
403};
404
405LLVM_READONLY
406const MIMGLZMappingInfo *getMIMGLZMappingInfo(unsigned L);
407
408struct WMMAOpcodeMappingInfo {
409 unsigned Opcode2Addr;
410 unsigned Opcode3Addr;
411};
412
413LLVM_READONLY
414const MIMGMIPMappingInfo *getMIMGMIPMappingInfo(unsigned MIP);
415
416LLVM_READONLY
417const MIMGBiasMappingInfo *getMIMGBiasMappingInfo(unsigned Bias);
418
419LLVM_READONLY
420const MIMGOffsetMappingInfo *getMIMGOffsetMappingInfo(unsigned Offset);
421
422LLVM_READONLY
423const MIMGG16MappingInfo *getMIMGG16MappingInfo(unsigned G);
424
425LLVM_READONLY
426int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
427 unsigned VDataDwords, unsigned VAddrDwords);
428
429LLVM_READONLY
430int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels);
431
432LLVM_READONLY
433unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode,
434 const MIMGDimInfo *Dim, bool IsA16,
435 bool IsG16Supported);
436
437struct MIMGInfo {
438 uint16_t Opcode;
439 uint16_t BaseOpcode;
440 uint8_t MIMGEncoding;
441 uint8_t VDataDwords;
442 uint8_t VAddrDwords;
443 uint8_t VAddrOperands;
444};
445
446LLVM_READONLY
447const MIMGInfo *getMIMGInfo(unsigned Opc);
448
449LLVM_READONLY
450int getMTBUFBaseOpcode(unsigned Opc);
451
452LLVM_READONLY
453int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements);
454
455LLVM_READONLY
456int getMTBUFElements(unsigned Opc);
457
458LLVM_READONLY
459bool getMTBUFHasVAddr(unsigned Opc);
460
461LLVM_READONLY
462bool getMTBUFHasSrsrc(unsigned Opc);
463
464LLVM_READONLY
465bool getMTBUFHasSoffset(unsigned Opc);
466
467LLVM_READONLY
468int getMUBUFBaseOpcode(unsigned Opc);
469
470LLVM_READONLY
471int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements);
472
473LLVM_READONLY
474int getMUBUFElements(unsigned Opc);
475
476LLVM_READONLY
477bool getMUBUFHasVAddr(unsigned Opc);
478
479LLVM_READONLY
480bool getMUBUFHasSrsrc(unsigned Opc);
481
482LLVM_READONLY
483bool getMUBUFHasSoffset(unsigned Opc);
484
485LLVM_READONLY
486bool getMUBUFIsBufferInv(unsigned Opc);
487
488LLVM_READONLY
489bool getSMEMIsBuffer(unsigned Opc);
490
491LLVM_READONLY
492bool getVOP1IsSingle(unsigned Opc);
493
494LLVM_READONLY
495bool getVOP2IsSingle(unsigned Opc);
496
497LLVM_READONLY
498bool getVOP3IsSingle(unsigned Opc);
499
500LLVM_READONLY
501bool isVOPC64DPP(unsigned Opc);
502
503LLVM_READONLY
504bool isVOPCAsmOnly(unsigned Opc);
505
/// Returns true if the MAI operation is a double-precision GEMM.
507LLVM_READONLY
508bool getMAIIsDGEMM(unsigned Opc);
509
510LLVM_READONLY
511bool getMAIIsGFX940XDL(unsigned Opc);
512
513struct CanBeVOPD {
514 bool X;
515 bool Y;
516};
517
/// \returns The SIEncodingFamily used for VOPD encoding on subtarget \p ST.
519LLVM_READONLY
520unsigned getVOPDEncodingFamily(const MCSubtargetInfo &ST);
521
522LLVM_READONLY
523CanBeVOPD getCanBeVOPD(unsigned Opc);
524
525LLVM_READONLY
526const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp,
527 uint8_t NumComponents,
528 uint8_t NumFormat,
529 const MCSubtargetInfo &STI);
530LLVM_READONLY
531const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t Format,
532 const MCSubtargetInfo &STI);
533
534LLVM_READONLY
535int getMCOpcode(uint16_t Opcode, unsigned Gen);
536
537LLVM_READONLY
538unsigned getVOPDOpcode(unsigned Opc);
539
540LLVM_READONLY
541int getVOPDFull(unsigned OpX, unsigned OpY, unsigned EncodingFamily);
542
543LLVM_READONLY
544bool isVOPD(unsigned Opc);
545
546LLVM_READNONE
547bool isMAC(unsigned Opc);
548
549LLVM_READNONE
550bool isPermlane16(unsigned Opc);
551
552LLVM_READNONE
553bool isGenericAtomic(unsigned Opc);
554
555LLVM_READNONE
556bool isCvt_F32_Fp8_Bf8_e64(unsigned Opc);
557
558namespace VOPD {
559
560enum Component : unsigned {
561 DST = 0,
562 SRC0,
563 SRC1,
564 SRC2,
565
566 DST_NUM = 1,
567 MAX_SRC_NUM = 3,
568 MAX_OPR_NUM = DST_NUM + MAX_SRC_NUM
569};
570
// LSB masks for the VGPR banks used by each VOPD component operand.
// For example, 4 banks yield a mask of 3, i.e. the 2 lower bits are set.
573constexpr unsigned VOPD_VGPR_BANK_MASKS[] = {1, 3, 3, 1};
574
575enum ComponentIndex : unsigned { X = 0, Y = 1 };
576constexpr unsigned COMPONENTS[] = {ComponentIndex::X, ComponentIndex::Y};
577constexpr unsigned COMPONENTS_NUM = 2;
578
579// Properties of VOPD components.
580class ComponentProps {
581private:
582 unsigned SrcOperandsNum = 0;
583 unsigned MandatoryLiteralIdx = ~0u;
584 bool HasSrc2Acc = false;
585
586public:
587 ComponentProps() = default;
588 ComponentProps(const MCInstrDesc &OpDesc);
589
590 // Return the total number of src operands this component has.
591 unsigned getCompSrcOperandsNum() const { return SrcOperandsNum; }
592
593 // Return the number of src operands of this component visible to the parser.
594 unsigned getCompParsedSrcOperandsNum() const {
595 return SrcOperandsNum - HasSrc2Acc;
596 }
597
  // Return true iff this component has a mandatory literal.
599 bool hasMandatoryLiteral() const { return MandatoryLiteralIdx != ~0u; }
600
601 // If this component has a mandatory literal, return component operand
602 // index of this literal (i.e. either Component::SRC1 or Component::SRC2).
603 unsigned getMandatoryLiteralCompOperandIndex() const {
604 assert(hasMandatoryLiteral());
605 return MandatoryLiteralIdx;
606 }
607
  // Return true iff this component has an operand with component index
  // CompSrcIdx and that operand may be a register.
610 bool hasRegSrcOperand(unsigned CompSrcIdx) const {
611 assert(CompSrcIdx < Component::MAX_SRC_NUM);
612 return SrcOperandsNum > CompSrcIdx && !hasMandatoryLiteralAt(CompSrcIdx);
613 }
614
  // Return true iff this component has a tied src2.
616 bool hasSrc2Acc() const { return HasSrc2Acc; }
617
618private:
619 bool hasMandatoryLiteralAt(unsigned CompSrcIdx) const {
620 assert(CompSrcIdx < Component::MAX_SRC_NUM);
621 return MandatoryLiteralIdx == Component::DST_NUM + CompSrcIdx;
622 }
623};
624
625enum ComponentKind : unsigned {
626 SINGLE = 0, // A single VOP1 or VOP2 instruction which may be used in VOPD.
627 COMPONENT_X, // A VOPD instruction, X component.
628 COMPONENT_Y, // A VOPD instruction, Y component.
629 MAX = COMPONENT_Y
630};
631
632// Interface functions of this class map VOPD component operand indices
633// to indices of operands in MachineInstr/MCInst or parsed operands array.
634//
635// Note that this class operates with 3 kinds of indices:
636// - VOPD component operand indices (Component::DST, Component::SRC0, etc.);
// - MC operand indices (they refer to operands in a MachineInstr/MCInst);
// - parsed operand indices (they refer to operands in the parsed operands array).
639//
640// For SINGLE components mapping between these indices is trivial.
641// But things get more complicated for COMPONENT_X and
642// COMPONENT_Y because these components share the same
643// MachineInstr/MCInst and the same parsed operands array.
644// Below is an example of component operand to parsed operand
645// mapping for the following instruction:
646//
647// v_dual_add_f32 v255, v4, v5 :: v_dual_mov_b32 v6, v1
648//
//                          PARSED        COMPONENT        PARSED
// COMPONENT                OPERANDS      OPERAND INDEX    OPERAND INDEX
// ---------------------------------------------------------------------
//                          "v_dual_add_f32"                    0
// v_dual_add_f32           v255          0 (DST)      -->      1
//                          v4            1 (SRC0)     -->      2
//                          v5            2 (SRC1)     -->      3
//                          "::"                                4
//                          "v_dual_mov_b32"                    5
// v_dual_mov_b32           v6            0 (DST)      -->      6
//                          v1            1 (SRC0)     -->      7
// ---------------------------------------------------------------------
661//
662class ComponentLayout {
663private:
664 // Regular MachineInstr/MCInst operands are ordered as follows:
665 // dst, src0 [, other src operands]
666 // VOPD MachineInstr/MCInst operands are ordered as follows:
667 // dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands]
668 // Each ComponentKind has operand indices defined below.
669 static constexpr unsigned MC_DST_IDX[] = {0, 0, 1};
670 static constexpr unsigned FIRST_MC_SRC_IDX[] = {1, 2, 2 /* + OpX.MCSrcNum */};
671
672 // Parsed operands of regular instructions are ordered as follows:
673 // Mnemo dst src0 [vsrc1 ...]
674 // Parsed VOPD operands are ordered as follows:
675 // OpXMnemo dstX src0X [vsrc1X|imm vsrc1X|vsrc1X imm] '::'
676 // OpYMnemo dstY src0Y [vsrc1Y|imm vsrc1Y|vsrc1Y imm]
677 // Each ComponentKind has operand indices defined below.
678 static constexpr unsigned PARSED_DST_IDX[] = {1, 1,
679 4 /* + OpX.ParsedSrcNum */};
680 static constexpr unsigned FIRST_PARSED_SRC_IDX[] = {
681 2, 2, 5 /* + OpX.ParsedSrcNum */};
682
683private:
684 const ComponentKind Kind;
685 const ComponentProps PrevComp;
686
687public:
688 // Create layout for COMPONENT_X or SINGLE component.
689 ComponentLayout(ComponentKind Kind) : Kind(Kind) {
690 assert(Kind == ComponentKind::SINGLE || Kind == ComponentKind::COMPONENT_X);
691 }
692
693 // Create layout for COMPONENT_Y which depends on COMPONENT_X layout.
694 ComponentLayout(const ComponentProps &OpXProps)
695 : Kind(ComponentKind::COMPONENT_Y), PrevComp(OpXProps) {}
696
697public:
698 // Return the index of dst operand in MCInst operands.
699 unsigned getIndexOfDstInMCOperands() const { return MC_DST_IDX[Kind]; }
700
701 // Return the index of the specified src operand in MCInst operands.
702 unsigned getIndexOfSrcInMCOperands(unsigned CompSrcIdx) const {
703 assert(CompSrcIdx < Component::MAX_SRC_NUM);
704 return FIRST_MC_SRC_IDX[Kind] + getPrevCompSrcNum() + CompSrcIdx;
705 }
706
707 // Return the index of dst operand in the parsed operands array.
708 unsigned getIndexOfDstInParsedOperands() const {
709 return PARSED_DST_IDX[Kind] + getPrevCompParsedSrcNum();
710 }
711
712 // Return the index of the specified src operand in the parsed operands array.
713 unsigned getIndexOfSrcInParsedOperands(unsigned CompSrcIdx) const {
714 assert(CompSrcIdx < Component::MAX_SRC_NUM);
715 return FIRST_PARSED_SRC_IDX[Kind] + getPrevCompParsedSrcNum() + CompSrcIdx;
716 }
717
718private:
719 unsigned getPrevCompSrcNum() const {
720 return PrevComp.getCompSrcOperandsNum();
721 }
722 unsigned getPrevCompParsedSrcNum() const {
723 return PrevComp.getCompParsedSrcOperandsNum();
724 }
725};
726
727// Layout and properties of VOPD components.
728class ComponentInfo : public ComponentLayout, public ComponentProps {
729public:
730 // Create ComponentInfo for COMPONENT_X or SINGLE component.
731 ComponentInfo(const MCInstrDesc &OpDesc,
732 ComponentKind Kind = ComponentKind::SINGLE)
733 : ComponentLayout(Kind), ComponentProps(OpDesc) {}
734
735 // Create ComponentInfo for COMPONENT_Y which depends on COMPONENT_X layout.
736 ComponentInfo(const MCInstrDesc &OpDesc, const ComponentProps &OpXProps)
737 : ComponentLayout(OpXProps), ComponentProps(OpDesc) {}
738
739 // Map component operand index to parsed operand index.
740 // Return 0 if the specified operand does not exist.
741 unsigned getIndexInParsedOperands(unsigned CompOprIdx) const;
742};
743
744// Properties of VOPD instructions.
745class InstInfo {
746private:
747 const ComponentInfo CompInfo[COMPONENTS_NUM];
748
749public:
750 using RegIndices = std::array<unsigned, Component::MAX_OPR_NUM>;
751
752 InstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY)
753 : CompInfo{OpX, OpY} {}
754
755 InstInfo(const ComponentInfo &OprInfoX, const ComponentInfo &OprInfoY)
756 : CompInfo{OprInfoX, OprInfoY} {}
757
758 const ComponentInfo &operator[](size_t ComponentIdx) const {
759 assert(ComponentIdx < COMPONENTS_NUM);
760 return CompInfo[ComponentIdx];
761 }
762
763 // Check VOPD operands constraints.
764 // GetRegIdx(Component, MCOperandIdx) must return a VGPR register index
765 // for the specified component and MC operand. The callback must return 0
766 // if the operand is not a register or not a VGPR.
767 // If \p SkipSrc is set to true then constraints for source operands are not
768 // checked.
769 bool hasInvalidOperand(std::function<unsigned(unsigned, unsigned)> GetRegIdx,
770 bool SkipSrc = false) const {
    return getInvalidCompOperandIndex(GetRegIdx, SkipSrc).has_value();
772 }
773
774 // Check VOPD operands constraints.
775 // Return the index of an invalid component operand, if any.
776 // If \p SkipSrc is set to true then constraints for source operands are not
777 // checked.
778 std::optional<unsigned> getInvalidCompOperandIndex(
779 std::function<unsigned(unsigned, unsigned)> GetRegIdx,
780 bool SkipSrc = false) const;
781
782private:
783 RegIndices
784 getRegIndices(unsigned ComponentIdx,
785 std::function<unsigned(unsigned, unsigned)> GetRegIdx) const;
786};
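
// Illustrative constraint check; MC is assumed to be an MCInst holding a VOPD
// instruction, MRI an MCRegisterInfo, and MCII an MCInstrInfo, all provided by
// the caller. The callback maps (component index, MC operand index) to a VGPR
// register index and must return 0 for operands that are not VGPRs:
//   auto GetRegIdx = [&](unsigned CompIdx, unsigned MCOprIdx) -> unsigned {
//     const MCOperand &Opr = MC.getOperand(MCOprIdx);
//     if (!Opr.isReg())
//       return 0;
//     return getVGPRIndexOrZero(Opr.getReg(), MRI);  // hypothetical helper
//   };
//   InstInfo Info = getVOPDInstInfo(MC.getOpcode(), &MCII);
//   bool Bad = Info.hasInvalidOperand(GetRegIdx);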
787
788} // namespace VOPD
789
790LLVM_READONLY
791std::pair<unsigned, unsigned> getVOPDComponents(unsigned VOPDOpcode);
792
793LLVM_READONLY
794// Get properties of 2 single VOP1/VOP2 instructions
795// used as components to create a VOPD instruction.
796VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY);
797
798LLVM_READONLY
799// Get properties of VOPD X and Y components.
800VOPD::InstInfo
801getVOPDInstInfo(unsigned VOPDOpcode, const MCInstrInfo *InstrInfo);
802
803LLVM_READONLY
804bool isTrue16Inst(unsigned Opc);
805
806LLVM_READONLY
807unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc);
808
809LLVM_READONLY
810unsigned mapWMMA3AddrTo2AddrOpcode(unsigned Opc);
811
812void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
813 const MCSubtargetInfo *STI);
814
815amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor(
816 const MCSubtargetInfo *STI);
817
818bool isGroupSegment(const GlobalValue *GV);
819bool isGlobalSegment(const GlobalValue *GV);
820bool isReadOnlySegment(const GlobalValue *GV);
821
822/// \returns True if constants should be emitted to .text section for given
823/// target triple \p TT, false otherwise.
824bool shouldEmitConstantsToTextSection(const Triple &TT);
825
826/// \returns Integer value requested using \p F's \p Name attribute.
827///
828/// \returns \p Default if attribute is not present.
829///
830/// \returns \p Default and emits error if requested value cannot be converted
831/// to integer.
832int getIntegerAttribute(const Function &F, StringRef Name, int Default);
833
834/// \returns A pair of integer values requested using \p F's \p Name attribute
835/// in "first[,second]" format ("second" is optional unless \p OnlyFirstRequired
836/// is false).
837///
838/// \returns \p Default if attribute is not present.
839///
840/// \returns \p Default and emits error if one of the requested values cannot be
841/// converted to integer, or \p OnlyFirstRequired is false and "second" value is
842/// not present.
843std::pair<unsigned, unsigned>
844getIntegerPairAttribute(const Function &F, StringRef Name,
845 std::pair<unsigned, unsigned> Default,
846 bool OnlyFirstRequired = false);
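
// Minimal sketch: read a "first[,second]" attribute such as
// "amdgpu-flat-work-group-size"; the default pair used here is illustrative:
//   std::pair<unsigned, unsigned> FWGS =
//       getIntegerPairAttribute(F, "amdgpu-flat-work-group-size",
//                               /*Default=*/{1, 1024});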
847
848/// Represents the counter values to wait for in an s_waitcnt instruction.
849///
850/// Large values (including the maximum possible integer) can be used to
851/// represent "don't care" waits.
852struct Waitcnt {
853 unsigned LoadCnt = ~0u; // Corresponds to Vmcnt prior to gfx12.
854 unsigned ExpCnt = ~0u;
855 unsigned DsCnt = ~0u; // Corresponds to LGKMcnt prior to gfx12.
856 unsigned StoreCnt = ~0u; // Corresponds to VScnt on gfx10/gfx11.
857 unsigned SampleCnt = ~0u; // gfx12+ only.
858 unsigned BvhCnt = ~0u; // gfx12+ only.
859 unsigned KmCnt = ~0u; // gfx12+ only.
860
861 Waitcnt() = default;
862 // Pre-gfx12 constructor.
863 Waitcnt(unsigned VmCnt, unsigned ExpCnt, unsigned LgkmCnt, unsigned VsCnt)
864 : LoadCnt(VmCnt), ExpCnt(ExpCnt), DsCnt(LgkmCnt), StoreCnt(VsCnt),
865 SampleCnt(~0u), BvhCnt(~0u), KmCnt(~0u) {}
866
867 // gfx12+ constructor.
868 Waitcnt(unsigned LoadCnt, unsigned ExpCnt, unsigned DsCnt, unsigned StoreCnt,
869 unsigned SampleCnt, unsigned BvhCnt, unsigned KmCnt)
870 : LoadCnt(LoadCnt), ExpCnt(ExpCnt), DsCnt(DsCnt), StoreCnt(StoreCnt),
871 SampleCnt(SampleCnt), BvhCnt(BvhCnt), KmCnt(KmCnt) {}
872
873 static Waitcnt allZero(bool Extended, bool HasStorecnt) {
874 return Extended ? Waitcnt(0, 0, 0, 0, 0, 0, 0)
875 : Waitcnt(0, 0, 0, HasStorecnt ? 0 : ~0u);
876 }
877
878 static Waitcnt allZeroExceptVsCnt(bool Extended) {
879 return Extended ? Waitcnt(0, 0, 0, ~0u, 0, 0, 0) : Waitcnt(0, 0, 0, ~0u);
880 }
881
882 bool hasWait() const { return StoreCnt != ~0u || hasWaitExceptStoreCnt(); }
883
884 bool hasWaitExceptStoreCnt() const {
885 return LoadCnt != ~0u || ExpCnt != ~0u || DsCnt != ~0u ||
886 SampleCnt != ~0u || BvhCnt != ~0u || KmCnt != ~0u;
887 }
888
889 bool hasWaitStoreCnt() const { return StoreCnt != ~0u; }
890
891 Waitcnt combined(const Waitcnt &Other) const {
892 // Does the right thing provided self and Other are either both pre-gfx12
893 // or both gfx12+.
894 return Waitcnt(
895 std::min(LoadCnt, Other.LoadCnt), std::min(ExpCnt, Other.ExpCnt),
896 std::min(DsCnt, Other.DsCnt), std::min(StoreCnt, Other.StoreCnt),
897 std::min(SampleCnt, Other.SampleCnt), std::min(BvhCnt, Other.BvhCnt),
898 std::min(KmCnt, Other.KmCnt));
899 }
900};
901
902// The following methods are only meaningful on targets that support
903// S_WAITCNT.
904
905/// \returns Vmcnt bit mask for given isa \p Version.
906unsigned getVmcntBitMask(const IsaVersion &Version);
907
908/// \returns Expcnt bit mask for given isa \p Version.
909unsigned getExpcntBitMask(const IsaVersion &Version);
910
911/// \returns Lgkmcnt bit mask for given isa \p Version.
912unsigned getLgkmcntBitMask(const IsaVersion &Version);
913
914/// \returns Waitcnt bit mask for given isa \p Version.
915unsigned getWaitcntBitMask(const IsaVersion &Version);
916
917/// \returns Decoded Vmcnt from given \p Waitcnt for given isa \p Version.
918unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt);
919
920/// \returns Decoded Expcnt from given \p Waitcnt for given isa \p Version.
921unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt);
922
923/// \returns Decoded Lgkmcnt from given \p Waitcnt for given isa \p Version.
924unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt);
925
926/// Decodes Vmcnt, Expcnt and Lgkmcnt from given \p Waitcnt for given isa
927/// \p Version, and writes decoded values into \p Vmcnt, \p Expcnt and
/// \p Lgkmcnt respectively. Should not be used on gfx12+; the instruction
/// that uses this encoding is deprecated there.
930///
931/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are decoded as follows:
932/// \p Vmcnt = \p Waitcnt[3:0] (pre-gfx9)
933/// \p Vmcnt = \p Waitcnt[15:14,3:0] (gfx9,10)
934/// \p Vmcnt = \p Waitcnt[15:10] (gfx11)
935/// \p Expcnt = \p Waitcnt[6:4] (pre-gfx11)
936/// \p Expcnt = \p Waitcnt[2:0] (gfx11)
937/// \p Lgkmcnt = \p Waitcnt[11:8] (pre-gfx10)
938/// \p Lgkmcnt = \p Waitcnt[13:8] (gfx10)
939/// \p Lgkmcnt = \p Waitcnt[9:4] (gfx11)
940///
941void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt,
942 unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt);
943
944Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded);
945
946/// \returns \p Waitcnt with encoded \p Vmcnt for given isa \p Version.
947unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
948 unsigned Vmcnt);
949
950/// \returns \p Waitcnt with encoded \p Expcnt for given isa \p Version.
951unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
952 unsigned Expcnt);
953
954/// \returns \p Waitcnt with encoded \p Lgkmcnt for given isa \p Version.
955unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
956 unsigned Lgkmcnt);
957
958/// Encodes \p Vmcnt, \p Expcnt and \p Lgkmcnt into Waitcnt for given isa
/// \p Version. Should not be used on gfx12+; the instruction that uses this
/// encoding is deprecated there.
961///
962/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are encoded as follows:
963/// Waitcnt[2:0] = \p Expcnt (gfx11+)
964/// Waitcnt[3:0] = \p Vmcnt (pre-gfx9)
965/// Waitcnt[3:0] = \p Vmcnt[3:0] (gfx9,10)
966/// Waitcnt[6:4] = \p Expcnt (pre-gfx11)
967/// Waitcnt[9:4] = \p Lgkmcnt (gfx11)
968/// Waitcnt[11:8] = \p Lgkmcnt (pre-gfx10)
969/// Waitcnt[13:8] = \p Lgkmcnt (gfx10)
970/// Waitcnt[15:10] = \p Vmcnt (gfx11)
971/// Waitcnt[15:14] = \p Vmcnt[5:4] (gfx9,10)
972///
973/// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given
974/// isa \p Version.
975///
976unsigned encodeWaitcnt(const IsaVersion &Version,
977 unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt);
978
979unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded);
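
// Round-trip sketch for the legacy (pre-gfx12) S_WAITCNT encoding; Version is
// assumed to come from AMDGPU::getIsaVersion() for the target CPU:
//   unsigned Enc = encodeWaitcnt(Version, /*Vmcnt=*/0, /*Expcnt=*/0,
//                                /*Lgkmcnt=*/0);      // wait for everything
//   unsigned Vm, Exp, Lgkm;
//   decodeWaitcnt(Version, Enc, Vm, Exp, Lgkm);        // recovers 0, 0, 0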
980
981// The following methods are only meaningful on targets that support
982// S_WAIT_*CNT, introduced with gfx12.
983
984/// \returns Loadcnt bit mask for given isa \p Version.
/// Returns 0 for versions that do not support LOADcnt.
986unsigned getLoadcntBitMask(const IsaVersion &Version);
987
988/// \returns Samplecnt bit mask for given isa \p Version.
/// Returns 0 for versions that do not support SAMPLEcnt.
990unsigned getSamplecntBitMask(const IsaVersion &Version);
991
992/// \returns Bvhcnt bit mask for given isa \p Version.
/// Returns 0 for versions that do not support BVHcnt.
994unsigned getBvhcntBitMask(const IsaVersion &Version);
995
996/// \returns Dscnt bit mask for given isa \p Version.
/// Returns 0 for versions that do not support DScnt.
998unsigned getDscntBitMask(const IsaVersion &Version);
999
/// \returns Kmcnt bit mask for given isa \p Version.
/// Returns 0 for versions that do not support KMcnt.
1002unsigned getKmcntBitMask(const IsaVersion &Version);
1003
/// \returns STOREcnt or VScnt bit mask for given isa \p Version.
/// Returns 0 for versions that do not support STOREcnt or VScnt.
/// STOREcnt and VScnt are the same counter; the name used
/// depends on the ISA version.
1008unsigned getStorecntBitMask(const IsaVersion &Version);
1009
1010// The following are only meaningful on targets that support
1011// S_WAIT_LOADCNT_DSCNT and S_WAIT_STORECNT_DSCNT.
1012
1013/// \returns Decoded Waitcnt structure from given \p LoadcntDscnt for given
1014/// isa \p Version.
1015Waitcnt decodeLoadcntDscnt(const IsaVersion &Version, unsigned LoadcntDscnt);
1016
1017/// \returns Decoded Waitcnt structure from given \p StorecntDscnt for given
1018/// isa \p Version.
1019Waitcnt decodeStorecntDscnt(const IsaVersion &Version, unsigned StorecntDscnt);
1020
1021/// \returns \p Loadcnt and \p Dscnt components of \p Decoded encoded as an
1022/// immediate that can be used with S_WAIT_LOADCNT_DSCNT for given isa
1023/// \p Version.
1024unsigned encodeLoadcntDscnt(const IsaVersion &Version, const Waitcnt &Decoded);
1025
1026/// \returns \p Storecnt and \p Dscnt components of \p Decoded encoded as an
1027/// immediate that can be used with S_WAIT_STORECNT_DSCNT for given isa
1028/// \p Version.
1029unsigned encodeStorecntDscnt(const IsaVersion &Version, const Waitcnt &Decoded);
1030
1031namespace Hwreg {
1032
1033LLVM_READONLY
1034int64_t getHwregId(const StringRef Name, const MCSubtargetInfo &STI);
1035
1036LLVM_READNONE
1037bool isValidHwreg(int64_t Id);
1038
1039LLVM_READNONE
1040bool isValidHwregOffset(int64_t Offset);
1041
1042LLVM_READNONE
1043bool isValidHwregWidth(int64_t Width);
1044
1045LLVM_READNONE
1046uint64_t encodeHwreg(uint64_t Id, uint64_t Offset, uint64_t Width);
1047
1048LLVM_READNONE
1049StringRef getHwreg(unsigned Id, const MCSubtargetInfo &STI);
1050
1051void decodeHwreg(unsigned Val, unsigned &Id, unsigned &Offset, unsigned &Width);
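
// Encode/decode sketch for hwreg operands; the Id/Offset/Width values below
// are illustrative (real Ids come from SIDefines.h and the assembler tables):
//   uint64_t Enc = encodeHwreg(/*Id=*/1, /*Offset=*/0, /*Width=*/32);
//   unsigned Id, Offset, Width;
//   decodeHwreg(Enc, Id, Offset, Width);  // recovers Id == 1, Offset == 0, Width == 32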
1052
1053} // namespace Hwreg
1054
1055namespace DepCtr {
1056
1057int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI);
1058int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask,
1059 const MCSubtargetInfo &STI);
1060bool isSymbolicDepCtrEncoding(unsigned Code, bool &HasNonDefaultVal,
1061 const MCSubtargetInfo &STI);
1062bool decodeDepCtr(unsigned Code, int &Id, StringRef &Name, unsigned &Val,
1063 bool &IsDefault, const MCSubtargetInfo &STI);
1064
1065/// \returns Decoded VaVdst from given immediate \p Encoded.
1066unsigned decodeFieldVaVdst(unsigned Encoded);
1067
1068/// \returns Decoded VmVsrc from given immediate \p Encoded.
1069unsigned decodeFieldVmVsrc(unsigned Encoded);
1070
1071/// \returns Decoded SaSdst from given immediate \p Encoded.
1072unsigned decodeFieldSaSdst(unsigned Encoded);
1073
1074/// \returns \p VmVsrc as an encoded Depctr immediate.
1075unsigned encodeFieldVmVsrc(unsigned VmVsrc);
1076
1077/// \returns \p Encoded combined with encoded \p VmVsrc.
1078unsigned encodeFieldVmVsrc(unsigned Encoded, unsigned VmVsrc);
1079
1080/// \returns \p VaVdst as an encoded Depctr immediate.
1081unsigned encodeFieldVaVdst(unsigned VaVdst);
1082
1083/// \returns \p Encoded combined with encoded \p VaVdst.
1084unsigned encodeFieldVaVdst(unsigned Encoded, unsigned VaVdst);
1085
1086/// \returns \p SaSdst as an encoded Depctr immediate.
1087unsigned encodeFieldSaSdst(unsigned SaSdst);
1088
1089/// \returns \p Encoded combined with encoded \p SaSdst.
1090unsigned encodeFieldSaSdst(unsigned Encoded, unsigned SaSdst);
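
// Sketch of composing a depctr immediate (e.g. for s_waitcnt_depctr) from
// individual fields; STI is assumed to be provided by the caller:
//   unsigned Enc = getDefaultDepCtrEncoding(STI);
//   Enc = encodeFieldVmVsrc(Enc, 0);   // require VMEM VGPR-source reads to finish
//   Enc = encodeFieldSaSdst(Enc, 0);   // require SALU SGPR-dest writes to finish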
1091
1092} // namespace DepCtr
1093
1094namespace Exp {
1095
1096bool getTgtName(unsigned Id, StringRef &Name, int &Index);
1097
1098LLVM_READONLY
1099unsigned getTgtId(const StringRef Name);
1100
1101LLVM_READNONE
1102bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI);
1103
1104} // namespace Exp
1105
1106namespace MTBUFFormat {
1107
1108LLVM_READNONE
1109int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt);
1110
1111void decodeDfmtNfmt(unsigned Format, unsigned &Dfmt, unsigned &Nfmt);
1112
1113int64_t getDfmt(const StringRef Name);
1114
1115StringRef getDfmtName(unsigned Id);
1116
1117int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI);
1118
1119StringRef getNfmtName(unsigned Id, const MCSubtargetInfo &STI);
1120
1121bool isValidDfmtNfmt(unsigned Val, const MCSubtargetInfo &STI);
1122
1123bool isValidNfmt(unsigned Val, const MCSubtargetInfo &STI);
1124
1125int64_t getUnifiedFormat(const StringRef Name, const MCSubtargetInfo &STI);
1126
1127StringRef getUnifiedFormatName(unsigned Id, const MCSubtargetInfo &STI);
1128
1129bool isValidUnifiedFormat(unsigned Val, const MCSubtargetInfo &STI);
1130
1131int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt,
1132 const MCSubtargetInfo &STI);
1133
1134bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI);
1135
1136unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI);
1137
1138} // namespace MTBUFFormat
1139
1140namespace SendMsg {
1141
1142LLVM_READONLY
1143int64_t getMsgId(const StringRef Name, const MCSubtargetInfo &STI);
1144
1145LLVM_READONLY
1146int64_t getMsgOpId(int64_t MsgId, const StringRef Name);
1147
1148LLVM_READNONE
1149StringRef getMsgName(int64_t MsgId, const MCSubtargetInfo &STI);
1150
1151LLVM_READNONE
1152StringRef getMsgOpName(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI);
1153
1154LLVM_READNONE
1155bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI);
1156
1157LLVM_READNONE
1158bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI,
1159 bool Strict = true);
1160
1161LLVM_READNONE
1162bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId,
1163 const MCSubtargetInfo &STI, bool Strict = true);
1164
1165LLVM_READNONE
1166bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI);
1167
1168LLVM_READNONE
1169bool msgSupportsStream(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI);
1170
1171void decodeMsg(unsigned Val, uint16_t &MsgId, uint16_t &OpId,
1172 uint16_t &StreamId, const MCSubtargetInfo &STI);
1173
1174LLVM_READNONE
1175uint64_t encodeMsg(uint64_t MsgId,
1176 uint64_t OpId,
1177 uint64_t StreamId);
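
// Encode/decode sketch for sendmsg operands; MsgId/OpId/StreamId would
// normally come from getMsgId()/getMsgOpId() above or from the decoder:
//   uint64_t Enc = encodeMsg(MsgId, OpId, StreamId);
//   uint16_t DecMsg, DecOp, DecStream;
//   decodeMsg(Enc, DecMsg, DecOp, DecStream, STI);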
1178
1179} // namespace SendMsg
1180
1181
1182unsigned getInitialPSInputAddr(const Function &F);
1183
1184bool getHasColorExport(const Function &F);
1185
1186bool getHasDepthExport(const Function &F);
1187
1188LLVM_READNONE
1189bool isShader(CallingConv::ID CC);
1190
1191LLVM_READNONE
1192bool isGraphics(CallingConv::ID CC);
1193
1194LLVM_READNONE
1195bool isCompute(CallingConv::ID CC);
1196
1197LLVM_READNONE
1198bool isEntryFunctionCC(CallingConv::ID CC);
1199
1200// These functions are considered entrypoints into the current module, i.e. they
1201// are allowed to be called from outside the current module. This is different
1202// from isEntryFunctionCC, which is only true for functions that are entered by
1203// the hardware. Module entry points include all entry functions but also
1204// include functions that can be called from other functions inside or outside
1205// the current module. Module entry functions are allowed to allocate LDS.
1206LLVM_READNONE
1207bool isModuleEntryFunctionCC(CallingConv::ID CC);
1208
1209LLVM_READNONE
1210bool isChainCC(CallingConv::ID CC);
1211
1212bool isKernelCC(const Function *Func);
1213
1214// FIXME: Remove this when calling conventions cleaned up
1215LLVM_READNONE
1216inline bool isKernel(CallingConv::ID CC) {
1217 switch (CC) {
1218 case CallingConv::AMDGPU_KERNEL:
1219 case CallingConv::SPIR_KERNEL:
1220 return true;
1221 default:
1222 return false;
1223 }
1224}
1225
1226bool hasXNACK(const MCSubtargetInfo &STI);
1227bool hasSRAMECC(const MCSubtargetInfo &STI);
1228bool hasMIMG_R128(const MCSubtargetInfo &STI);
1229bool hasA16(const MCSubtargetInfo &STI);
1230bool hasG16(const MCSubtargetInfo &STI);
1231bool hasPackedD16(const MCSubtargetInfo &STI);
1232bool hasGDS(const MCSubtargetInfo &STI);
1233unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler = false);
1234unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI);
1235
1236bool isSI(const MCSubtargetInfo &STI);
1237bool isCI(const MCSubtargetInfo &STI);
1238bool isVI(const MCSubtargetInfo &STI);
1239bool isGFX9(const MCSubtargetInfo &STI);
1240bool isGFX9_GFX10(const MCSubtargetInfo &STI);
1241bool isGFX9_GFX10_GFX11(const MCSubtargetInfo &STI);
1242bool isGFX8_GFX9_GFX10(const MCSubtargetInfo &STI);
1243bool isGFX8Plus(const MCSubtargetInfo &STI);
1244bool isGFX9Plus(const MCSubtargetInfo &STI);
1245bool isGFX10(const MCSubtargetInfo &STI);
1246bool isGFX10_GFX11(const MCSubtargetInfo &STI);
1247bool isGFX10Plus(const MCSubtargetInfo &STI);
1248bool isNotGFX10Plus(const MCSubtargetInfo &STI);
1249bool isGFX10Before1030(const MCSubtargetInfo &STI);
1250bool isGFX11(const MCSubtargetInfo &STI);
1251bool isGFX11Plus(const MCSubtargetInfo &STI);
1252bool isGFX12(const MCSubtargetInfo &STI);
1253bool isGFX12Plus(const MCSubtargetInfo &STI);
1254bool isNotGFX12Plus(const MCSubtargetInfo &STI);
1255bool isNotGFX11Plus(const MCSubtargetInfo &STI);
1256bool isGCN3Encoding(const MCSubtargetInfo &STI);
1257bool isGFX10_AEncoding(const MCSubtargetInfo &STI);
1258bool isGFX10_BEncoding(const MCSubtargetInfo &STI);
1259bool hasGFX10_3Insts(const MCSubtargetInfo &STI);
1260bool isGFX10_3_GFX11(const MCSubtargetInfo &STI);
1261bool isGFX90A(const MCSubtargetInfo &STI);
1262bool isGFX940(const MCSubtargetInfo &STI);
1263bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI);
1264bool hasMAIInsts(const MCSubtargetInfo &STI);
1265bool hasVOPD(const MCSubtargetInfo &STI);
1266bool hasDPPSrc1SGPR(const MCSubtargetInfo &STI);
1267int getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, int32_t ArgNumVGPR);
1268unsigned hasKernargPreload(const MCSubtargetInfo &STI);
1269
/// \returns true if \p Reg is a scalar register.
1271bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI);
1272
1273/// \returns if \p Reg occupies the high 16-bits of a 32-bit register.
1274/// The bit indicating isHi is the LSB of the encoding.
1275bool isHi(unsigned Reg, const MCRegisterInfo &MRI);
1276
1277/// If \p Reg is a pseudo reg, return the correct hardware register given
1278/// \p STI otherwise return \p Reg.
1279unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI);
1280
1281/// Convert hardware register \p Reg to a pseudo register
1282LLVM_READNONE
1283unsigned mc2PseudoReg(unsigned Reg);
1284
1285LLVM_READNONE
1286bool isInlineValue(unsigned Reg);
1287
1288/// Is this an AMDGPU specific source operand? These include registers,
1289/// inline constants, literals and mandatory literals (KImm).
1290bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo);
1291
1292/// Is this a KImm operand?
1293bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo);
1294
/// Is this a floating-point operand?
1296bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo);
1297
1298/// Does this operand support only inlinable literals?
1299bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo);
1300
/// Get the size in bits of a register from the register class with ID \p RCID.
1302unsigned getRegBitWidth(unsigned RCID);
1303
1304/// Get the size in bits of a register from the register class \p RC.
1305unsigned getRegBitWidth(const MCRegisterClass &RC);
1306
1307/// Get size of register operand
1308unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
1309 unsigned OpNo);
1310
1311LLVM_READNONE
1312inline unsigned getOperandSize(const MCOperandInfo &OpInfo) {
1313 switch (OpInfo.OperandType) {
1314 case AMDGPU::OPERAND_REG_IMM_INT32:
1315 case AMDGPU::OPERAND_REG_IMM_FP32:
1316 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
1317 case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1318 case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1319 case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1320 case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1321 case AMDGPU::OPERAND_REG_IMM_V2INT32:
1322 case AMDGPU::OPERAND_REG_IMM_V2FP32:
1323 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
1324 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
1325 case AMDGPU::OPERAND_KIMM32:
1326 case AMDGPU::OPERAND_KIMM16: // mandatory literal is always size 4
1327 case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32:
1328 return 4;
1329
1330 case AMDGPU::OPERAND_REG_IMM_INT64:
1331 case AMDGPU::OPERAND_REG_IMM_FP64:
1332 case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1333 case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1334 case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
1335 return 8;
1336
1337 case AMDGPU::OPERAND_REG_IMM_INT16:
1338 case AMDGPU::OPERAND_REG_IMM_FP16:
1339 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
1340 case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1341 case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1342 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1343 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1344 case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1345 case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1346 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1347 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1348 case AMDGPU::OPERAND_REG_IMM_V2INT16:
1349 case AMDGPU::OPERAND_REG_IMM_V2FP16:
1350 return 2;
1351
1352 default:
1353 llvm_unreachable("unhandled operand type");
1354 }
1355}
1356
1357LLVM_READNONE
1358inline unsigned getOperandSize(const MCInstrDesc &Desc, unsigned OpNo) {
1359 return getOperandSize(Desc.operands()[OpNo]);
1360}
1361
/// Is this literal inlinable as an integer inline constant, i.e. not one of
/// the inline values reserved for floating-point operands?
1364LLVM_READNONE
1365inline bool isInlinableIntLiteral(int64_t Literal) {
1366 return Literal >= -16 && Literal <= 64;
1367}
1368
1369/// Is this literal inlinable
1370LLVM_READNONE
1371bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi);
1372
1373LLVM_READNONE
1374bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi);
1375
1376LLVM_READNONE
1377bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi);
1378
1379LLVM_READNONE
1380std::optional<unsigned> getInlineEncodingV2I16(uint32_t Literal);
1381
1382LLVM_READNONE
1383std::optional<unsigned> getInlineEncodingV2F16(uint32_t Literal);
1384
1385LLVM_READNONE
1386bool isInlinableLiteralV216(uint32_t Literal, uint8_t OpType);
1387
1388LLVM_READNONE
1389bool isInlinableLiteralV2I16(uint32_t Literal);
1390
1391LLVM_READNONE
1392bool isInlinableLiteralV2F16(uint32_t Literal);
1393
1394LLVM_READNONE
1395bool isValid32BitLiteral(uint64_t Val, bool IsFP64);
1396
1397bool isArgPassedInSGPR(const Argument *Arg);
1398
1399bool isArgPassedInSGPR(const CallBase *CB, unsigned ArgNo);
1400
1401LLVM_READONLY
1402bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST,
1403 int64_t EncodedOffset);
1404
1405LLVM_READONLY
1406bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST,
1407 int64_t EncodedOffset,
1408 bool IsBuffer);
1409
1410/// Convert \p ByteOffset to dwords if the subtarget uses dword SMRD immediate
1411/// offsets.
1412uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST, uint64_t ByteOffset);
1413
1414/// \returns The encoding that will be used for \p ByteOffset in the
/// SMRD offset field, or std::nullopt if it won't fit. On GFX9 and GFX10
/// S_LOAD instructions have a signed offset; on other subtargets it is
/// unsigned. S_BUFFER has an unsigned offset on all subtargets.
1418std::optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST,
1419 int64_t ByteOffset, bool IsBuffer);
1420
1421/// \return The encoding that can be used for a 32-bit literal offset in an SMRD
/// instruction. This is only useful on CI.
1423std::optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST,
1424 int64_t ByteOffset);
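
// Offset legalization sketch: check whether a byte offset fits in the SMRD /
// S_LOAD offset field before folding it into the instruction:
//   if (std::optional<int64_t> Enc =
//           getSMRDEncodedOffset(ST, ByteOffset, /*IsBuffer=*/false)) {
//     // *Enc is the value to place in the offset field.
//   } else {
//     // The offset does not fit; materialize it in a register instead.
//   }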
1425
1426/// For pre-GFX12 FLAT instructions the offset must be positive;
/// the MSB is ignored and forced to zero.
1428///
1429/// \return The number of bits available for the signed offset field in flat
1430/// instructions. Note that some forms of the instruction disallow negative
1431/// offsets.
1432unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST);
1433
1434/// \returns true if this offset is small enough to fit in the SMRD
1435/// offset field. \p ByteOffset should be the offset in bytes and
1436/// not the encoded offset.
1437bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset);
1438
1439LLVM_READNONE
1440inline bool isLegalDPALU_DPPControl(unsigned DC) {
1441 return DC >= DPP::ROW_NEWBCAST_FIRST && DC <= DPP::ROW_NEWBCAST_LAST;
1442}
1443
1444/// \returns true if an instruction may have a 64-bit VGPR operand.
1445bool hasAny64BitVGPROperands(const MCInstrDesc &OpDesc);
1446
1447/// \returns true if an instruction is a DP ALU DPP.
1448bool isDPALU_DPP(const MCInstrDesc &OpDesc);
1449
1450/// \returns true if the intrinsic is divergent
1451bool isIntrinsicSourceOfDivergence(unsigned IntrID);
1452
1453/// \returns true if the intrinsic is uniform
1454bool isIntrinsicAlwaysUniform(unsigned IntrID);
1455
1456} // end namespace AMDGPU
1457
1458raw_ostream &operator<<(raw_ostream &OS,
1459 const AMDGPU::IsaInfo::TargetIDSetting S);
1460
1461} // end namespace llvm
1462
1463#endif // LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H