GCNPreRALongBranchReg.cpp source code [llvm/lib/Target/AMDGPU/GCNPreRALongBranchReg.cpp]

1	//===-- GCNPreRALongBranchReg.cpp ----------------------------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	// \file
9	// \brief Pass to estimate pre RA branch size and reserve a pair of SGPRs if
10	// there is a long branch. Branch size at this point is difficult to track since
11	// we have no idea what spills will be inserted later on. We just assume 8 bytes
12	// per instruction to compute approximations without computing the actual
13	// instruction size to see if we're in the neighborhood of the maximum branch
// distance threshold. Tuning of what is considered "long" is handled through
// the amdgpu-long-branch-factor cl argument, which sets LongBranchFactor.
16	//===----------------------------------------------------------------------===//
17	#include "AMDGPU.h"
18	#include "GCNSubtarget.h"
19	#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
20	#include "SIMachineFunctionInfo.h"
21	#include "llvm/CodeGen/MachineFunctionPass.h"
22	#include "llvm/InitializePasses.h"
23
24	using namespace llvm;
25
26	#define DEBUG_TYPE "amdgpu-pre-ra-long-branch-reg"
27
28	namespace {
29
30	static cl::opt<double> LongBranchFactor(
31	"amdgpu-long-branch-factor", cl::init(Val: `1.0`), cl::Hidden,
32	cl::desc ("Factor to apply to what qualifies as a long branch "
33	"to reserve a pair of scalar registers. If this value "
34	"is 0 the long branch registers are never reserved. As this "
35	"value grows the greater chance the branch distance will fall "
36	"within the threshold and the registers will be marked to be "
37	"reserved. We lean towards always reserving a register for "
38	"long jumps"));
39
40	class GCNPreRALongBranchReg : public MachineFunctionPass {
41
42	struct BasicBlockInfo {
43	// Offset - Distance from the beginning of the function to the beginning
44	// of this basic block.
45	uint64_t Offset = `0`;
46	// Size - Size of the basic block in bytes
47	uint64_t Size = `0`;
48	};
49	void generateBlockInfo(MachineFunction &MF,
50	SmallVectorImpl<BasicBlockInfo> &BlockInfo);
51
52	public:
53	static char ID;
54	GCNPreRALongBranchReg() : MachineFunctionPass (ID) {
55	initializeGCNPreRALongBranchRegPass(*PassRegistry::getPassRegistry());
56	}
57	bool runOnMachineFunction(MachineFunction &MF) override;
58	StringRef getPassName() const override {
59	return "AMDGPU Pre-RA Long Branch Reg";
60	}
61	void getAnalysisUsage(AnalysisUsage &AU) const override {
62	AU.setPreservesAll();
63	MachineFunctionPass::getAnalysisUsage(AU);
64	}
65	};
66	} // End anonymous namespace.
67	char GCNPreRALongBranchReg::ID = `0`;
68
69	INITIALIZE_PASS(GCNPreRALongBranchReg, DEBUG_TYPE,
70	"AMDGPU Pre-RA Long Branch Reg", false, false)
71
72	char &llvm::GCNPreRALongBranchRegID = GCNPreRALongBranchReg::ID;
73	void GCNPreRALongBranchReg::generateBlockInfo(
74	MachineFunction &MF, SmallVectorImpl<BasicBlockInfo> &BlockInfo) {
75
76	BlockInfo.resize(N: MF.getNumBlockIDs());
77
78	// Approximate the size of all basic blocks by just
79	// assuming 8 bytes per instruction
80	for (const MachineBasicBlock &MBB : MF) {
81	uint64_t NumInstr = `0`;
82	// Loop through the basic block and add up all non-debug
83	// non-meta instructions
84	for (const MachineInstr &MI : MBB) {
85	// isMetaInstruction is a superset of isDebugIstr
86	if (MI.isMetaInstruction())
87	continue;
88	NumInstr += `1`;
89	}
90	// Approximate size as just 8 bytes per instruction
91	BlockInfo [MBB.getNumber()].Size = `8` * NumInstr;
92	}
93	uint64_t PrevNum = (&MF)->begin()->getNumber();
94	for (auto &MBB :
95	make_range(x: std::next(x: MachineFunction::iterator((&MF)->begin())),
96	y: (&MF)->end())) {
97	uint64_t Num = MBB.getNumber();
98	// Compute the offset immediately following this block.
99	BlockInfo [Num].Offset = BlockInfo [PrevNum].Offset + BlockInfo [PrevNum].Size;
100	PrevNum = Num;
101	}
102	}
103	bool GCNPreRALongBranchReg::runOnMachineFunction(MachineFunction &MF) {
104	const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
105	const SIInstrInfo *TII = STM.getInstrInfo();
106	const SIRegisterInfo *TRI = STM.getRegisterInfo();
107	SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
108	MachineRegisterInfo &MRI = MF.getRegInfo();
109
110	// For now, reserve highest available SGPR pair. After RA,
111	// shift down to a lower unused pair of SGPRs
112	// If all registers are used, then findUnusedRegister will return
113	// AMDGPU::NoRegister.
114	constexpr bool ReserveHighestRegister = true;
115	Register LongBranchReservedReg = TRI->findUnusedRegister(
116	MRI, RC: &AMDGPU::SGPR_64RegClass, MF, ReserveHighestVGPR: ReserveHighestRegister);
117	if (!LongBranchReservedReg)
118	return false;
119
120	// Approximate code size and offsets of each basic block
121	SmallVector<BasicBlockInfo, `16`> BlockInfo;
122	generateBlockInfo(MF, BlockInfo);
123
124	for (const MachineBasicBlock &MBB : MF) {
125	MachineBasicBlock::const_iterator Last = MBB.getLastNonDebugInstr();
126	if (Last == MBB.end() \|\| !Last ->isUnconditionalBranch())
127	continue;
128	MachineBasicBlock DestBB = TII->getBranchDestBlock(MI: Last);
129	uint64_t BlockDistance = static_cast<uint64_t>(
130	LongBranchFactor * BlockInfo [DestBB->getNumber()].Offset);
131	// If the distance falls outside the threshold assume it is a long branch
132	// and we need to reserve the registers
133	if (!TII->isBranchOffsetInRange(BranchOpc: Last ->getOpcode(), BrOffset: BlockDistance)) {
134	MFI->setLongBranchReservedReg(LongBranchReservedReg);
135	return true;
136	}
137	}
138	return false;
139	}
140

source code of llvm/lib/Target/AMDGPU/GCNPreRALongBranchReg.cpp