GCNVOPDUtils.cpp source code [llvm/lib/Target/AMDGPU/GCNVOPDUtils.cpp]

1	//===- GCNVOPDUtils.cpp - GCN VOPD Utils ------------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	/// \file This file contains the AMDGPU DAG scheduling
10	/// mutation to pair VOPD instructions back to back. It also contains
11	/// subroutines useful in the creation of VOPD instructions.
12	//
13	//===----------------------------------------------------------------------===//
14
15	#include "GCNVOPDUtils.h"
16	#include "AMDGPUSubtarget.h"
17	#include "GCNSubtarget.h"
18	#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
19	#include "SIInstrInfo.h"
20	#include "Utils/AMDGPUBaseInfo.h"
21	#include "llvm/ADT/STLExtras.h"
22	#include "llvm/ADT/SmallVector.h"
23	#include "llvm/CodeGen/MachineBasicBlock.h"
24	#include "llvm/CodeGen/MachineInstr.h"
25	#include "llvm/CodeGen/MachineOperand.h"
26	#include "llvm/CodeGen/MachineRegisterInfo.h"
27	#include "llvm/CodeGen/MacroFusion.h"
28	#include "llvm/CodeGen/ScheduleDAG.h"
29	#include "llvm/CodeGen/ScheduleDAGMutation.h"
30	#include "llvm/CodeGen/TargetInstrInfo.h"
31	#include "llvm/MC/MCInst.h"
32
33	using namespace llvm;
34
35	#define DEBUG_TYPE "gcn-vopd-utils"
36
37	bool llvm::checkVOPDRegConstraints(const SIInstrInfo &TII,
38	const MachineInstr &FirstMI,
39	const MachineInstr &SecondMI) {
40	namespace VOPD = AMDGPU::VOPD;
41
42	const MachineFunction *MF = FirstMI.getMF();
43	const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
44	const SIRegisterInfo *TRI = dyn_cast<SIRegisterInfo>(Val: ST.getRegisterInfo());
45	const MachineRegisterInfo &MRI = MF->getRegInfo();
46	// Literals also count against scalar bus limit
47	SmallVector<const MachineOperand *> UniqueLiterals;
48	auto addLiteral = [&](const MachineOperand &Op) {
49	for (auto &Literal : UniqueLiterals) {
50	if (Literal->isIdenticalTo(Other: Op))
51	return;
52	}
53	UniqueLiterals.push_back(Elt: &Op);
54	};
55	SmallVector<Register> UniqueScalarRegs;
56	assert([&]() -> bool {
57	for (auto MII = MachineBasicBlock::const_iterator (&FirstMI);
58	MII != FirstMI.getParent()->instr_end(); ++MII) {
59	if (&*MII == &SecondMI)
60	return true;
61	}
62	return false;
63	}() && "Expected FirstMI to precede SecondMI");
64	// Cannot pair dependent instructions
65	for (const auto &Use : SecondMI.uses())
66	if (Use.isReg() && FirstMI.modifiesRegister(Use.getReg(), TRI))
67	return false;
68
69	auto getVRegIdx = [&](unsigned OpcodeIdx, unsigned OperandIdx) {
70	const MachineInstr &MI = (OpcodeIdx == VOPD::X) ? FirstMI : SecondMI;
71	const MachineOperand &Operand = MI.getOperand(i: OperandIdx);
72	if (Operand.isReg() && TRI->isVectorRegister(MRI, Reg: Operand.getReg()))
73	return Operand.getReg();
74	return Register ();
75	};
76
77	auto InstInfo =
78	AMDGPU::getVOPDInstInfo(OpX: FirstMI.getDesc(), OpY: SecondMI.getDesc());
79
80	for (auto CompIdx : VOPD::COMPONENTS) {
81	const MachineInstr &MI = (CompIdx == VOPD::X) ? FirstMI : SecondMI;
82
83	const MachineOperand &Src0 = MI.getOperand(i: VOPD::Component::SRC0);
84	if (Src0.isReg()) {
85	if (!TRI->isVectorRegister(MRI, Reg: Src0.getReg())) {
86	if (!is_contained(Range&: UniqueScalarRegs, Element: Src0.getReg()))
87	UniqueScalarRegs.push_back(Elt: Src0.getReg());
88	}
89	} else {
90	if (!TII.isInlineConstant(MI, OpIdx: VOPD::Component::SRC0))
91	addLiteral (Src0);
92	}
93
94	if (InstInfo [CompIdx].hasMandatoryLiteral()) {
95	auto CompOprIdx = InstInfo [CompIdx].getMandatoryLiteralCompOperandIndex();
96	addLiteral (MI.getOperand(i: CompOprIdx));
97	}
98	if (MI.getDesc().hasImplicitUseOfPhysReg(AMDGPU::Reg: VCC))
99	UniqueScalarRegs.push_back(AMDGPU::Elt: VCC_LO);
100	}
101
102	if (UniqueLiterals.size() > `1`)
103	return false;
104	if ((UniqueLiterals.size() + UniqueScalarRegs.size()) > `2`)
105	return false;
106
107	// On GFX12 if both OpX and OpY are V_MOV_B32 then OPY uses SRC2 source-cache.
108	bool SkipSrc = ST.getGeneration() >= AMDGPUSubtarget::GFX12 &&
109	FirstMI.getOpcode() == AMDGPU::V_MOV_B32_e32 &&
110	SecondMI.getOpcode() == AMDGPU::V_MOV_B32_e32;
111
112	if (InstInfo.hasInvalidOperand(GetRegIdx: getVRegIdx, SkipSrc))
113	return false;
114
115	LLVM_DEBUG(dbgs() << "VOPD Reg Constraints Passed\n\tX: " << FirstMI
116	<< "\n\tY: " << SecondMI << "\n");
117	return true;
118	}
119
120	/// Check if the instr pair, FirstMI and SecondMI, should be scheduled
121	/// together. Given SecondMI, when FirstMI is unspecified, then check if
122	/// SecondMI may be part of a fused pair at all.
123	static bool shouldScheduleVOPDAdjacent(const TargetInstrInfo &TII,
124	const TargetSubtargetInfo &TSI,
125	const MachineInstr *FirstMI,
126	const MachineInstr &SecondMI) {
127	const SIInstrInfo &STII = static_cast<const SIInstrInfo &>(TII);
128	unsigned Opc2 = SecondMI.getOpcode();
129	auto SecondCanBeVOPD = AMDGPU::getCanBeVOPD(Opc: Opc2);
130
131	// One instruction case
132	if (!FirstMI)
133	return SecondCanBeVOPD.Y;
134
135	unsigned Opc = FirstMI->getOpcode();
136	auto FirstCanBeVOPD = AMDGPU::getCanBeVOPD(Opc);
137
138	if (!((FirstCanBeVOPD.X && SecondCanBeVOPD.Y) \|\|
139	(FirstCanBeVOPD.Y && SecondCanBeVOPD.X)))
140	return false;
141
142	return checkVOPDRegConstraints(TII: STII, FirstMI: *FirstMI, SecondMI);
143	}
144
145	namespace {
146	/// Adapts design from MacroFusion
147	/// Puts valid candidate instructions back-to-back so they can easily
148	/// be turned into VOPD instructions
149	/// Greedily pairs instruction candidates. O(n^2) algorithm.
150	struct VOPDPairingMutation : ScheduleDAGMutation {
151	MacroFusionPredTy shouldScheduleAdjacent; // NOLINT: function pointer
152
153	VOPDPairingMutation(
154	MacroFusionPredTy shouldScheduleAdjacent) // NOLINT: function pointer
155	: shouldScheduleAdjacent(shouldScheduleAdjacent) {}
156
157	void apply(ScheduleDAGInstrs *DAG) override {
158	const TargetInstrInfo &TII = *DAG->TII;
159	const GCNSubtarget &ST = DAG->MF.getSubtarget<GCNSubtarget>();
160	if (!AMDGPU::hasVOPD(ST) \|\| !ST.isWave32()) {
161	LLVM_DEBUG(dbgs() << "Target does not support VOPDPairingMutation\n");
162	return;
163	}
164
165	std::vector<SUnit>::iterator ISUI, JSUI;
166	for (ISUI = DAG->SUnits.begin(); ISUI != DAG->SUnits.end(); ++ISUI) {
167	const MachineInstr *IMI = ISUI ->getInstr();
168	if (!shouldScheduleAdjacent(TII, ST, nullptr, *IMI))
169	continue;
170	if (!hasLessThanNumFused(SU: *ISUI, FuseLimit: `2`))
171	continue;
172
173	for (JSUI = ISUI + `1`; JSUI != DAG->SUnits.end(); ++JSUI) {
174	if (JSUI ->isBoundaryNode())
175	continue;
176	const MachineInstr *JMI = JSUI ->getInstr();
177	if (!hasLessThanNumFused(SU: *JSUI, FuseLimit: `2`) \|\|
178	!shouldScheduleAdjacent(TII, ST, IMI, *JMI))
179	continue;
180	if (fuseInstructionPair(DAG&: DAG, FirstSU&: ISUI, SecondSU&: *JSUI))
181	break;
182	}
183	}
184	LLVM_DEBUG(dbgs() << "Completed VOPDPairingMutation\n");
185	}
186	};
187	} // namespace
188
189	std::unique_ptr<ScheduleDAGMutation> llvm::createVOPDPairingMutation() {
190	return std::make_unique<VOPDPairingMutation>(args&: shouldScheduleVOPDAdjacent);
191	}
192

source code of llvm/lib/Target/AMDGPU/GCNVOPDUtils.cpp