//=== lib/CodeGen/GlobalISel/AMDGPUPreLegalizerCombiner.cpp ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass does combining of machine instructions at the generic MI level,
// before the legalizer.
//
//===----------------------------------------------------------------------===//
13 | |
14 | #include "AMDGPU.h" |
15 | #include "AMDGPUCombinerHelper.h" |
16 | #include "AMDGPULegalizerInfo.h" |
17 | #include "GCNSubtarget.h" |
18 | #include "MCTargetDesc/AMDGPUMCTargetDesc.h" |
19 | #include "llvm/CodeGen/GlobalISel/CSEInfo.h" |
20 | #include "llvm/CodeGen/GlobalISel/Combiner.h" |
21 | #include "llvm/CodeGen/GlobalISel/CombinerHelper.h" |
22 | #include "llvm/CodeGen/GlobalISel/CombinerInfo.h" |
23 | #include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h" |
24 | #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h" |
25 | #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" |
26 | #include "llvm/CodeGen/MachineDominators.h" |
27 | #include "llvm/CodeGen/TargetPassConfig.h" |
28 | #include "llvm/Target/TargetMachine.h" |
29 | |
30 | #define GET_GICOMBINER_DEPS |
31 | #include "AMDGPUGenPreLegalizeGICombiner.inc" |
32 | #undef GET_GICOMBINER_DEPS |
33 | |
34 | #define DEBUG_TYPE "amdgpu-prelegalizer-combiner" |
35 | |
36 | using namespace llvm; |
37 | using namespace MIPatternMatch; |
38 | namespace { |
39 | |
40 | #define GET_GICOMBINER_TYPES |
41 | #include "AMDGPUGenPreLegalizeGICombiner.inc" |
42 | #undef GET_GICOMBINER_TYPES |
43 | |
// Combiner implementation run before the legalizer: executes the
// TableGen-generated rule set plus a few hand-written AMDGPU combines.
class AMDGPUPreLegalizerCombinerImpl : public Combiner {
protected:
  // Per-rule enable/disable switches (populated from the command line by the
  // owning pass).
  const AMDGPUPreLegalizerCombinerImplRuleConfig &RuleConfig;
  const GCNSubtarget &STI;
  // TODO: Make CombinerHelper methods const.
  mutable AMDGPUCombinerHelper Helper;

public:
  AMDGPUPreLegalizerCombinerImpl(
      MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
      GISelKnownBits &KB, GISelCSEInfo *CSEInfo,
      const AMDGPUPreLegalizerCombinerImplRuleConfig &RuleConfig,
      const GCNSubtarget &STI, MachineDominatorTree *MDT,
      const LegalizerInfo *LI);

  static const char *getName() { return "AMDGPUPreLegalizerCombinerImpl"; }

  // Entry point of the TableGen-generated combine rules (defined in the
  // included *.inc file).
  bool tryCombineAllImpl(MachineInstr &MI) const;
  // Combiner interface: runs the generated rules, then the manual combines.
  bool tryCombineAll(MachineInstr &I) const override;

  // State handed from matchClampI64ToI16 to applyClampI64ToI16: the two
  // constant clamp bounds and the original (pre-min/max) 64-bit value.
  struct ClampI64ToI16MatchInfo {
    int64_t Cmp1 = 0;
    int64_t Cmp2 = 0;
    Register Origin;
  };

  // Match a G_TRUNC (i64 -> i16) fed by an smin/smax clamp sequence.
  bool matchClampI64ToI16(MachineInstr &MI, const MachineRegisterInfo &MRI,
                          const MachineFunction &MF,
                          ClampI64ToI16MatchInfo &MatchInfo) const;

  // Rewrite the matched sequence into G_AMDGPU_CVT_PK_I16_I32 + SMED3.
  void applyClampI64ToI16(MachineInstr &MI,
                          const ClampI64ToI16MatchInfo &MatchInfo) const;

private:
#define GET_GICOMBINER_CLASS_MEMBERS
#define AMDGPUSubtarget GCNSubtarget
#include "AMDGPUGenPreLegalizeGICombiner.inc"
#undef GET_GICOMBINER_CLASS_MEMBERS
#undef AMDGPUSubtarget
};
84 | |
85 | #define GET_GICOMBINER_IMPL |
86 | #define AMDGPUSubtarget GCNSubtarget |
87 | #include "AMDGPUGenPreLegalizeGICombiner.inc" |
88 | #undef AMDGPUSubtarget |
89 | #undef GET_GICOMBINER_IMPL |
90 | |
AMDGPUPreLegalizerCombinerImpl::AMDGPUPreLegalizerCombinerImpl(
    MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
    GISelKnownBits &KB, GISelCSEInfo *CSEInfo,
    const AMDGPUPreLegalizerCombinerImplRuleConfig &RuleConfig,
    const GCNSubtarget &STI, MachineDominatorTree *MDT, const LegalizerInfo *LI)
    : Combiner(MF, CInfo, TPC, &KB, CSEInfo), RuleConfig(RuleConfig), STI(STI),
      Helper(Observer, B, /*IsPreLegalize*/ true, &KB, MDT, LI),
      // Initializers for the TableGen-generated class members.
#define GET_GICOMBINER_CONSTRUCTOR_INITS
#include "AMDGPUGenPreLegalizeGICombiner.inc"
#undef GET_GICOMBINER_CONSTRUCTOR_INITS
{
}
103 | |
104 | bool AMDGPUPreLegalizerCombinerImpl::tryCombineAll(MachineInstr &MI) const { |
105 | if (tryCombineAllImpl(MI)) |
106 | return true; |
107 | |
108 | switch (MI.getOpcode()) { |
109 | case TargetOpcode::G_SHUFFLE_VECTOR: |
110 | return Helper.tryCombineShuffleVector(MI); |
111 | } |
112 | |
113 | return false; |
114 | } |
115 | |
116 | bool AMDGPUPreLegalizerCombinerImpl::matchClampI64ToI16( |
117 | MachineInstr &MI, const MachineRegisterInfo &MRI, const MachineFunction &MF, |
118 | ClampI64ToI16MatchInfo &MatchInfo) const { |
119 | assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Invalid instruction!" ); |
120 | |
121 | // Try to find a pattern where an i64 value should get clamped to short. |
122 | const LLT SrcType = MRI.getType(Reg: MI.getOperand(i: 1).getReg()); |
123 | if (SrcType != LLT::scalar(SizeInBits: 64)) |
124 | return false; |
125 | |
126 | const LLT DstType = MRI.getType(Reg: MI.getOperand(i: 0).getReg()); |
127 | if (DstType != LLT::scalar(SizeInBits: 16)) |
128 | return false; |
129 | |
130 | Register Base; |
131 | |
132 | auto IsApplicableForCombine = [&MatchInfo]() -> bool { |
133 | const auto Cmp1 = MatchInfo.Cmp1; |
134 | const auto Cmp2 = MatchInfo.Cmp2; |
135 | const auto Diff = std::abs(i: Cmp2 - Cmp1); |
136 | |
137 | // If the difference between both comparison values is 0 or 1, there is no |
138 | // need to clamp. |
139 | if (Diff == 0 || Diff == 1) |
140 | return false; |
141 | |
142 | const int64_t Min = std::numeric_limits<int16_t>::min(); |
143 | const int64_t Max = std::numeric_limits<int16_t>::max(); |
144 | |
145 | // Check if the comparison values are between SHORT_MIN and SHORT_MAX. |
146 | return ((Cmp2 >= Cmp1 && Cmp1 >= Min && Cmp2 <= Max) || |
147 | (Cmp1 >= Cmp2 && Cmp1 <= Max && Cmp2 >= Min)); |
148 | }; |
149 | |
150 | // Try to match a combination of min / max MIR opcodes. |
151 | if (mi_match(MI.getOperand(i: 1).getReg(), MRI, |
152 | m_GSMin(m_Reg(R&: Base), m_ICst(Cst&: MatchInfo.Cmp1)))) { |
153 | if (mi_match(Base, MRI, |
154 | m_GSMax(m_Reg(R&: MatchInfo.Origin), m_ICst(Cst&: MatchInfo.Cmp2)))) { |
155 | return IsApplicableForCombine(); |
156 | } |
157 | } |
158 | |
159 | if (mi_match(MI.getOperand(i: 1).getReg(), MRI, |
160 | m_GSMax(m_Reg(R&: Base), m_ICst(Cst&: MatchInfo.Cmp1)))) { |
161 | if (mi_match(Base, MRI, |
162 | m_GSMin(m_Reg(R&: MatchInfo.Origin), m_ICst(Cst&: MatchInfo.Cmp2)))) { |
163 | return IsApplicableForCombine(); |
164 | } |
165 | } |
166 | |
167 | return false; |
168 | } |
169 | |
170 | // We want to find a combination of instructions that |
171 | // gets generated when an i64 gets clamped to i16. |
172 | // The corresponding pattern is: |
173 | // G_MAX / G_MAX for i16 <= G_TRUNC i64. |
174 | // This can be efficiently written as following: |
175 | // v_cvt_pk_i16_i32 v0, v0, v1 |
176 | // v_med3_i32 v0, Clamp_Min, v0, Clamp_Max |
177 | void AMDGPUPreLegalizerCombinerImpl::applyClampI64ToI16( |
178 | MachineInstr &MI, const ClampI64ToI16MatchInfo &MatchInfo) const { |
179 | |
180 | Register Src = MatchInfo.Origin; |
181 | assert(MI.getParent()->getParent()->getRegInfo().getType(Src) == |
182 | LLT::scalar(64)); |
183 | const LLT S32 = LLT::scalar(SizeInBits: 32); |
184 | |
185 | B.setInstrAndDebugLoc(MI); |
186 | |
187 | auto Unmerge = B.buildUnmerge(Res: S32, Op: Src); |
188 | |
189 | assert(MI.getOpcode() != AMDGPU::G_AMDGPU_CVT_PK_I16_I32); |
190 | |
191 | const LLT V2S16 = LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 16); |
192 | auto CvtPk = |
193 | B.buildInstr(AMDGPU::G_AMDGPU_CVT_PK_I16_I32, {V2S16}, |
194 | {Unmerge.getReg(0), Unmerge.getReg(1)}, MI.getFlags()); |
195 | |
196 | auto MinBoundary = std::min(MatchInfo.Cmp1, MatchInfo.Cmp2); |
197 | auto MaxBoundary = std::max(MatchInfo.Cmp1, MatchInfo.Cmp2); |
198 | auto MinBoundaryDst = B.buildConstant(S32, MinBoundary); |
199 | auto MaxBoundaryDst = B.buildConstant(S32, MaxBoundary); |
200 | |
201 | auto Bitcast = B.buildBitcast(Dst: {S32}, Src: CvtPk); |
202 | |
203 | auto Med3 = B.buildInstr( |
204 | AMDGPU::G_AMDGPU_SMED3, {S32}, |
205 | {MinBoundaryDst.getReg(0), Bitcast.getReg(0), MaxBoundaryDst.getReg(0)}, |
206 | MI.getFlags()); |
207 | |
208 | B.buildTrunc(Res: MI.getOperand(i: 0).getReg(), Op: Med3); |
209 | |
210 | MI.eraseFromParent(); |
211 | } |
212 | |
213 | // Pass boilerplate |
214 | // ================ |
215 | |
// Legacy-PM wrapper pass that runs AMDGPUPreLegalizerCombinerImpl over each
// machine function.
class AMDGPUPreLegalizerCombiner : public MachineFunctionPass {
public:
  static char ID;

  AMDGPUPreLegalizerCombiner(bool IsOptNone = false);

  StringRef getPassName() const override {
    return "AMDGPUPreLegalizerCombiner";
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override;

private:
  // When set, the MachineDominatorTree analysis is neither required nor used,
  // disabling the dominator-tree-dependent combines.
  bool IsOptNone;
  // Rule enable/disable configuration, parsed from the command line in the
  // constructor.
  AMDGPUPreLegalizerCombinerImplRuleConfig RuleConfig;
};
234 | } // end anonymous namespace |
235 | |
void AMDGPUPreLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<TargetPassConfig>();
  // Combines rewrite instructions but never alter the CFG.
  AU.setPreservesCFG();
  getSelectionDAGFallbackAnalysisUsage(AU);
  // Known-bits information feeds the combiner helper.
  AU.addRequired<GISelKnownBitsAnalysis>();
  AU.addPreserved<GISelKnownBitsAnalysis>();
  // The dominator tree is only needed (and only computed) above -O0.
  if (!IsOptNone) {
    AU.addRequired<MachineDominatorTree>();
    AU.addPreserved<MachineDominatorTree>();
  }

  // CSE info lets the combiner unify the instructions it rebuilds.
  AU.addRequired<GISelCSEAnalysisWrapperPass>();
  AU.addPreserved<GISelCSEAnalysisWrapperPass>();
  MachineFunctionPass::getAnalysisUsage(AU);
}
251 | |
AMDGPUPreLegalizerCombiner::AMDGPUPreLegalizerCombiner(bool IsOptNone)
    : MachineFunctionPass(ID), IsOptNone(IsOptNone) {
  initializeAMDGPUPreLegalizerCombinerPass(*PassRegistry::getPassRegistry());

  // Apply any per-rule enable/disable flags from the command line; an
  // unknown rule identifier is a hard error.
  if (!RuleConfig.parseCommandLineOption())
    report_fatal_error("Invalid rule identifier");
}
259 | |
260 | bool AMDGPUPreLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) { |
261 | if (MF.getProperties().hasProperty( |
262 | P: MachineFunctionProperties::Property::FailedISel)) |
263 | return false; |
264 | auto *TPC = &getAnalysis<TargetPassConfig>(); |
265 | const Function &F = MF.getFunction(); |
266 | bool EnableOpt = |
267 | MF.getTarget().getOptLevel() != CodeGenOptLevel::None && !skipFunction(F); |
268 | GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF); |
269 | |
270 | // Enable CSE. |
271 | GISelCSEAnalysisWrapper &Wrapper = |
272 | getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper(); |
273 | auto *CSEInfo = &Wrapper.get(CSEOpt: TPC->getCSEConfig()); |
274 | |
275 | const GCNSubtarget &STI = MF.getSubtarget<GCNSubtarget>(); |
276 | MachineDominatorTree *MDT = |
277 | IsOptNone ? nullptr : &getAnalysis<MachineDominatorTree>(); |
278 | CombinerInfo CInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false, |
279 | nullptr, EnableOpt, F.hasOptSize(), F.hasMinSize()); |
280 | AMDGPUPreLegalizerCombinerImpl Impl(MF, CInfo, TPC, *KB, CSEInfo, RuleConfig, |
281 | STI, MDT, STI.getLegalizerInfo()); |
282 | return Impl.combineMachineInstrs(); |
283 | } |
284 | |
char AMDGPUPreLegalizerCombiner::ID = 0;

// Register the pass and its analysis dependencies with the PassRegistry.
INITIALIZE_PASS_BEGIN(AMDGPUPreLegalizerCombiner, DEBUG_TYPE,
                      "Combine AMDGPU machine instrs before legalization",
                      false, false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis)
INITIALIZE_PASS_END(AMDGPUPreLegalizerCombiner, DEBUG_TYPE,
                    "Combine AMDGPU machine instrs before legalization", false,
                    false)
294 | |
295 | namespace llvm { |
296 | FunctionPass *createAMDGPUPreLegalizeCombiner(bool IsOptNone) { |
297 | return new AMDGPUPreLegalizerCombiner(IsOptNone); |
298 | } |
299 | } // end namespace llvm |
300 | |