//=== lib/CodeGen/GlobalISel/AMDGPUPostLegalizerCombiner.cpp --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass does combining of machine instructions at the generic MI level,
// after the legalizer.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUCombinerHelper.h"
#include "AMDGPULegalizerInfo.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/CodeGen/GlobalISel/Combiner.h"
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/Target/TargetMachine.h"

#define GET_GICOMBINER_DEPS
#include "AMDGPUGenPreLegalizeGICombiner.inc"
#undef GET_GICOMBINER_DEPS

#define DEBUG_TYPE "amdgpu-postlegalizer-combiner"

using namespace llvm;
using namespace MIPatternMatch;

namespace {
#define GET_GICOMBINER_TYPES
#include "AMDGPUGenPostLegalizeGICombiner.inc"
#undef GET_GICOMBINER_TYPES

class AMDGPUPostLegalizerCombinerImpl : public Combiner {
protected:
  const AMDGPUPostLegalizerCombinerImplRuleConfig &RuleConfig;
  const GCNSubtarget &STI;
  const SIInstrInfo &TII;
  // TODO: Make CombinerHelper methods const.
  mutable AMDGPUCombinerHelper Helper;

public:
  AMDGPUPostLegalizerCombinerImpl(
      MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
      GISelKnownBits &KB, GISelCSEInfo *CSEInfo,
      const AMDGPUPostLegalizerCombinerImplRuleConfig &RuleConfig,
      const GCNSubtarget &STI, MachineDominatorTree *MDT,
      const LegalizerInfo *LI);

  static const char *getName() { return "AMDGPUPostLegalizerCombinerImpl"; }

  bool tryCombineAllImpl(MachineInstr &I) const;
  bool tryCombineAll(MachineInstr &I) const override;

  struct FMinFMaxLegacyInfo {
    Register LHS;
    Register RHS;
    Register True;
    Register False;
    CmpInst::Predicate Pred;
  };

  // TODO: Make sure fmin_legacy/fmax_legacy don't canonicalize
  bool matchFMinFMaxLegacy(MachineInstr &MI, FMinFMaxLegacyInfo &Info) const;
  void applySelectFCmpToFMinToFMaxLegacy(MachineInstr &MI,
                                         const FMinFMaxLegacyInfo &Info) const;

  bool matchUCharToFloat(MachineInstr &MI) const;
  void applyUCharToFloat(MachineInstr &MI) const;

  bool
  matchRcpSqrtToRsq(MachineInstr &MI,
                    std::function<void(MachineIRBuilder &)> &MatchInfo) const;

  bool matchFDivSqrtToRsqF16(MachineInstr &MI) const;
  void applyFDivSqrtToRsqF16(MachineInstr &MI, const Register &X) const;

  // FIXME: Should be able to have 2 separate matchdatas rather than custom
  // struct boilerplate.
  struct CvtF32UByteMatchInfo {
    Register CvtVal;
    unsigned ShiftOffset;
  };

  bool matchCvtF32UByteN(MachineInstr &MI,
                         CvtF32UByteMatchInfo &MatchInfo) const;
  void applyCvtF32UByteN(MachineInstr &MI,
                         const CvtF32UByteMatchInfo &MatchInfo) const;

  bool matchRemoveFcanonicalize(MachineInstr &MI, Register &Reg) const;

  // Combine unsigned buffer load and signed extension instructions to generate
  // signed buffer load instructions.
  bool matchCombineSignExtendInReg(
      MachineInstr &MI, std::pair<MachineInstr *, unsigned> &MatchInfo) const;
  void applyCombineSignExtendInReg(
      MachineInstr &MI, std::pair<MachineInstr *, unsigned> &MatchInfo) const;

  // Find the s_mul_u64 instructions where the higher bits are either
  // zero-extended or sign-extended.
  bool matchCombine_s_mul_u64(MachineInstr &MI, unsigned &NewOpcode) const;
  // Replace the s_mul_u64 instructions with S_MUL_I64_I32_PSEUDO if the higher
  // 33 bits are sign extended and with S_MUL_U64_U32_PSEUDO if the higher 32
  // bits are zero extended.
  void applyCombine_s_mul_u64(MachineInstr &MI, unsigned &NewOpcode) const;

private:
#define GET_GICOMBINER_CLASS_MEMBERS
#define AMDGPUSubtarget GCNSubtarget
#include "AMDGPUGenPostLegalizeGICombiner.inc"
#undef GET_GICOMBINER_CLASS_MEMBERS
#undef AMDGPUSubtarget
};

#define GET_GICOMBINER_IMPL
#define AMDGPUSubtarget GCNSubtarget
#include "AMDGPUGenPostLegalizeGICombiner.inc"
#undef AMDGPUSubtarget
#undef GET_GICOMBINER_IMPL

AMDGPUPostLegalizerCombinerImpl::AMDGPUPostLegalizerCombinerImpl(
    MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
    GISelKnownBits &KB, GISelCSEInfo *CSEInfo,
    const AMDGPUPostLegalizerCombinerImplRuleConfig &RuleConfig,
    const GCNSubtarget &STI, MachineDominatorTree *MDT, const LegalizerInfo *LI)
    : Combiner(MF, CInfo, TPC, &KB, CSEInfo), RuleConfig(RuleConfig), STI(STI),
      TII(*STI.getInstrInfo()),
      Helper(Observer, B, /*IsPreLegalize*/ false, &KB, MDT, LI),
#define GET_GICOMBINER_CONSTRUCTOR_INITS
#include "AMDGPUGenPostLegalizeGICombiner.inc"
#undef GET_GICOMBINER_CONSTRUCTOR_INITS
{
}

bool AMDGPUPostLegalizerCombinerImpl::tryCombineAll(MachineInstr &MI) const {
  if (tryCombineAllImpl(MI))
    return true;

  switch (MI.getOpcode()) {
  case TargetOpcode::G_SHL:
  case TargetOpcode::G_LSHR:
  case TargetOpcode::G_ASHR:
    // On some subtargets, 64-bit shift is a quarter rate instruction. In the
    // common case, splitting this into a move and a 32-bit shift is faster and
    // the same code size.
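    //
    // Illustrative sketch (assuming a constant shift amount >= 32):
    //   %d:_(s64) = G_LSHR %x:_(s64), 40
    // can be rewritten to operate on the high half only, roughly as
    //   %lo:_(s32), %hi:_(s32) = G_UNMERGE_VALUES %x
    //   %s:_(s32)    = G_LSHR %hi, 8
    //   %zero:_(s32) = G_CONSTANT i32 0
    //   %d:_(s64)    = G_MERGE_VALUES %s, %zero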
    return Helper.tryCombineShiftToUnmerge(MI, 32);
  }

  return false;
}

bool AMDGPUPostLegalizerCombinerImpl::matchFMinFMaxLegacy(
    MachineInstr &MI, FMinFMaxLegacyInfo &Info) const {
  // FIXME: Type predicate on pattern
  if (MRI.getType(MI.getOperand(0).getReg()) != LLT::scalar(32))
    return false;

  Register Cond = MI.getOperand(1).getReg();
  if (!MRI.hasOneNonDBGUse(Cond) ||
      !mi_match(Cond, MRI,
                m_GFCmp(m_Pred(Info.Pred), m_Reg(Info.LHS), m_Reg(Info.RHS))))
    return false;

  Info.True = MI.getOperand(2).getReg();
  Info.False = MI.getOperand(3).getReg();

  // TODO: Handle the case where the selected value is an fneg and the compared
  // constant is the negation of the selected value.
  if (!(Info.LHS == Info.True && Info.RHS == Info.False) &&
      !(Info.LHS == Info.False && Info.RHS == Info.True))
    return false;

  switch (Info.Pred) {
  case CmpInst::FCMP_FALSE:
  case CmpInst::FCMP_OEQ:
  case CmpInst::FCMP_ONE:
  case CmpInst::FCMP_ORD:
  case CmpInst::FCMP_UNO:
  case CmpInst::FCMP_UEQ:
  case CmpInst::FCMP_UNE:
  case CmpInst::FCMP_TRUE:
    return false;
  default:
    return true;
  }
}

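// Illustrative sketch of one case of the select(fcmp) -> fmin/fmax_legacy
// fold applied below (the exact operand order depends on the predicate):
//   %c:_(s1)  = G_FCMP floatpred(ult), %x, %y
//   %s:_(s32) = G_SELECT %c, %x, %y
// becomes roughly
//   %s:_(s32) = G_AMDGPU_FMIN_LEGACY %y, %x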
void AMDGPUPostLegalizerCombinerImpl::applySelectFCmpToFMinToFMaxLegacy(
    MachineInstr &MI, const FMinFMaxLegacyInfo &Info) const {
  B.setInstrAndDebugLoc(MI);
  auto buildNewInst = [&MI, this](unsigned Opc, Register X, Register Y) {
    B.buildInstr(Opc, {MI.getOperand(0)}, {X, Y}, MI.getFlags());
  };

  switch (Info.Pred) {
  case CmpInst::FCMP_ULT:
  case CmpInst::FCMP_ULE:
    if (Info.LHS == Info.True)
      buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.RHS, Info.LHS);
    else
      buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.LHS, Info.RHS);
    break;
  case CmpInst::FCMP_OLE:
  case CmpInst::FCMP_OLT: {
    // We need to permute the operands to get the correct NaN behavior. The
    // selected operand is the second one based on the failing compare with
    // NaN, so permute it based on the compare type the hardware uses.
    if (Info.LHS == Info.True)
      buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.LHS, Info.RHS);
    else
      buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.RHS, Info.LHS);
    break;
  }
  case CmpInst::FCMP_UGE:
  case CmpInst::FCMP_UGT: {
    if (Info.LHS == Info.True)
      buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.RHS, Info.LHS);
    else
      buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.LHS, Info.RHS);
    break;
  }
  case CmpInst::FCMP_OGT:
  case CmpInst::FCMP_OGE: {
    if (Info.LHS == Info.True)
      buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.LHS, Info.RHS);
    else
      buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.RHS, Info.LHS);
    break;
  }
  default:
    llvm_unreachable("predicate should not have matched");
  }

  MI.eraseFromParent();
}

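// Illustrative sketch of the byte-to-float combine below: an integer-to-float
// conversion whose source has only its low 8 bits possibly set, e.g.
//   %f:_(s32) = G_UITOFP %x:_(s32)   ; known bits prove %x < 256
// can be lowered to the byte-convert pseudo
//   %f:_(s32) = G_AMDGPU_CVT_F32_UBYTE0 %x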
bool AMDGPUPostLegalizerCombinerImpl::matchUCharToFloat(
    MachineInstr &MI) const {
  Register DstReg = MI.getOperand(0).getReg();

  // TODO: We could try to match extracting the higher bytes, which would be
  // easier if i8 vectors weren't promoted to i32 vectors, particularly after
  // types are legalized. v4i8 -> v4f32 is probably the only case to worry
  // about in practice.
  LLT Ty = MRI.getType(DstReg);
  if (Ty == LLT::scalar(32) || Ty == LLT::scalar(16)) {
    Register SrcReg = MI.getOperand(1).getReg();
    unsigned SrcSize = MRI.getType(SrcReg).getSizeInBits();
    assert(SrcSize == 16 || SrcSize == 32 || SrcSize == 64);
    const APInt Mask = APInt::getHighBitsSet(SrcSize, SrcSize - 8);
    return Helper.getKnownBits()->maskedValueIsZero(SrcReg, Mask);
  }

  return false;
}

void AMDGPUPostLegalizerCombinerImpl::applyUCharToFloat(
    MachineInstr &MI) const {
  B.setInstrAndDebugLoc(MI);

  const LLT S32 = LLT::scalar(32);

  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  LLT Ty = MRI.getType(DstReg);
  LLT SrcTy = MRI.getType(SrcReg);
  if (SrcTy != S32)
    SrcReg = B.buildAnyExtOrTrunc(S32, SrcReg).getReg(0);

  if (Ty == S32) {
    B.buildInstr(AMDGPU::G_AMDGPU_CVT_F32_UBYTE0, {DstReg}, {SrcReg},
                 MI.getFlags());
  } else {
    auto Cvt0 = B.buildInstr(AMDGPU::G_AMDGPU_CVT_F32_UBYTE0, {S32}, {SrcReg},
                             MI.getFlags());
    B.buildFPTrunc(DstReg, Cvt0, MI.getFlags());
  }

  MI.eraseFromParent();
}

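// Illustrative sketch of the rsq combine below: with contract fast-math flags
// on both instructions, either nesting order folds to a single rsq, roughly
//   %s = G_FSQRT contract %x
//   %r = G_INTRINSIC contract intrinsic(@llvm.amdgcn.rcp), %s
// becomes
//   %r = G_INTRINSIC contract intrinsic(@llvm.amdgcn.rsq), %x
// and likewise for the sqrt(rcp(x)) nesting.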
bool AMDGPUPostLegalizerCombinerImpl::matchRcpSqrtToRsq(
    MachineInstr &MI,
    std::function<void(MachineIRBuilder &)> &MatchInfo) const {
  auto getRcpSrc = [=](const MachineInstr &MI) -> MachineInstr * {
    if (!MI.getFlag(MachineInstr::FmContract))
      return nullptr;

    if (auto *GI = dyn_cast<GIntrinsic>(&MI)) {
      if (GI->is(Intrinsic::amdgcn_rcp))
        return MRI.getVRegDef(MI.getOperand(2).getReg());
    }
    return nullptr;
  };

  auto getSqrtSrc = [=](const MachineInstr &MI) -> MachineInstr * {
    if (!MI.getFlag(MachineInstr::FmContract))
      return nullptr;
    MachineInstr *SqrtSrcMI = nullptr;
    auto Match =
        mi_match(MI.getOperand(0).getReg(), MRI, m_GFSqrt(m_MInstr(SqrtSrcMI)));
    (void)Match;
    return SqrtSrcMI;
  };

  MachineInstr *RcpSrcMI = nullptr, *SqrtSrcMI = nullptr;
  // rcp(sqrt(x))
  if ((RcpSrcMI = getRcpSrc(MI)) && (SqrtSrcMI = getSqrtSrc(*RcpSrcMI))) {
    MatchInfo = [SqrtSrcMI, &MI](MachineIRBuilder &B) {
      B.buildIntrinsic(Intrinsic::amdgcn_rsq, {MI.getOperand(0)})
          .addUse(SqrtSrcMI->getOperand(0).getReg())
          .setMIFlags(MI.getFlags());
    };
    return true;
  }

  // sqrt(rcp(x))
  if ((SqrtSrcMI = getSqrtSrc(MI)) && (RcpSrcMI = getRcpSrc(*SqrtSrcMI))) {
    MatchInfo = [RcpSrcMI, &MI](MachineIRBuilder &B) {
      B.buildIntrinsic(Intrinsic::amdgcn_rsq, {MI.getOperand(0)})
          .addUse(RcpSrcMI->getOperand(0).getReg())
          .setMIFlags(MI.getFlags());
    };
    return true;
  }
  return false;
}

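// Illustrative sketch of the f16 fdiv-by-sqrt combine below: when the sqrt
// result has a single use,
//   %s:_(s16) = G_FSQRT %x
//   %d:_(s16) = G_FDIV %y, %s
// is rewritten roughly as
//   %r:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rsq), %x
//   %d:_(s16) = G_FMUL %r, %y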
bool AMDGPUPostLegalizerCombinerImpl::matchFDivSqrtToRsqF16(
    MachineInstr &MI) const {
  Register Sqrt = MI.getOperand(2).getReg();
  return MRI.hasOneNonDBGUse(Sqrt);
}

void AMDGPUPostLegalizerCombinerImpl::applyFDivSqrtToRsqF16(
    MachineInstr &MI, const Register &X) const {
  Register Dst = MI.getOperand(0).getReg();
  Register Y = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(Dst);
  uint32_t Flags = MI.getFlags();
  Register RSQ = B.buildIntrinsic(Intrinsic::amdgcn_rsq, {DstTy})
                     .addUse(X)
                     .setMIFlags(Flags)
                     .getReg(0);
  B.buildFMul(Dst, RSQ, Y, Flags);
  MI.eraseFromParent();
}

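// Illustrative sketch of the byte-select combine below: converting a byte that
// was shifted into place can instead select a higher byte directly, e.g.
//   %s:_(s32) = G_LSHR %x, 16
//   %f:_(s32) = G_AMDGPU_CVT_F32_UBYTE0 %s
// becomes
//   %f:_(s32) = G_AMDGPU_CVT_F32_UBYTE2 %x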
bool AMDGPUPostLegalizerCombinerImpl::matchCvtF32UByteN(
    MachineInstr &MI, CvtF32UByteMatchInfo &MatchInfo) const {
  Register SrcReg = MI.getOperand(1).getReg();

  // Look through G_ZEXT.
  bool IsShr = mi_match(SrcReg, MRI, m_GZExt(m_Reg(SrcReg)));

  Register Src0;
  int64_t ShiftAmt;
  IsShr = mi_match(SrcReg, MRI, m_GLShr(m_Reg(Src0), m_ICst(ShiftAmt)));
  if (IsShr || mi_match(SrcReg, MRI, m_GShl(m_Reg(Src0), m_ICst(ShiftAmt)))) {
    const unsigned Offset = MI.getOpcode() - AMDGPU::G_AMDGPU_CVT_F32_UBYTE0;

    unsigned ShiftOffset = 8 * Offset;
    if (IsShr)
      ShiftOffset += ShiftAmt;
    else
      ShiftOffset -= ShiftAmt;

    MatchInfo.CvtVal = Src0;
    MatchInfo.ShiftOffset = ShiftOffset;
    return ShiftOffset < 32 && ShiftOffset >= 8 && (ShiftOffset % 8) == 0;
  }

  // TODO: Simplify demanded bits.
  return false;
}

void AMDGPUPostLegalizerCombinerImpl::applyCvtF32UByteN(
    MachineInstr &MI, const CvtF32UByteMatchInfo &MatchInfo) const {
  B.setInstrAndDebugLoc(MI);
  unsigned NewOpc = AMDGPU::G_AMDGPU_CVT_F32_UBYTE0 + MatchInfo.ShiftOffset / 8;

  const LLT S32 = LLT::scalar(32);
  Register CvtSrc = MatchInfo.CvtVal;
  LLT SrcTy = MRI.getType(MatchInfo.CvtVal);
  if (SrcTy != S32) {
    assert(SrcTy.isScalar() && SrcTy.getSizeInBits() >= 8);
    CvtSrc = B.buildAnyExt(S32, CvtSrc).getReg(0);
  }

  assert(MI.getOpcode() != NewOpc);
  B.buildInstr(NewOpc, {MI.getOperand(0)}, {CvtSrc}, MI.getFlags());
  MI.eraseFromParent();
}

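// Illustrative note: a G_FCANONICALIZE is redundant when its source is already
// in canonical form (as reported by SITargetLowering::isCanonicalized); the
// combine below then just forwards the source register in place of the
// canonicalize's result.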
bool AMDGPUPostLegalizerCombinerImpl::matchRemoveFcanonicalize(
    MachineInstr &MI, Register &Reg) const {
  const SITargetLowering *TLI = static_cast<const SITargetLowering *>(
      MF.getSubtarget().getTargetLowering());
  Reg = MI.getOperand(1).getReg();
  return TLI->isCanonicalized(Reg, MF);
}

// The buffer_load_{i8, i16} intrinsics are initially lowered as
// buffer_load_{u8, u16} instructions. Here, the buffer_load_{u8, u16}
// instructions are combined with sign extension instructions in order to
// generate buffer_load_{i8, i16} instructions.
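//
// Illustrative sketch:
//   %v:_(s32) = G_AMDGPU_BUFFER_LOAD_UBYTE ...
//   %s:_(s32) = G_SEXT_INREG %v, 8
// becomes
//   %s:_(s32) = G_AMDGPU_BUFFER_LOAD_SBYTE ...
//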
// Identify buffer_load_{u8, u16}.
bool AMDGPUPostLegalizerCombinerImpl::matchCombineSignExtendInReg(
    MachineInstr &MI, std::pair<MachineInstr *, unsigned> &MatchData) const {
  Register LoadReg = MI.getOperand(1).getReg();
  if (!MRI.hasOneNonDBGUse(LoadReg))
    return false;

  // Check if the first operand of the sign extension is a subword buffer load
  // instruction.
  MachineInstr *LoadMI = MRI.getVRegDef(LoadReg);
  int64_t Width = MI.getOperand(2).getImm();
  switch (LoadMI->getOpcode()) {
  case AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE:
    MatchData = {LoadMI, AMDGPU::G_AMDGPU_BUFFER_LOAD_SBYTE};
    return Width == 8;
  case AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT:
    MatchData = {LoadMI, AMDGPU::G_AMDGPU_BUFFER_LOAD_SSHORT};
    return Width == 16;
  case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_UBYTE:
    MatchData = {LoadMI, AMDGPU::G_AMDGPU_S_BUFFER_LOAD_SBYTE};
    return Width == 8;
  case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_USHORT:
    MatchData = {LoadMI, AMDGPU::G_AMDGPU_S_BUFFER_LOAD_SSHORT};
    return Width == 16;
  }
  return false;
}

// Combine buffer_load_{u8, u16} and the sign extension instruction to generate
// buffer_load_{i8, i16}.
void AMDGPUPostLegalizerCombinerImpl::applyCombineSignExtendInReg(
    MachineInstr &MI, std::pair<MachineInstr *, unsigned> &MatchData) const {
  auto [LoadMI, NewOpcode] = MatchData;
  LoadMI->setDesc(TII.get(NewOpcode));
  // Update the destination register of the load with the destination register
  // of the sign extension.
  Register SignExtendInsnDst = MI.getOperand(0).getReg();
  LoadMI->getOperand(0).setReg(SignExtendInsnDst);
  // Remove the sign extension.
  MI.eraseFromParent();
}

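// Illustrative sketch of the 64-bit multiply narrowing below: when known bits
// prove both sources have their high 32 bits clear, e.g.
//   %a64:_(s64) = G_ZEXT %a:_(s32)
//   %b64:_(s64) = G_ZEXT %b:_(s32)
//   %m:_(s64)   = G_MUL %a64, %b64
// the multiply's opcode is replaced with G_AMDGPU_S_MUL_U64_U32 (and with
// G_AMDGPU_S_MUL_I64_I32 in the sign-extended case).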
bool AMDGPUPostLegalizerCombinerImpl::matchCombine_s_mul_u64(
    MachineInstr &MI, unsigned &NewOpcode) const {
  Register Src0 = MI.getOperand(1).getReg();
  Register Src1 = MI.getOperand(2).getReg();
  if (MRI.getType(Src0) != LLT::scalar(64))
    return false;

  if (KB->getKnownBits(Src1).countMinLeadingZeros() >= 32 &&
      KB->getKnownBits(Src0).countMinLeadingZeros() >= 32) {
    NewOpcode = AMDGPU::G_AMDGPU_S_MUL_U64_U32;
    return true;
  }

  if (KB->computeNumSignBits(Src1) >= 33 &&
      KB->computeNumSignBits(Src0) >= 33) {
    NewOpcode = AMDGPU::G_AMDGPU_S_MUL_I64_I32;
    return true;
  }
  return false;
}

void AMDGPUPostLegalizerCombinerImpl::applyCombine_s_mul_u64(
    MachineInstr &MI, unsigned &NewOpcode) const {
  Helper.replaceOpcodeWith(MI, NewOpcode);
}

// Pass boilerplate
// ================

class AMDGPUPostLegalizerCombiner : public MachineFunctionPass {
public:
  static char ID;

  AMDGPUPostLegalizerCombiner(bool IsOptNone = false);

  StringRef getPassName() const override {
    return "AMDGPUPostLegalizerCombiner";
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override;

private:
  bool IsOptNone;
  AMDGPUPostLegalizerCombinerImplRuleConfig RuleConfig;
};
} // end anonymous namespace

void AMDGPUPostLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<TargetPassConfig>();
  AU.setPreservesCFG();
  getSelectionDAGFallbackAnalysisUsage(AU);
  AU.addRequired<GISelKnownBitsAnalysis>();
  AU.addPreserved<GISelKnownBitsAnalysis>();
  if (!IsOptNone) {
    AU.addRequired<MachineDominatorTree>();
    AU.addPreserved<MachineDominatorTree>();
  }
  MachineFunctionPass::getAnalysisUsage(AU);
}

AMDGPUPostLegalizerCombiner::AMDGPUPostLegalizerCombiner(bool IsOptNone)
    : MachineFunctionPass(ID), IsOptNone(IsOptNone) {
  initializeAMDGPUPostLegalizerCombinerPass(*PassRegistry::getPassRegistry());

  if (!RuleConfig.parseCommandLineOption())
    report_fatal_error("Invalid rule identifier");
}

bool AMDGPUPostLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
  if (MF.getProperties().hasProperty(
          MachineFunctionProperties::Property::FailedISel))
    return false;
  auto *TPC = &getAnalysis<TargetPassConfig>();
  const Function &F = MF.getFunction();
  bool EnableOpt =
      MF.getTarget().getOptLevel() != CodeGenOptLevel::None && !skipFunction(F);

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const AMDGPULegalizerInfo *LI =
      static_cast<const AMDGPULegalizerInfo *>(ST.getLegalizerInfo());

  GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
  MachineDominatorTree *MDT =
      IsOptNone ? nullptr : &getAnalysis<MachineDominatorTree>();

  CombinerInfo CInfo(/*AllowIllegalOps*/ false, /*ShouldLegalizeIllegal*/ true,
                     LI, EnableOpt, F.hasOptSize(), F.hasMinSize());

  AMDGPUPostLegalizerCombinerImpl Impl(MF, CInfo, TPC, *KB, /*CSEInfo*/ nullptr,
                                       RuleConfig, ST, MDT, LI);
  return Impl.combineMachineInstrs();
}

char AMDGPUPostLegalizerCombiner::ID = 0;
INITIALIZE_PASS_BEGIN(AMDGPUPostLegalizerCombiner, DEBUG_TYPE,
                      "Combine AMDGPU machine instrs after legalization", false,
                      false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis)
INITIALIZE_PASS_END(AMDGPUPostLegalizerCombiner, DEBUG_TYPE,
                    "Combine AMDGPU machine instrs after legalization", false,
                    false)

namespace llvm {
FunctionPass *createAMDGPUPostLegalizeCombiner(bool IsOptNone) {
  return new AMDGPUPostLegalizerCombiner(IsOptNone);
}
} // end namespace llvm
