1 | //===-- llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h -----*- C++ -*-// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
// This file contains some helper functions which try to clean up artifacts
// such as G_TRUNCs/G_[ZSA]EXTs that were created during legalization to make
// the types match. This file also contains some combines of merges that happen
// at the end of legalization.
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #ifndef LLVM_CODEGEN_GLOBALISEL_LEGALIZATIONARTIFACTCOMBINER_H |
15 | #define LLVM_CODEGEN_GLOBALISEL_LEGALIZATIONARTIFACTCOMBINER_H |
16 | |
17 | #include "llvm/ADT/SmallBitVector.h" |
18 | #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h" |
19 | #include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h" |
20 | #include "llvm/CodeGen/GlobalISel/Legalizer.h" |
21 | #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" |
22 | #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" |
23 | #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" |
24 | #include "llvm/CodeGen/GlobalISel/Utils.h" |
25 | #include "llvm/CodeGen/MachineRegisterInfo.h" |
26 | #include "llvm/CodeGen/Register.h" |
27 | #include "llvm/CodeGen/TargetOpcodes.h" |
28 | #include "llvm/IR/Constants.h" |
29 | #include "llvm/Support/Debug.h" |
30 | |
31 | #define DEBUG_TYPE "legalizer" |
32 | |
33 | namespace llvm { |
34 | class LegalizationArtifactCombiner { |
35 | MachineIRBuilder &Builder; |
36 | MachineRegisterInfo &MRI; |
37 | const LegalizerInfo &LI; |
38 | GISelKnownBits *KB; |
39 | |
40 | static bool isArtifactCast(unsigned Opc) { |
41 | switch (Opc) { |
42 | case TargetOpcode::G_TRUNC: |
43 | case TargetOpcode::G_SEXT: |
44 | case TargetOpcode::G_ZEXT: |
45 | case TargetOpcode::G_ANYEXT: |
46 | return true; |
47 | default: |
48 | return false; |
49 | } |
50 | } |
51 | |
52 | public: |
  /// Construct a combiner that builds new instructions with \p B over
  /// \p MRI, consulting \p LI for legality queries. \p KB is an optional
  /// known-bits analysis used to elide redundant masking (may be null).
  LegalizationArtifactCombiner(MachineIRBuilder &B, MachineRegisterInfo &MRI,
                               const LegalizerInfo &LI,
                               GISelKnownBits *KB = nullptr)
      : Builder(B), MRI(MRI), LI(LI), KB(KB) {}
57 | |
  /// Try to combine away a G_ANYEXT artifact \p MI.
  ///
  /// Patterns handled:
  ///   aext(trunc x)    -> aext/copy/trunc x
  ///   aext([asz]ext x) -> [asz]ext x
  ///   aext(G_CONSTANT) -> wider G_CONSTANT, if the wide constant is legal
  /// and finally G_ANYEXT(G_IMPLICIT_DEF) via tryFoldImplicitDef.
  ///
  /// \returns true if a combine fired. Instructions made dead are appended
  /// to \p DeadInsts; registers whose definition changed are appended to
  /// \p UpdatedDefs so their users can be revisited.
  bool tryCombineAnyExt(MachineInstr &MI,
                        SmallVectorImpl<MachineInstr *> &DeadInsts,
                        SmallVectorImpl<Register> &UpdatedDefs,
                        GISelObserverWrapper &Observer) {
    using namespace llvm::MIPatternMatch;
    assert(MI.getOpcode() == TargetOpcode::G_ANYEXT);

    Builder.setInstrAndDebugLoc(MI);
    Register DstReg = MI.getOperand(i: 0).getReg();
    Register SrcReg = lookThroughCopyInstrs(Reg: MI.getOperand(i: 1).getReg());

    // aext(trunc x) - > aext/copy/trunc x
    Register TruncSrc;
    if (mi_match(R: SrcReg, MRI, P: m_GTrunc(Src: m_Reg(R&: TruncSrc)))) {
      LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI;);
      // If the types round-trip exactly, no cast instruction is needed.
      if (MRI.getType(Reg: DstReg) == MRI.getType(Reg: TruncSrc))
        replaceRegOrBuildCopy(DstReg, SrcReg: TruncSrc, MRI, Builder, UpdatedDefs,
                              Observer);
      else
        Builder.buildAnyExtOrTrunc(Res: DstReg, Op: TruncSrc);
      UpdatedDefs.push_back(Elt: DstReg);
      markInstAndDefDead(MI, DefMI&: *MRI.getVRegDef(Reg: SrcReg), DeadInsts);
      return true;
    }

    // aext([asz]ext x) -> [asz]ext x
    // The inner extension's opcode is reused: any-extending a value that was
    // already extended leaves the inner extension's semantics intact.
    Register ExtSrc;
    MachineInstr *ExtMI;
    if (mi_match(R: SrcReg, MRI,
                 P: m_all_of(preds: m_MInstr(MI&: ExtMI), preds: m_any_of(preds: m_GAnyExt(Src: m_Reg(R&: ExtSrc)),
                                                  preds: m_GSExt(Src: m_Reg(R&: ExtSrc)),
                                                  preds: m_GZExt(Src: m_Reg(R&: ExtSrc)))))) {
      Builder.buildInstr(Opc: ExtMI->getOpcode(), DstOps: {DstReg}, SrcOps: {ExtSrc});
      UpdatedDefs.push_back(Elt: DstReg);
      markInstAndDefDead(MI, DefMI&: *ExtMI, DeadInsts);
      return true;
    }

    // Try to fold aext(g_constant) when the larger constant type is legal.
    auto *SrcMI = MRI.getVRegDef(Reg: SrcReg);
    if (SrcMI->getOpcode() == TargetOpcode::G_CONSTANT) {
      const LLT DstTy = MRI.getType(Reg: DstReg);
      if (isInstLegal(Query: {TargetOpcode::G_CONSTANT, {DstTy}})) {
        auto &CstVal = SrcMI->getOperand(i: 1);
        // The high bits of an any-extend are undefined; sign-extension is an
        // arbitrary but fixed choice for materializing them.
        Builder.buildConstant(
            Res: DstReg, Val: CstVal.getCImm()->getValue().sext(width: DstTy.getSizeInBits()));
        UpdatedDefs.push_back(Elt: DstReg);
        markInstAndDefDead(MI, DefMI&: *SrcMI, DeadInsts);
        return true;
      }
    }
    return tryFoldImplicitDef(MI, DeadInsts, UpdatedDefs);
  }
111 | |
  /// Try to combine away a G_ZEXT artifact \p MI.
  ///
  /// Patterns handled:
  ///   zext(trunc x)    -> and (aext/copy/trunc x), mask
  ///   zext(sext x)     -> and (sext x), mask
  ///   zext(zext x)     -> zext x
  ///   zext(G_CONSTANT) -> wider G_CONSTANT, if the wide constant is legal
  /// and finally G_ZEXT(G_IMPLICIT_DEF) via tryFoldImplicitDef.
  ///
  /// \returns true if a combine fired; dead instructions are appended to
  /// \p DeadInsts and redefined registers to \p UpdatedDefs.
  bool tryCombineZExt(MachineInstr &MI,
                      SmallVectorImpl<MachineInstr *> &DeadInsts,
                      SmallVectorImpl<Register> &UpdatedDefs,
                      GISelObserverWrapper &Observer) {
    using namespace llvm::MIPatternMatch;
    assert(MI.getOpcode() == TargetOpcode::G_ZEXT);

    Builder.setInstrAndDebugLoc(MI);
    Register DstReg = MI.getOperand(i: 0).getReg();
    Register SrcReg = lookThroughCopyInstrs(Reg: MI.getOperand(i: 1).getReg());

    // zext(trunc x) - > and (aext/copy/trunc x), mask
    // zext(sext x) -> and (sext x), mask
    Register TruncSrc;
    Register SextSrc;
    if (mi_match(R: SrcReg, MRI, P: m_GTrunc(Src: m_Reg(R&: TruncSrc))) ||
        mi_match(R: SrcReg, MRI, P: m_GSExt(Src: m_Reg(R&: SextSrc)))) {
      LLT DstTy = MRI.getType(Reg: DstReg);
      // Bail out if we could not materialize the G_AND or its mask constant.
      if (isInstUnsupported(Query: {TargetOpcode::G_AND, {DstTy}}) ||
          isConstantUnsupported(Ty: DstTy))
        return false;
      LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI;);
      LLT SrcTy = MRI.getType(Reg: SrcReg);
      // Mask keeps exactly the bits that are well-defined before this zext.
      APInt MaskVal = APInt::getAllOnes(numBits: SrcTy.getScalarSizeInBits());
      if (SextSrc && (DstTy != MRI.getType(Reg: SextSrc)))
        SextSrc = Builder.buildSExtOrTrunc(Res: DstTy, Op: SextSrc).getReg(Idx: 0);
      if (TruncSrc && (DstTy != MRI.getType(Reg: TruncSrc)))
        TruncSrc = Builder.buildAnyExtOrTrunc(Res: DstTy, Op: TruncSrc).getReg(Idx: 0);
      APInt ExtMaskVal = MaskVal.zext(width: DstTy.getScalarSizeInBits());
      Register AndSrc = SextSrc ? SextSrc : TruncSrc;
      // Elide G_AND and mask constant if possible.
      // The G_AND would also be removed by the post-legalize redundant_and
      // combine, but in this very common case, eliding early and regardless of
      // OptLevel results in significant compile-time and O0 code-size
      // improvements. Inserting unnecessary instructions between boolean defs
      // and uses hinders a lot of folding during ISel.
      if (KB && (KB->getKnownZeroes(R: AndSrc) | ExtMaskVal).isAllOnes()) {
        replaceRegOrBuildCopy(DstReg, SrcReg: AndSrc, MRI, Builder, UpdatedDefs,
                              Observer);
      } else {
        auto Mask = Builder.buildConstant(Res: DstTy, Val: ExtMaskVal);
        Builder.buildAnd(Dst: DstReg, Src0: AndSrc, Src1: Mask);
      }
      markInstAndDefDead(MI, DefMI&: *MRI.getVRegDef(Reg: SrcReg), DeadInsts);
      return true;
    }

    // zext(zext x) -> (zext x)
    Register ZextSrc;
    if (mi_match(R: SrcReg, MRI, P: m_GZExt(Src: m_Reg(R&: ZextSrc)))) {
      LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI);
      // Retarget this zext at the inner zext's source in place. The outer
      // instruction survives, so only the inner def may become dead.
      Observer.changingInstr(MI);
      MI.getOperand(i: 1).setReg(ZextSrc);
      Observer.changedInstr(MI);
      UpdatedDefs.push_back(Elt: DstReg);
      markDefDead(MI, DefMI&: *MRI.getVRegDef(Reg: SrcReg), DeadInsts);
      return true;
    }

    // Try to fold zext(g_constant) when the larger constant type is legal.
    auto *SrcMI = MRI.getVRegDef(Reg: SrcReg);
    if (SrcMI->getOpcode() == TargetOpcode::G_CONSTANT) {
      const LLT DstTy = MRI.getType(Reg: DstReg);
      if (isInstLegal(Query: {TargetOpcode::G_CONSTANT, {DstTy}})) {
        auto &CstVal = SrcMI->getOperand(i: 1);
        Builder.buildConstant(
            Res: DstReg, Val: CstVal.getCImm()->getValue().zext(width: DstTy.getSizeInBits()));
        UpdatedDefs.push_back(Elt: DstReg);
        markInstAndDefDead(MI, DefMI&: *SrcMI, DeadInsts);
        return true;
      }
    }
    return tryFoldImplicitDef(MI, DeadInsts, UpdatedDefs);
  }
186 | |
  /// Try to combine away a G_SEXT artifact \p MI.
  ///
  /// Patterns handled:
  ///   sext(trunc x)    -> sext_inreg (aext/copy/trunc x), c
  ///   sext(zext x)     -> zext x
  ///   sext(sext x)     -> sext x
  ///   sext(G_CONSTANT) -> wider G_CONSTANT, if the wide constant is legal
  /// and finally G_SEXT(G_IMPLICIT_DEF) via tryFoldImplicitDef.
  ///
  /// \returns true if a combine fired; dead instructions are appended to
  /// \p DeadInsts and redefined registers to \p UpdatedDefs.
  bool tryCombineSExt(MachineInstr &MI,
                      SmallVectorImpl<MachineInstr *> &DeadInsts,
                      SmallVectorImpl<Register> &UpdatedDefs) {
    using namespace llvm::MIPatternMatch;
    assert(MI.getOpcode() == TargetOpcode::G_SEXT);

    Builder.setInstrAndDebugLoc(MI);
    Register DstReg = MI.getOperand(i: 0).getReg();
    Register SrcReg = lookThroughCopyInstrs(Reg: MI.getOperand(i: 1).getReg());

    // sext(trunc x) - > (sext_inreg (aext/copy/trunc x), c)
    Register TruncSrc;
    if (mi_match(R: SrcReg, MRI, P: m_GTrunc(Src: m_Reg(R&: TruncSrc)))) {
      LLT DstTy = MRI.getType(Reg: DstReg);
      if (isInstUnsupported(Query: {TargetOpcode::G_SEXT_INREG, {DstTy}}))
        return false;
      LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI;);
      LLT SrcTy = MRI.getType(Reg: SrcReg);
      // Only the low SrcTy bits carry meaning; sign-extend from that width.
      uint64_t SizeInBits = SrcTy.getScalarSizeInBits();
      if (DstTy != MRI.getType(Reg: TruncSrc))
        TruncSrc = Builder.buildAnyExtOrTrunc(Res: DstTy, Op: TruncSrc).getReg(Idx: 0);
      Builder.buildSExtInReg(Res: DstReg, Op: TruncSrc, ImmOp: SizeInBits);
      markInstAndDefDead(MI, DefMI&: *MRI.getVRegDef(Reg: SrcReg), DeadInsts);
      return true;
    }

    // sext(zext x) -> (zext x)
    // sext(sext x) -> (sext x)
    // In both cases the inner extension already fixed the high bits, so the
    // inner opcode can be reused directly at the wider type.
    Register ExtSrc;
    MachineInstr *ExtMI;
    if (mi_match(R: SrcReg, MRI,
                 P: m_all_of(preds: m_MInstr(MI&: ExtMI), preds: m_any_of(preds: m_GZExt(Src: m_Reg(R&: ExtSrc)),
                                                  preds: m_GSExt(Src: m_Reg(R&: ExtSrc)))))) {
      LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI);
      Builder.buildInstr(Opc: ExtMI->getOpcode(), DstOps: {DstReg}, SrcOps: {ExtSrc});
      UpdatedDefs.push_back(Elt: DstReg);
      markInstAndDefDead(MI, DefMI&: *MRI.getVRegDef(Reg: SrcReg), DeadInsts);
      return true;
    }

    // Try to fold sext(g_constant) when the larger constant type is legal.
    auto *SrcMI = MRI.getVRegDef(Reg: SrcReg);
    if (SrcMI->getOpcode() == TargetOpcode::G_CONSTANT) {
      const LLT DstTy = MRI.getType(Reg: DstReg);
      if (isInstLegal(Query: {TargetOpcode::G_CONSTANT, {DstTy}})) {
        auto &CstVal = SrcMI->getOperand(i: 1);
        Builder.buildConstant(
            Res: DstReg, Val: CstVal.getCImm()->getValue().sext(width: DstTy.getSizeInBits()));
        UpdatedDefs.push_back(Elt: DstReg);
        markInstAndDefDead(MI, DefMI&: *SrcMI, DeadInsts);
        return true;
      }
    }

    return tryFoldImplicitDef(MI, DeadInsts, UpdatedDefs);
  }
243 | |
  /// Try to combine away a G_TRUNC artifact \p MI.
  ///
  /// Patterns handled:
  ///   trunc(G_CONSTANT)      -> narrower G_CONSTANT, if legal
  ///   trunc(merge-like op)   -> trunc/copy/smaller merge of its sources
  ///   trunc(trunc x)         -> trunc x
  ///   trunc(ext x)           -> x (via ArtifactValueFinder)
  ///
  /// \returns true if a combine fired; dead instructions are appended to
  /// \p DeadInsts and redefined registers to \p UpdatedDefs.
  bool tryCombineTrunc(MachineInstr &MI,
                       SmallVectorImpl<MachineInstr *> &DeadInsts,
                       SmallVectorImpl<Register> &UpdatedDefs,
                       GISelObserverWrapper &Observer) {
    using namespace llvm::MIPatternMatch;
    assert(MI.getOpcode() == TargetOpcode::G_TRUNC);

    Builder.setInstr(MI);
    Register DstReg = MI.getOperand(i: 0).getReg();
    const LLT DstTy = MRI.getType(Reg: DstReg);
    Register SrcReg = lookThroughCopyInstrs(Reg: MI.getOperand(i: 1).getReg());

    // Try to fold trunc(g_constant) when the smaller constant type is legal.
    auto *SrcMI = MRI.getVRegDef(Reg: SrcReg);
    if (SrcMI->getOpcode() == TargetOpcode::G_CONSTANT) {
      if (isInstLegal(Query: {TargetOpcode::G_CONSTANT, {DstTy}})) {
        auto &CstVal = SrcMI->getOperand(i: 1);
        Builder.buildConstant(
            Res: DstReg, Val: CstVal.getCImm()->getValue().trunc(width: DstTy.getSizeInBits()));
        UpdatedDefs.push_back(Elt: DstReg);
        markInstAndDefDead(MI, DefMI&: *SrcMI, DeadInsts);
        return true;
      }
    }

    // Try to fold trunc(merge) to directly use the source of the merge.
    // This gets rid of large, difficult to legalize, merges
    if (auto *SrcMerge = dyn_cast<GMerge>(Val: SrcMI)) {
      const Register MergeSrcReg = SrcMerge->getSourceReg(I: 0);
      const LLT MergeSrcTy = MRI.getType(Reg: MergeSrcReg);

      // We can only fold if the types are scalar
      const unsigned DstSize = DstTy.getSizeInBits();
      const unsigned MergeSrcSize = MergeSrcTy.getSizeInBits();
      if (!DstTy.isScalar() || !MergeSrcTy.isScalar())
        return false;

      if (DstSize < MergeSrcSize) {
        // When the merge source is larger than the destination, we can just
        // truncate the merge source directly
        if (isInstUnsupported(Query: {TargetOpcode::G_TRUNC, {DstTy, MergeSrcTy}}))
          return false;

        LLVM_DEBUG(dbgs() << "Combining G_TRUNC(G_MERGE_VALUES) to G_TRUNC: "
                          << MI);

        Builder.buildTrunc(Res: DstReg, Op: MergeSrcReg);
        UpdatedDefs.push_back(Elt: DstReg);
      } else if (DstSize == MergeSrcSize) {
        // If the sizes match we can simply try to replace the register
        LLVM_DEBUG(
            dbgs() << "Replacing G_TRUNC(G_MERGE_VALUES) with merge input: "
                   << MI);
        replaceRegOrBuildCopy(DstReg, SrcReg: MergeSrcReg, MRI, Builder, UpdatedDefs,
                              Observer);
      } else if (DstSize % MergeSrcSize == 0) {
        // If the trunc size is a multiple of the merge source size we can use
        // a smaller merge instead
        if (isInstUnsupported(
                Query: {TargetOpcode::G_MERGE_VALUES, {DstTy, MergeSrcTy}}))
          return false;

        LLVM_DEBUG(
            dbgs() << "Combining G_TRUNC(G_MERGE_VALUES) to G_MERGE_VALUES: "
                   << MI);

        const unsigned NumSrcs = DstSize / MergeSrcSize;
        assert(NumSrcs < SrcMI->getNumOperands() - 1 &&
               "trunc(merge) should require less inputs than merge");
        SmallVector<Register, 8> SrcRegs(NumSrcs);
        // The truncated value lives in the low bits, i.e. the first NumSrcs
        // sources of the original merge.
        for (unsigned i = 0; i < NumSrcs; ++i)
          SrcRegs[i] = SrcMerge->getSourceReg(I: i);

        Builder.buildMergeValues(Res: DstReg, Ops: SrcRegs);
        UpdatedDefs.push_back(Elt: DstReg);
      } else {
        // Unable to combine
        return false;
      }

      markInstAndDefDead(MI, DefMI&: *SrcMerge, DeadInsts);
      return true;
    }

    // trunc(trunc) -> trunc
    Register TruncSrc;
    if (mi_match(R: SrcReg, MRI, P: m_GTrunc(Src: m_Reg(R&: TruncSrc)))) {
      // Always combine trunc(trunc) since the eventual resulting trunc must be
      // legal anyway as it must be legal for all outputs of the consumer type
      // set.
      LLVM_DEBUG(dbgs() << ".. Combine G_TRUNC(G_TRUNC): " << MI);

      Builder.buildTrunc(Res: DstReg, Op: TruncSrc);
      UpdatedDefs.push_back(Elt: DstReg);
      markInstAndDefDead(MI, DefMI&: *MRI.getVRegDef(Reg: TruncSrc), DeadInsts);
      return true;
    }

    // trunc(ext x) -> x
    ArtifactValueFinder Finder(MRI, Builder, LI);
    if (Register FoundReg =
            Finder.findValueFromDef(DefReg: DstReg, StartBit: 0, Size: DstTy.getSizeInBits())) {
      LLT FoundRegTy = MRI.getType(Reg: FoundReg);
      // Only substitute when the found value has exactly the requested type.
      if (DstTy == FoundRegTy) {
        LLVM_DEBUG(dbgs() << ".. Combine G_TRUNC(G_[S,Z,ANY]EXT/G_TRUNC...): "
                          << MI;);

        replaceRegOrBuildCopy(DstReg, SrcReg: FoundReg, MRI, Builder, UpdatedDefs,
                              Observer);
        UpdatedDefs.push_back(Elt: DstReg);
        markInstAndDefDead(MI, DefMI&: *MRI.getVRegDef(Reg: SrcReg), DeadInsts);
        return true;
      }
    }

    return false;
  }
361 | |
  /// Try to fold G_[ASZ]EXT (G_IMPLICIT_DEF).
  ///
  /// G_ANYEXT of undef folds to a wider G_IMPLICIT_DEF. G_SEXT/G_ZEXT of
  /// undef fold to a zero constant: their high bits are defined (0 for zext,
  /// copies of the sign bit for sext), and choosing 0 for the undef source
  /// satisfies both.
  ///
  /// \returns true if \p MI was folded; dead instructions are appended to
  /// \p DeadInsts and redefined registers to \p UpdatedDefs.
  bool tryFoldImplicitDef(MachineInstr &MI,
                          SmallVectorImpl<MachineInstr *> &DeadInsts,
                          SmallVectorImpl<Register> &UpdatedDefs) {
    unsigned Opcode = MI.getOpcode();
    assert(Opcode == TargetOpcode::G_ANYEXT || Opcode == TargetOpcode::G_ZEXT ||
           Opcode == TargetOpcode::G_SEXT);

    if (MachineInstr *DefMI = getOpcodeDef(Opcode: TargetOpcode::G_IMPLICIT_DEF,
                                           Reg: MI.getOperand(i: 1).getReg(), MRI)) {
      Builder.setInstr(MI);
      Register DstReg = MI.getOperand(i: 0).getReg();
      LLT DstTy = MRI.getType(Reg: DstReg);

      if (Opcode == TargetOpcode::G_ANYEXT) {
        // G_ANYEXT (G_IMPLICIT_DEF) -> G_IMPLICIT_DEF
        if (!isInstLegal(Query: {TargetOpcode::G_IMPLICIT_DEF, {DstTy}}))
          return false;
        LLVM_DEBUG(dbgs() << ".. Combine G_ANYEXT(G_IMPLICIT_DEF): " << MI;);
        Builder.buildInstr(Opc: TargetOpcode::G_IMPLICIT_DEF, DstOps: {DstReg}, SrcOps: {});
        UpdatedDefs.push_back(Elt: DstReg);
      } else {
        // G_[SZ]EXT (G_IMPLICIT_DEF) -> G_CONSTANT 0 because the top
        // bits will be 0 for G_ZEXT and 0/1 for the G_SEXT.
        if (isConstantUnsupported(Ty: DstTy))
          return false;
        LLVM_DEBUG(dbgs() << ".. Combine G_[SZ]EXT(G_IMPLICIT_DEF): " << MI;);
        Builder.buildConstant(Res: DstReg, Val: 0);
        UpdatedDefs.push_back(Elt: DstReg);
      }

      markInstAndDefDead(MI, DefMI&: *DefMI, DeadInsts);
      return true;
    }
    return false;
  }
398 | |
  /// Try to fold a G_UNMERGE_VALUES \p MI whose source is produced by the
  /// artifact cast \p CastMI, rewriting the unmerge to operate directly on
  /// the cast's source. Currently only G_TRUNC casts are handled (see the
  /// TODO at the bottom).
  ///
  /// \returns true if the unmerge was rewritten; dead instructions are
  /// appended to \p DeadInsts and redefined registers to \p UpdatedDefs.
  bool tryFoldUnmergeCast(MachineInstr &MI, MachineInstr &CastMI,
                          SmallVectorImpl<MachineInstr *> &DeadInsts,
                          SmallVectorImpl<Register> &UpdatedDefs) {

    assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);

    const unsigned CastOpc = CastMI.getOpcode();

    if (!isArtifactCast(Opc: CastOpc))
      return false;

    // All operands but the last are defs; the last operand is the source.
    const unsigned NumDefs = MI.getNumOperands() - 1;

    const Register CastSrcReg = CastMI.getOperand(i: 1).getReg();
    const LLT CastSrcTy = MRI.getType(Reg: CastSrcReg);
    const LLT DestTy = MRI.getType(Reg: MI.getOperand(i: 0).getReg());
    const LLT SrcTy = MRI.getType(Reg: MI.getOperand(i: NumDefs).getReg());

    const unsigned CastSrcSize = CastSrcTy.getSizeInBits();
    const unsigned DestSize = DestTy.getSizeInBits();

    if (CastOpc == TargetOpcode::G_TRUNC) {
      if (SrcTy.isVector() && SrcTy.getScalarType() == DestTy.getScalarType()) {
        // %1:_(<4 x s8>) = G_TRUNC %0(<4 x s32>)
        // %2:_(s8), %3:_(s8), %4:_(s8), %5:_(s8) = G_UNMERGE_VALUES %1
        // =>
        // %6:_(s32), %7:_(s32), %8:_(s32), %9:_(s32) = G_UNMERGE_VALUES %0
        // %2:_(s8) = G_TRUNC %6
        // %3:_(s8) = G_TRUNC %7
        // %4:_(s8) = G_TRUNC %8
        // %5:_(s8) = G_TRUNC %9

        unsigned UnmergeNumElts =
            DestTy.isVector() ? CastSrcTy.getNumElements() / NumDefs : 1;
        LLT UnmergeTy = CastSrcTy.changeElementCount(
            EC: ElementCount::getFixed(MinVal: UnmergeNumElts));
        LLT SrcWideTy =
            SrcTy.changeElementCount(EC: ElementCount::getFixed(MinVal: UnmergeNumElts));

        // Bail if the new unmerge is unsupported, or if legalization would
        // respond by widening the truncs' element count again.
        if (isInstUnsupported(
                Query: {TargetOpcode::G_UNMERGE_VALUES, {UnmergeTy, CastSrcTy}}) ||
            LI.getAction(Query: {TargetOpcode::G_TRUNC, {SrcWideTy, UnmergeTy}})
                .Action == LegalizeActions::MoreElements)
          return false;

        Builder.setInstr(MI);
        auto NewUnmerge = Builder.buildUnmerge(Res: UnmergeTy, Op: CastSrcReg);

        // Re-emit each original def as a trunc of the wider unmerge result.
        for (unsigned I = 0; I != NumDefs; ++I) {
          Register DefReg = MI.getOperand(i: I).getReg();
          UpdatedDefs.push_back(Elt: DefReg);
          Builder.buildTrunc(Res: DefReg, Op: NewUnmerge.getReg(Idx: I));
        }

        markInstAndDefDead(MI, DefMI&: CastMI, DeadInsts);
        return true;
      }

      if (CastSrcTy.isScalar() && SrcTy.isScalar() && !DestTy.isVector()) {
        // %1:_(s16) = G_TRUNC %0(s32)
        // %2:_(s8), %3:_(s8) = G_UNMERGE_VALUES %1
        // =>
        // %2:_(s8), %3:_(s8), %4:_(s8), %5:_(s8) = G_UNMERGE_VALUES %0

        // Unmerge(trunc) can be combined if the trunc source size is a multiple
        // of the unmerge destination size
        if (CastSrcSize % DestSize != 0)
          return false;

        // Check if the new unmerge is supported
        if (isInstUnsupported(
                Query: {TargetOpcode::G_UNMERGE_VALUES, {DestTy, CastSrcTy}}))
          return false;

        // Gather the original destination registers and create new ones for the
        // unused bits
        const unsigned NewNumDefs = CastSrcSize / DestSize;
        SmallVector<Register, 8> DstRegs(NewNumDefs);
        for (unsigned Idx = 0; Idx < NewNumDefs; ++Idx) {
          if (Idx < NumDefs)
            DstRegs[Idx] = MI.getOperand(i: Idx).getReg();
          else
            DstRegs[Idx] = MRI.createGenericVirtualRegister(Ty: DestTy);
        }

        // Build new unmerge
        Builder.setInstr(MI);
        Builder.buildUnmerge(Res: DstRegs, Op: CastSrcReg);
        UpdatedDefs.append(in_start: DstRegs.begin(), in_end: DstRegs.begin() + NewNumDefs);
        markInstAndDefDead(MI, DefMI&: CastMI, DeadInsts);
        return true;
      }
    }

    // TODO: support combines with other casts as well
    return false;
  }
496 | |
  /// Check whether an unmerge producing values of type \p DestTy can be
  /// folded into the merge-like instruction with opcode \p MergeOp whose
  /// sources have type \p OpTy, possibly looking through an intervening
  /// conversion artifact with opcode \p ConvertOp (0 when there is none).
  static bool canFoldMergeOpcode(unsigned MergeOp, unsigned ConvertOp,
                                 LLT OpTy, LLT DestTy) {
    // Check if we found a definition that is like G_MERGE_VALUES.
    switch (MergeOp) {
    default:
      return false;
    case TargetOpcode::G_BUILD_VECTOR:
    case TargetOpcode::G_MERGE_VALUES:
      // The convert operation that we will need to insert is
      // going to convert the input of that type of instruction (scalar)
      // to the destination type (DestTy).
      // The conversion needs to stay in the same domain (scalar to scalar
      // and vector to vector), so if we were to allow to fold the merge
      // we would need to insert some bitcasts.
      // E.g.,
      // <2 x s16> = build_vector s16, s16
      // <2 x s32> = zext <2 x s16>
      // <2 x s16>, <2 x s16> = unmerge <2 x s32>
      //
      // As is the folding would produce:
      // <2 x s16> = zext s16  <-- scalar to vector
      // <2 x s16> = zext s16  <-- scalar to vector
      // Which is invalid.
      // Instead we would want to generate:
      // s32 = zext s16
      // <2 x s16> = bitcast s32
      // s32 = zext s16
      // <2 x s16> = bitcast s32
      //
      // That is not done yet.
      if (ConvertOp == 0)
        return true;
      // Without a conversion we'd be fine (handled above); with one, only
      // the scalarizing case (vector op unmerged to its scalar elements)
      // keeps the conversion within one domain.
      return !DestTy.isVector() && OpTy.isVector() &&
             DestTy == OpTy.getElementType();
    case TargetOpcode::G_CONCAT_VECTORS: {
      if (ConvertOp == 0)
        return true;
      if (!DestTy.isVector())
        return false;

      const unsigned OpEltSize = OpTy.getElementType().getSizeInBits();

      // Don't handle scalarization with a cast that isn't in the same
      // direction as the vector cast. This could be handled, but it would
      // require more intermediate unmerges.
      if (ConvertOp == TargetOpcode::G_TRUNC)
        return DestTy.getSizeInBits() <= OpEltSize;
      return DestTy.getSizeInBits() >= OpEltSize;
    }
    }
  }
548 | |
  /// Try to replace DstReg with SrcReg or build a COPY instruction
  /// depending on the register constraints.
  ///
  /// When \p DstReg cannot simply be replaced by \p SrcReg (per
  /// llvm::canReplaceReg), a COPY is emitted instead and \p DstReg is
  /// recorded in \p UpdatedDefs. Otherwise every user of \p DstReg is
  /// retargeted to \p SrcReg, with \p Observer notified before and after
  /// each affected instruction changes.
  static void replaceRegOrBuildCopy(Register DstReg, Register SrcReg,
                                    MachineRegisterInfo &MRI,
                                    MachineIRBuilder &Builder,
                                    SmallVectorImpl<Register> &UpdatedDefs,
                                    GISelChangeObserver &Observer) {
    if (!llvm::canReplaceReg(DstReg, SrcReg, MRI)) {
      Builder.buildCopy(Res: DstReg, Op: SrcReg);
      UpdatedDefs.push_back(Elt: DstReg);
      return;
    }
    SmallVector<MachineInstr *, 4> UseMIs;
    // Get the users and notify the observer before replacing.
    for (auto &UseMI : MRI.use_instructions(Reg: DstReg)) {
      UseMIs.push_back(Elt: &UseMI);
      Observer.changingInstr(MI&: UseMI);
    }
    // Replace the registers.
    MRI.replaceRegWith(FromReg: DstReg, ToReg: SrcReg);
    UpdatedDefs.push_back(Elt: SrcReg);
    // Notify the observer that we changed the instructions.
    for (auto *UseMI : UseMIs)
      Observer.changedInstr(MI&: *UseMI);
  }
574 | |
575 | /// Return the operand index in \p MI that defines \p Def |
576 | static unsigned getDefIndex(const MachineInstr &MI, Register SearchDef) { |
577 | unsigned DefIdx = 0; |
578 | for (const MachineOperand &Def : MI.defs()) { |
579 | if (Def.getReg() == SearchDef) |
580 | break; |
581 | ++DefIdx; |
582 | } |
583 | |
584 | return DefIdx; |
585 | } |
586 | |
587 | /// This class provides utilities for finding source registers of specific |
588 | /// bit ranges in an artifact. The routines can look through the source |
589 | /// registers if they're other artifacts to try to find a non-artifact source |
590 | /// of a value. |
591 | class ArtifactValueFinder { |
592 | MachineRegisterInfo &MRI; |
593 | MachineIRBuilder &MIB; |
594 | const LegalizerInfo &LI; |
595 | |
596 | // Stores the best register found in the current query so far. |
597 | Register CurrentBest = Register(); |
598 | |
    /// Given a concat_vectors op \p Concat and a start bit and size, try to
    /// find the origin of the value defined by that start position and size.
    ///
    /// \returns a register with the requested size, or the current best
    /// register found during the current query.
    Register findValueFromConcat(GConcatVectors &Concat, unsigned StartBit,
                                 unsigned Size) {
      assert(Size > 0);

      // Find the source operand that provides the bits requested.
      // All concat sources share one type, so the first source's size
      // applies to every operand.
      Register Src1Reg = Concat.getSourceReg(I: 0);
      unsigned SrcSize = MRI.getType(Reg: Src1Reg).getSizeInBits();

      // Operand index of the source that provides the start of the bit range.
      // (+1 because operand 0 is the def.)
      unsigned StartSrcIdx = (StartBit / SrcSize) + 1;
      // Offset into the source at which the bit range starts.
      unsigned InRegOffset = StartBit % SrcSize;
      // Check that the bits don't span multiple sources.
      // FIXME: we might be able return multiple sources? Or create an
      // appropriate concat to make it fit.
      if (InRegOffset + Size > SrcSize)
        return CurrentBest;

      Register SrcReg = Concat.getReg(Idx: StartSrcIdx);
      if (InRegOffset == 0 && Size == SrcSize) {
        // Exact operand match: remember it as the best so far, then keep
        // looking through it for an even deeper non-artifact source.
        CurrentBest = SrcReg;
        return findValueFromDefImpl(DefReg: SrcReg, StartBit: 0, Size);
      }

      return findValueFromDefImpl(DefReg: SrcReg, StartBit: InRegOffset, Size);
    }
630 | |
    /// Given a build_vector op \p BV and a start bit and size, try to find
    /// the origin of the value defined by that start position and size.
    ///
    /// \returns a register with the requested size, or the current best
    /// register found during the current query.
    Register findValueFromBuildVector(GBuildVector &BV, unsigned StartBit,
                                      unsigned Size) {
      assert(Size > 0);

      // Find the source operand that provides the bits requested.
      // All build_vector sources share one scalar type.
      Register Src1Reg = BV.getSourceReg(I: 0);
      unsigned SrcSize = MRI.getType(Reg: Src1Reg).getSizeInBits();

      // Operand index of the source that provides the start of the bit range.
      // (+1 because operand 0 is the def.)
      unsigned StartSrcIdx = (StartBit / SrcSize) + 1;
      // Offset into the source at which the bit range starts.
      unsigned InRegOffset = StartBit % SrcSize;

      if (InRegOffset != 0)
        return CurrentBest; // Give up, bits don't start at a scalar source.
      if (Size < SrcSize)
        return CurrentBest; // Scalar source is too large for requested bits.

      // If the bits cover multiple sources evenly, then create a new
      // build_vector to synthesize the required size, if that's been requested.
      if (Size > SrcSize) {
        if (Size % SrcSize > 0)
          return CurrentBest; // Isn't covered exactly by sources.

        unsigned NumSrcsUsed = Size / SrcSize;
        // If we're requesting all of the sources, just return this def.
        if (NumSrcsUsed == BV.getNumSources())
          return BV.getReg(Idx: 0);

        LLT SrcTy = MRI.getType(Reg: Src1Reg);
        LLT NewBVTy = LLT::fixed_vector(NumElements: NumSrcsUsed, ScalarTy: SrcTy);

        // Check if the resulting build vector would be legal.
        LegalizeActionStep ActionStep =
            LI.getAction(Query: {TargetOpcode::G_BUILD_VECTOR, {NewBVTy, SrcTy}});
        if (ActionStep.Action != LegalizeActions::Legal)
          return CurrentBest;

        // Build a smaller vector from the contiguous run of sources that
        // covers the requested bit range.
        SmallVector<Register> NewSrcs;
        for (unsigned SrcIdx = StartSrcIdx; SrcIdx < StartSrcIdx + NumSrcsUsed;
             ++SrcIdx)
          NewSrcs.push_back(Elt: BV.getReg(Idx: SrcIdx));
        MIB.setInstrAndDebugLoc(BV);
        return MIB.buildBuildVector(Res: NewBVTy, Ops: NewSrcs).getReg(Idx: 0);
      }
      // A single source is requested, just return it.
      return BV.getReg(Idx: StartSrcIdx);
    }
684 | |
    /// Given a G_INSERT op \p MI and a start bit and size, try to find
    /// the origin of the value defined by that start position and size.
    ///
    /// \returns a register with the requested size, or the current best
    /// register found during the current query.
    Register findValueFromInsert(MachineInstr &MI, unsigned StartBit,
                                 unsigned Size) {
      assert(MI.getOpcode() == TargetOpcode::G_INSERT);
      assert(Size > 0);

      Register ContainerSrcReg = MI.getOperand(i: 1).getReg();
      Register InsertedReg = MI.getOperand(i: 2).getReg();
      LLT InsertedRegTy = MRI.getType(Reg: InsertedReg);
      unsigned InsertOffset = MI.getOperand(i: 3).getImm();

      // There are 4 possible container/insertreg + requested bit-range layouts
      // that the instruction and query could be representing.
      // For: %_ = G_INSERT %CONTAINER, %INS, InsOff (abbrev. to 'IO')
      // and a start bit 'SB', with size S, giving an end bit 'EB', we could
      // have...
      // Scenario A:
      //   --------------------------
      //   |  INS    |  CONTAINER   |
      //   --------------------------
      //       |   |
      //       SB  EB
      //
      // Scenario B:
      //   --------------------------
      //   |  INS    |  CONTAINER   |
      //   --------------------------
      //                |    |
      //                SB   EB
      //
      // Scenario C:
      //   --------------------------
      //   |  CONTAINER    |  INS   |
      //   --------------------------
      //       |    |
      //       SB   EB
      //
      // Scenario D:
      //   --------------------------
      //   |  CONTAINER    |  INS   |
      //   --------------------------
      //                     |   |
      //                     SB  EB
      //
      // So therefore, A and D are requesting data from the INS operand, while
      // B and C are requesting from the container operand.

      unsigned InsertedEndBit = InsertOffset + InsertedRegTy.getSizeInBits();
      unsigned EndBit = StartBit + Size;
      unsigned NewStartBit;
      Register SrcRegToUse;
      // Scenarios B and C: the range lies entirely outside the inserted
      // bits, so look through the container operand unchanged.
      if (EndBit <= InsertOffset || InsertedEndBit <= StartBit) {
        SrcRegToUse = ContainerSrcReg;
        NewStartBit = StartBit;
        return findValueFromDefImpl(DefReg: SrcRegToUse, StartBit: NewStartBit, Size);
      }
      // Scenarios A and D: the range lies entirely inside the inserted bits;
      // rebase the start bit relative to the inserted value.
      if (InsertOffset <= StartBit && EndBit <= InsertedEndBit) {
        SrcRegToUse = InsertedReg;
        NewStartBit = StartBit - InsertOffset;
        if (NewStartBit == 0 &&
            Size == MRI.getType(Reg: SrcRegToUse).getSizeInBits())
          CurrentBest = SrcRegToUse;
        return findValueFromDefImpl(DefReg: SrcRegToUse, StartBit: NewStartBit, Size);
      }
      // The bit range spans both the inserted and container regions.
      return Register();
    }
756 | |
757 | /// Given an G_SEXT, G_ZEXT, G_ANYEXT op \p MI and a start bit and |
758 | /// size, try to find the origin of the value defined by that start |
759 | /// position and size. |
760 | /// |
761 | /// \returns a register with the requested size, or the current best |
762 | /// register found during the current query. |
763 | Register findValueFromExt(MachineInstr &MI, unsigned StartBit, |
764 | unsigned Size) { |
765 | assert(MI.getOpcode() == TargetOpcode::G_SEXT || |
766 | MI.getOpcode() == TargetOpcode::G_ZEXT || |
767 | MI.getOpcode() == TargetOpcode::G_ANYEXT); |
768 | assert(Size > 0); |
769 | |
770 | Register SrcReg = MI.getOperand(i: 1).getReg(); |
771 | LLT SrcType = MRI.getType(Reg: SrcReg); |
772 | unsigned SrcSize = SrcType.getSizeInBits(); |
773 | |
774 | // Currently we don't go into vectors. |
775 | if (!SrcType.isScalar()) |
776 | return CurrentBest; |
777 | |
778 | if (StartBit + Size > SrcSize) |
779 | return CurrentBest; |
780 | |
781 | if (StartBit == 0 && SrcType.getSizeInBits() == Size) |
782 | CurrentBest = SrcReg; |
783 | return findValueFromDefImpl(DefReg: SrcReg, StartBit, Size); |
784 | } |
785 | |
786 | /// Given an G_TRUNC op \p MI and a start bit and size, try to find |
787 | /// the origin of the value defined by that start position and size. |
788 | /// |
789 | /// \returns a register with the requested size, or the current best |
790 | /// register found during the current query. |
791 | Register findValueFromTrunc(MachineInstr &MI, unsigned StartBit, |
792 | unsigned Size) { |
793 | assert(MI.getOpcode() == TargetOpcode::G_TRUNC); |
794 | assert(Size > 0); |
795 | |
796 | Register SrcReg = MI.getOperand(i: 1).getReg(); |
797 | LLT SrcType = MRI.getType(Reg: SrcReg); |
798 | |
799 | // Currently we don't go into vectors. |
800 | if (!SrcType.isScalar()) |
801 | return CurrentBest; |
802 | |
803 | return findValueFromDefImpl(DefReg: SrcReg, StartBit, Size); |
804 | } |
805 | |
/// Internal implementation for findValueFromDef(). findValueFromDef()
/// initializes some data like the CurrentBest register, which this method
/// and its callees rely upon.
///
/// Looks through copies to the real defining artifact and dispatches on its
/// opcode to locate the value providing bits [\p StartBit, \p StartBit +
/// \p Size) of \p DefReg.
Register findValueFromDefImpl(Register DefReg, unsigned StartBit,
                              unsigned Size) {
  std::optional<DefinitionAndSourceRegister> DefSrcReg =
      getDefSrcRegIgnoringCopies(Reg: DefReg, MRI);
  MachineInstr *Def = DefSrcReg->MI;
  DefReg = DefSrcReg->Reg;
  // If the instruction has a single def, then simply delegate the search.
  // For unmerge however with multiple defs, we need to compute the offset
  // into the source of the unmerge.
  switch (Def->getOpcode()) {
  case TargetOpcode::G_CONCAT_VECTORS:
    return findValueFromConcat(Concat&: cast<GConcatVectors>(Val&: *Def), StartBit, Size);
  case TargetOpcode::G_UNMERGE_VALUES: {
    // Compute which bit of the unmerge source DefReg starts at: each def
    // preceding DefReg in the def list covers DefSize bits of the source.
    unsigned DefStartBit = 0;
    unsigned DefSize = MRI.getType(Reg: DefReg).getSizeInBits();
    for (const auto &MO : Def->defs()) {
      if (MO.getReg() == DefReg)
        break;
      DefStartBit += DefSize;
    }
    // The unmerge's source is its last operand.
    Register SrcReg = Def->getOperand(i: Def->getNumOperands() - 1).getReg();
    Register SrcOriginReg =
        findValueFromDefImpl(DefReg: SrcReg, StartBit: StartBit + DefStartBit, Size);
    if (SrcOriginReg)
      return SrcOriginReg;
    // Failed to find a further value. If the StartBit and Size perfectly
    // covered the requested DefReg, return that since it's better than
    // nothing.
    if (StartBit == 0 && Size == DefSize)
      return DefReg;
    return CurrentBest;
  }
  case TargetOpcode::G_BUILD_VECTOR:
    return findValueFromBuildVector(BV&: cast<GBuildVector>(Val&: *Def), StartBit,
                                    Size);
  case TargetOpcode::G_INSERT:
    return findValueFromInsert(MI&: *Def, StartBit, Size);
  case TargetOpcode::G_TRUNC:
    return findValueFromTrunc(MI&: *Def, StartBit, Size);
  case TargetOpcode::G_SEXT:
  case TargetOpcode::G_ZEXT:
  case TargetOpcode::G_ANYEXT:
    return findValueFromExt(MI&: *Def, StartBit, Size);
  default:
    // Not an artifact we can look through; keep whatever we found so far.
    return CurrentBest;
  }
}
856 | |
857 | public: |
/// Construct a finder; stores references to the register info, builder and
/// legalizer info used by the queries and combines below.
ArtifactValueFinder(MachineRegisterInfo &Mri, MachineIRBuilder &Builder,
                    const LegalizerInfo &Info)
    : MRI(Mri), MIB(Builder), LI(Info) {}
861 | |
862 | /// Try to find a source of the value defined in the def \p DefReg, starting |
863 | /// at position \p StartBit with size \p Size. |
864 | /// \returns a register with the requested size, or an empty Register if no |
865 | /// better value could be found. |
866 | Register findValueFromDef(Register DefReg, unsigned StartBit, |
867 | unsigned Size) { |
868 | CurrentBest = Register(); |
869 | Register FoundReg = findValueFromDefImpl(DefReg, StartBit, Size); |
870 | return FoundReg != DefReg ? FoundReg : Register(); |
871 | } |
872 | |
/// Try to combine the defs of an unmerge \p MI by attempting to find
/// values that provides the bits for each def reg.
/// \returns true if all the defs of the unmerge have been made dead.
bool tryCombineUnmergeDefs(GUnmerge &MI, GISelChangeObserver &Observer,
                           SmallVectorImpl<Register> &UpdatedDefs) {
  unsigned NumDefs = MI.getNumDefs();
  LLT DestTy = MRI.getType(Reg: MI.getReg(Idx: 0));

  SmallBitVector DeadDefs(NumDefs);
  for (unsigned DefIdx = 0; DefIdx < NumDefs; ++DefIdx) {
    Register DefReg = MI.getReg(Idx: DefIdx);
    // A def with no (non-debug) users is trivially dead.
    if (MRI.use_nodbg_empty(RegNo: DefReg)) {
      DeadDefs[DefIdx] = true;
      continue;
    }
    Register FoundVal = findValueFromDef(DefReg, StartBit: 0, Size: DestTy.getSizeInBits());
    if (!FoundVal)
      continue;
    // Only an exact type match can substitute for the def directly.
    if (MRI.getType(Reg: FoundVal) != DestTy)
      continue;

    replaceRegOrBuildCopy(DstReg: DefReg, SrcReg: FoundVal, MRI, Builder&: MIB, UpdatedDefs,
                          Observer);
    // We only want to replace the uses, not the def of the old reg.
    // The replacement above may have rewritten this def operand too, so
    // restore DefReg as the unmerge's def.
    Observer.changingInstr(MI);
    MI.getOperand(i: DefIdx).setReg(DefReg);
    Observer.changedInstr(MI);
    DeadDefs[DefIdx] = true;
  }
  return DeadDefs.all();
}
904 | |
905 | GUnmerge *findUnmergeThatDefinesReg(Register Reg, unsigned Size, |
906 | unsigned &DefOperandIdx) { |
907 | if (Register Def = findValueFromDefImpl(DefReg: Reg, StartBit: 0, Size)) { |
908 | if (auto *Unmerge = dyn_cast<GUnmerge>(Val: MRI.getVRegDef(Reg: Def))) { |
909 | DefOperandIdx = |
910 | Unmerge->findRegisterDefOperandIdx(Reg: Def, /*TRI=*/TRI: nullptr); |
911 | return Unmerge; |
912 | } |
913 | } |
914 | return nullptr; |
915 | } |
916 | |
917 | // Check if sequence of elements from merge-like instruction is defined by |
918 | // another sequence of elements defined by unmerge. Most often this is the |
919 | // same sequence. Search for elements using findValueFromDefImpl. |
920 | bool isSequenceFromUnmerge(GMergeLikeInstr &MI, unsigned MergeStartIdx, |
921 | GUnmerge *Unmerge, unsigned UnmergeIdxStart, |
922 | unsigned NumElts, unsigned EltSize, |
923 | bool AllowUndef) { |
924 | assert(MergeStartIdx + NumElts <= MI.getNumSources()); |
925 | for (unsigned i = MergeStartIdx; i < MergeStartIdx + NumElts; ++i) { |
926 | unsigned EltUnmergeIdx; |
927 | GUnmerge *EltUnmerge = findUnmergeThatDefinesReg( |
928 | Reg: MI.getSourceReg(I: i), Size: EltSize, DefOperandIdx&: EltUnmergeIdx); |
929 | // Check if source i comes from the same Unmerge. |
930 | if (EltUnmerge == Unmerge) { |
931 | // Check that source i's def has same index in sequence in Unmerge. |
932 | if (i - MergeStartIdx != EltUnmergeIdx - UnmergeIdxStart) |
933 | return false; |
934 | } else if (!AllowUndef || |
935 | MRI.getVRegDef(Reg: MI.getSourceReg(I: i))->getOpcode() != |
936 | TargetOpcode::G_IMPLICIT_DEF) |
937 | return false; |
938 | } |
939 | return true; |
940 | } |
941 | |
/// Try to fold a merge-like instruction \p MI whose sources come from a
/// G_UNMERGE_VALUES. Three shapes are recognized: an exact unmerge/re-merge
/// round trip (becomes a copy of the unmerge source), a merge of a
/// contiguous slice of a larger unmerge source (becomes a direct unmerge to
/// DstTy), and a merge of several whole unmerge sources (becomes a merge of
/// those sources).
bool tryCombineMergeLike(GMergeLikeInstr &MI,
                         SmallVectorImpl<MachineInstr *> &DeadInsts,
                         SmallVectorImpl<Register> &UpdatedDefs,
                         GISelChangeObserver &Observer) {
  Register Elt0 = MI.getSourceReg(I: 0);
  LLT EltTy = MRI.getType(Reg: Elt0);
  unsigned EltSize = EltTy.getSizeInBits();

  unsigned Elt0UnmergeIdx;
  // Search for unmerge that will be candidate for combine.
  auto *Unmerge = findUnmergeThatDefinesReg(Reg: Elt0, Size: EltSize, DefOperandIdx&: Elt0UnmergeIdx);
  if (!Unmerge)
    return false;

  unsigned NumMIElts = MI.getNumSources();
  Register Dst = MI.getReg(Idx: 0);
  LLT DstTy = MRI.getType(Reg: Dst);
  Register UnmergeSrc = Unmerge->getSourceReg();
  LLT UnmergeSrcTy = MRI.getType(Reg: UnmergeSrc);

  // Recognize copy of UnmergeSrc to Dst.
  // Unmerge UnmergeSrc and reassemble it using merge-like opcode into Dst.
  //
  // %0:_(EltTy), %1, ... = G_UNMERGE_VALUES %UnmergeSrc:_(Ty)
  // %Dst:_(Ty) = G_merge_like_opcode %0:_(EltTy), %1, ...
  //
  // %Dst:_(Ty) = COPY %UnmergeSrc:_(Ty)
  if ((DstTy == UnmergeSrcTy) && (Elt0UnmergeIdx == 0)) {
    if (!isSequenceFromUnmerge(MI, MergeStartIdx: 0, Unmerge, UnmergeIdxStart: 0, NumElts: NumMIElts, EltSize,
                               /*AllowUndef=*/AllowUndef: DstTy.isVector()))
      return false;

    replaceRegOrBuildCopy(DstReg: Dst, SrcReg: UnmergeSrc, MRI, Builder&: MIB, UpdatedDefs, Observer);
    DeadInsts.push_back(Elt: &MI);
    return true;
  }

  // Recognize UnmergeSrc that can be unmerged to DstTy directly.
  // Types have to be either both vector or both non-vector types.
  // Merge-like opcodes are combined one at the time. First one creates new
  // unmerge, following should use the same unmerge (builder performs CSE).
  //
  // %0:_(EltTy), %1, %2, %3 = G_UNMERGE_VALUES %UnmergeSrc:_(UnmergeSrcTy)
  // %Dst:_(DstTy) = G_merge_like_opcode %0:_(EltTy), %1
  // %AnotherDst:_(DstTy) = G_merge_like_opcode %2:_(EltTy), %3
  //
  // %Dst:_(DstTy), %AnotherDst = G_UNMERGE_VALUES %UnmergeSrc
  if ((DstTy.isVector() == UnmergeSrcTy.isVector()) &&
      (Elt0UnmergeIdx % NumMIElts == 0) &&
      getCoverTy(OrigTy: UnmergeSrcTy, TargetTy: DstTy) == UnmergeSrcTy) {
    if (!isSequenceFromUnmerge(MI, MergeStartIdx: 0, Unmerge, UnmergeIdxStart: Elt0UnmergeIdx, NumElts: NumMIElts,
                               EltSize, AllowUndef: false))
      return false;
    MIB.setInstrAndDebugLoc(MI);
    auto NewUnmerge = MIB.buildUnmerge(Res: DstTy, Op: Unmerge->getSourceReg());
    // Pick the def of the new unmerge that covers the same bits as Dst.
    unsigned DstIdx = (Elt0UnmergeIdx * EltSize) / DstTy.getSizeInBits();
    replaceRegOrBuildCopy(DstReg: Dst, SrcReg: NewUnmerge.getReg(Idx: DstIdx), MRI, Builder&: MIB,
                          UpdatedDefs, Observer);
    DeadInsts.push_back(Elt: &MI);
    return true;
  }

  // Recognize when multiple unmerged sources with UnmergeSrcTy type
  // can be merged into Dst with DstTy type directly.
  // Types have to be either both vector or both non-vector types.

  // %0:_(EltTy), %1 = G_UNMERGE_VALUES %UnmergeSrc:_(UnmergeSrcTy)
  // %2:_(EltTy), %3 = G_UNMERGE_VALUES %AnotherUnmergeSrc:_(UnmergeSrcTy)
  // %Dst:_(DstTy) = G_merge_like_opcode %0:_(EltTy), %1, %2, %3
  //
  // %Dst:_(DstTy) = G_merge_like_opcode %UnmergeSrc, %AnotherUnmergeSrc

  if ((DstTy.isVector() == UnmergeSrcTy.isVector()) &&
      getCoverTy(OrigTy: DstTy, TargetTy: UnmergeSrcTy) == DstTy) {
    SmallVector<Register, 4> ConcatSources;
    unsigned NumElts = Unmerge->getNumDefs();
    for (unsigned i = 0; i < MI.getNumSources(); i += NumElts) {
      unsigned EltUnmergeIdx;
      auto *UnmergeI = findUnmergeThatDefinesReg(Reg: MI.getSourceReg(I: i),
                                                 Size: EltSize, DefOperandIdx&: EltUnmergeIdx);
      // All unmerges have to be the same size.
      if ((!UnmergeI) || (UnmergeI->getNumDefs() != NumElts) ||
          (EltUnmergeIdx != 0))
        return false;
      if (!isSequenceFromUnmerge(MI, MergeStartIdx: i, Unmerge: UnmergeI, UnmergeIdxStart: 0, NumElts, EltSize,
                                 AllowUndef: false))
        return false;
      ConcatSources.push_back(Elt: UnmergeI->getSourceReg());
    }

    MIB.setInstrAndDebugLoc(MI);
    MIB.buildMergeLikeInstr(Res: Dst, Ops: ConcatSources);
    DeadInsts.push_back(Elt: &MI);
    return true;
  }

  return false;
}
1040 | }; |
1041 | |
/// Try to combine away an unmerge \p MI by inspecting its source: first by
/// rebuilding each def via bit tracking (ArtifactValueFinder), then by
/// flattening unmerge-of-unmerge, and finally by folding unmerge-of-merge
/// (possibly through an intermediate artifact cast).
/// \returns true if \p MI was combined away; dead instructions are added to
/// \p DeadInsts and redefined vregs to \p UpdatedDefs.
bool tryCombineUnmergeValues(GUnmerge &MI,
                             SmallVectorImpl<MachineInstr *> &DeadInsts,
                             SmallVectorImpl<Register> &UpdatedDefs,
                             GISelChangeObserver &Observer) {
  unsigned NumDefs = MI.getNumDefs();
  Register SrcReg = MI.getSourceReg();
  MachineInstr *SrcDef = getDefIgnoringCopies(Reg: SrcReg, MRI);
  if (!SrcDef)
    return false;

  LLT OpTy = MRI.getType(Reg: SrcReg);
  LLT DestTy = MRI.getType(Reg: MI.getReg(Idx: 0));
  unsigned SrcDefIdx = getDefIndex(MI: *SrcDef, SearchDef: SrcReg);

  Builder.setInstrAndDebugLoc(MI);

  // First see whether every def can be rebuilt from bits of other values.
  ArtifactValueFinder Finder(MRI, Builder, LI);
  if (Finder.tryCombineUnmergeDefs(MI, Observer, UpdatedDefs)) {
    markInstAndDefDead(MI, DefMI&: *SrcDef, DeadInsts, DefIdx: SrcDefIdx);
    return true;
  }

  if (auto *SrcUnmerge = dyn_cast<GUnmerge>(Val: SrcDef)) {
    // %0:_(<4 x s16>) = G_FOO
    // %1:_(<2 x s16>), %2:_(<2 x s16>) = G_UNMERGE_VALUES %0
    // %3:_(s16), %4:_(s16) = G_UNMERGE_VALUES %1
    //
    // %3:_(s16), %4:_(s16), %5:_(s16), %6:_(s16) = G_UNMERGE_VALUES %0
    Register SrcUnmergeSrc = SrcUnmerge->getSourceReg();
    LLT SrcUnmergeSrcTy = MRI.getType(Reg: SrcUnmergeSrc);

    // If we need to decrease the number of vector elements in the result type
    // of an unmerge, this would involve the creation of an equivalent unmerge
    // to copy back to the original result registers.
    LegalizeActionStep ActionStep = LI.getAction(
        Query: {TargetOpcode::G_UNMERGE_VALUES, {OpTy, SrcUnmergeSrcTy}});
    switch (ActionStep.Action) {
    case LegalizeActions::Lower:
    case LegalizeActions::Unsupported:
      break;
    case LegalizeActions::FewerElements:
    case LegalizeActions::NarrowScalar:
      // Splitting the source type (TypeIdx 1) would not remove the unmerge.
      if (ActionStep.TypeIdx == 1)
        return false;
      break;
    default:
      return false;
    }

    auto NewUnmerge = Builder.buildUnmerge(Res: DestTy, Op: SrcUnmergeSrc);

    // TODO: Should we try to process out the other defs now? If the other
    // defs of the source unmerge are also unmerged, we end up with a separate
    // unmerge for each one.
    for (unsigned I = 0; I != NumDefs; ++I) {
      Register Def = MI.getReg(Idx: I);
      replaceRegOrBuildCopy(DstReg: Def, SrcReg: NewUnmerge.getReg(Idx: SrcDefIdx * NumDefs + I),
                            MRI, Builder, UpdatedDefs, Observer);
    }

    markInstAndDefDead(MI, DefMI&: *SrcUnmerge, DeadInsts, DefIdx: SrcDefIdx);
    return true;
  }

  MachineInstr *MergeI = SrcDef;
  unsigned ConvertOp = 0;

  // Handle intermediate conversions
  unsigned SrcOp = SrcDef->getOpcode();
  if (isArtifactCast(Opc: SrcOp)) {
    ConvertOp = SrcOp;
    MergeI = getDefIgnoringCopies(Reg: SrcDef->getOperand(i: 1).getReg(), MRI);
  }

  if (!MergeI || !canFoldMergeOpcode(MergeOp: MergeI->getOpcode(),
                                     ConvertOp, OpTy, DestTy)) {
    // We might have a chance to combine later by trying to combine
    // unmerge(cast) first
    return tryFoldUnmergeCast(MI, CastMI&: *SrcDef, DeadInsts, UpdatedDefs);
  }

  const unsigned NumMergeRegs = MergeI->getNumOperands() - 1;

  if (NumMergeRegs < NumDefs) {
    if (NumDefs % NumMergeRegs != 0)
      return false;

    Builder.setInstr(MI);
    // Transform to UNMERGEs, for example
    //   %1 = G_MERGE_VALUES %4, %5
    //   %9, %10, %11, %12 = G_UNMERGE_VALUES %1
    // to
    //   %9, %10 = G_UNMERGE_VALUES %4
    //   %11, %12 = G_UNMERGE_VALUES %5

    const unsigned NewNumDefs = NumDefs / NumMergeRegs;
    for (unsigned Idx = 0; Idx < NumMergeRegs; ++Idx) {
      SmallVector<Register, 8> DstRegs;
      for (unsigned j = 0, DefIdx = Idx * NewNumDefs; j < NewNumDefs;
           ++j, ++DefIdx)
        DstRegs.push_back(Elt: MI.getReg(Idx: DefIdx));

      if (ConvertOp) {
        LLT MergeDstTy = MRI.getType(Reg: SrcDef->getOperand(i: 0).getReg());

        // This is a vector that is being split and casted. Extract to the
        // element type, and do the conversion on the scalars (or smaller
        // vectors).
        LLT MergeEltTy = MergeDstTy.divide(Factor: NumMergeRegs);

        // Handle split to smaller vectors, with conversions.
        // %2(<8 x s8>) = G_CONCAT_VECTORS %0(<4 x s8>), %1(<4 x s8>)
        // %3(<8 x s16>) = G_SEXT %2
        // %4(<2 x s16>), %5(<2 x s16>), %6(<2 x s16>), %7(<2 x s16>) =
        // G_UNMERGE_VALUES %3
        //
        // =>
        //
        // %8(<4 x s16>) = G_SEXT %0
        // %9(<4 x s16>) = G_SEXT %1
        // %4(<2 x s16>), %5(<2 x s16>) = G_UNMERGE_VALUES %8
        // %7(<2 x s16>), %7(<2 x s16>) = G_UNMERGE_VALUES %9

        Register TmpReg = MRI.createGenericVirtualRegister(Ty: MergeEltTy);
        Builder.buildInstr(Opc: ConvertOp, DstOps: {TmpReg},
                           SrcOps: {MergeI->getOperand(i: Idx + 1).getReg()});
        Builder.buildUnmerge(Res: DstRegs, Op: TmpReg);
      } else {
        Builder.buildUnmerge(Res: DstRegs, Op: MergeI->getOperand(i: Idx + 1).getReg());
      }
      UpdatedDefs.append(in_start: DstRegs.begin(), in_end: DstRegs.end());
    }

  } else if (NumMergeRegs > NumDefs) {
    if (ConvertOp != 0 || NumMergeRegs % NumDefs != 0)
      return false;

    Builder.setInstr(MI);
    // Transform to MERGEs
    //   %6 = G_MERGE_VALUES %17, %18, %19, %20
    //   %7, %8 = G_UNMERGE_VALUES %6
    // to
    //   %7 = G_MERGE_VALUES %17, %18
    //   %8 = G_MERGE_VALUES %19, %20

    const unsigned NumRegs = NumMergeRegs / NumDefs;
    for (unsigned DefIdx = 0; DefIdx < NumDefs; ++DefIdx) {
      SmallVector<Register, 8> Regs;
      for (unsigned j = 0, Idx = NumRegs * DefIdx + 1; j < NumRegs;
           ++j, ++Idx)
        Regs.push_back(Elt: MergeI->getOperand(i: Idx).getReg());

      Register DefReg = MI.getReg(Idx: DefIdx);
      Builder.buildMergeLikeInstr(Res: DefReg, Ops: Regs);
      UpdatedDefs.push_back(Elt: DefReg);
    }

  } else {
    // Same number of defs and merge operands: 1:1 mapping, possibly with a
    // conversion (or bitcast when only the types disagree).
    LLT MergeSrcTy = MRI.getType(Reg: MergeI->getOperand(i: 1).getReg());

    if (!ConvertOp && DestTy != MergeSrcTy)
      ConvertOp = TargetOpcode::G_BITCAST;

    if (ConvertOp) {
      Builder.setInstr(MI);

      for (unsigned Idx = 0; Idx < NumDefs; ++Idx) {
        Register DefReg = MI.getOperand(i: Idx).getReg();
        Register MergeSrc = MergeI->getOperand(i: Idx + 1).getReg();

        if (!MRI.use_empty(RegNo: DefReg)) {
          Builder.buildInstr(Opc: ConvertOp, DstOps: {DefReg}, SrcOps: {MergeSrc});
          UpdatedDefs.push_back(Elt: DefReg);
        }
      }

      markInstAndDefDead(MI, DefMI&: *MergeI, DeadInsts);
      return true;
    }

    assert(DestTy == MergeSrcTy &&
           "Bitcast and the other kinds of conversions should "
           "have happened earlier");

    Builder.setInstr(MI);
    for (unsigned Idx = 0; Idx < NumDefs; ++Idx) {
      Register DstReg = MI.getOperand(i: Idx).getReg();
      Register SrcReg = MergeI->getOperand(i: Idx + 1).getReg();
      replaceRegOrBuildCopy(DstReg, SrcReg, MRI, Builder, UpdatedDefs,
                            Observer);
    }
  }

  markInstAndDefDead(MI, DefMI&: *MergeI, DeadInsts);
  return true;
}
1238 | |
1239 | bool (MachineInstr &MI, |
1240 | SmallVectorImpl<MachineInstr *> &DeadInsts, |
1241 | SmallVectorImpl<Register> &UpdatedDefs) { |
1242 | assert(MI.getOpcode() == TargetOpcode::G_EXTRACT); |
1243 | |
1244 | // Try to use the source registers from a G_MERGE_VALUES |
1245 | // |
1246 | // %2 = G_MERGE_VALUES %0, %1 |
1247 | // %3 = G_EXTRACT %2, N |
1248 | // => |
1249 | // |
1250 | // for N < %2.getSizeInBits() / 2 |
1251 | // %3 = G_EXTRACT %0, N |
1252 | // |
1253 | // for N >= %2.getSizeInBits() / 2 |
1254 | // %3 = G_EXTRACT %1, (N - %0.getSizeInBits() |
1255 | |
1256 | Register SrcReg = lookThroughCopyInstrs(Reg: MI.getOperand(i: 1).getReg()); |
1257 | MachineInstr *MergeI = MRI.getVRegDef(Reg: SrcReg); |
1258 | if (!MergeI || !isa<GMergeLikeInstr>(Val: MergeI)) |
1259 | return false; |
1260 | |
1261 | Register DstReg = MI.getOperand(i: 0).getReg(); |
1262 | LLT DstTy = MRI.getType(Reg: DstReg); |
1263 | LLT SrcTy = MRI.getType(Reg: SrcReg); |
1264 | |
1265 | // TODO: Do we need to check if the resulting extract is supported? |
1266 | unsigned = DstTy.getSizeInBits(); |
1267 | unsigned Offset = MI.getOperand(i: 2).getImm(); |
1268 | unsigned NumMergeSrcs = MergeI->getNumOperands() - 1; |
1269 | unsigned MergeSrcSize = SrcTy.getSizeInBits() / NumMergeSrcs; |
1270 | unsigned MergeSrcIdx = Offset / MergeSrcSize; |
1271 | |
1272 | // Compute the offset of the last bit the extract needs. |
1273 | unsigned EndMergeSrcIdx = (Offset + ExtractDstSize - 1) / MergeSrcSize; |
1274 | |
1275 | // Can't handle the case where the extract spans multiple inputs. |
1276 | if (MergeSrcIdx != EndMergeSrcIdx) |
1277 | return false; |
1278 | |
1279 | // TODO: We could modify MI in place in most cases. |
1280 | Builder.setInstr(MI); |
1281 | Builder.buildExtract(Res: DstReg, Src: MergeI->getOperand(i: MergeSrcIdx + 1).getReg(), |
1282 | Index: Offset - MergeSrcIdx * MergeSrcSize); |
1283 | UpdatedDefs.push_back(Elt: DstReg); |
1284 | markInstAndDefDead(MI, DefMI&: *MergeI, DeadInsts); |
1285 | return true; |
1286 | } |
1287 | |
/// Try to combine away MI.
/// Returns true if it combined away the MI.
/// Adds instructions that are dead as a result of the combine
/// into DeadInsts, which can include MI.
bool tryCombineInstruction(MachineInstr &MI,
                           SmallVectorImpl<MachineInstr *> &DeadInsts,
                           GISelObserverWrapper &WrapperObserver) {
  ArtifactValueFinder Finder(MRI, Builder, LI);

  // This might be a recursive call, and we might have DeadInsts already
  // populated. To avoid bad things happening later with multiple vreg defs
  // etc, process the dead instructions now if any.
  if (!DeadInsts.empty())
    deleteMarkedDeadInsts(DeadInsts, WrapperObserver);

  // Put here every vreg that was redefined in such a way that it's at least
  // possible that one (or more) of its users (immediate or COPY-separated)
  // could become artifact combinable with the new definition (or the
  // instruction reachable from it through a chain of copies if any).
  SmallVector<Register, 4> UpdatedDefs;
  bool Changed = false;
  switch (MI.getOpcode()) {
  default:
    return false;
  case TargetOpcode::G_ANYEXT:
    Changed = tryCombineAnyExt(MI, DeadInsts, UpdatedDefs, Observer&: WrapperObserver);
    break;
  case TargetOpcode::G_ZEXT:
    Changed = tryCombineZExt(MI, DeadInsts, UpdatedDefs, Observer&: WrapperObserver);
    break;
  case TargetOpcode::G_SEXT:
    Changed = tryCombineSExt(MI, DeadInsts, UpdatedDefs);
    break;
  case TargetOpcode::G_UNMERGE_VALUES:
    Changed = tryCombineUnmergeValues(MI&: cast<GUnmerge>(Val&: MI), DeadInsts,
                                      UpdatedDefs, Observer&: WrapperObserver);
    break;
  case TargetOpcode::G_MERGE_VALUES:
  case TargetOpcode::G_BUILD_VECTOR:
  case TargetOpcode::G_CONCAT_VECTORS:
    // If any of the users of this merge are an unmerge, then add them to the
    // artifact worklist in case there's folding that can be done looking up.
    for (MachineInstr &U : MRI.use_instructions(Reg: MI.getOperand(i: 0).getReg())) {
      if (U.getOpcode() == TargetOpcode::G_UNMERGE_VALUES ||
          U.getOpcode() == TargetOpcode::G_TRUNC) {
        UpdatedDefs.push_back(Elt: MI.getOperand(i: 0).getReg());
        break;
      }
    }
    Changed = Finder.tryCombineMergeLike(MI&: cast<GMergeLikeInstr>(Val&: MI), DeadInsts,
                                         UpdatedDefs, Observer&: WrapperObserver);
    break;
  case TargetOpcode::G_EXTRACT:
    Changed = tryCombineExtract(MI, DeadInsts, UpdatedDefs);
    break;
  case TargetOpcode::G_TRUNC:
    Changed = tryCombineTrunc(MI, DeadInsts, UpdatedDefs, Observer&: WrapperObserver);
    if (!Changed) {
      // Try to combine truncates away even if they are legal. As all artifact
      // combines at the moment look only "up" the def-use chains, we achieve
      // that by throwing truncates' users (with look through copies) into the
      // ArtifactList again.
      UpdatedDefs.push_back(Elt: MI.getOperand(i: 0).getReg());
    }
    break;
  }
  // If the main loop through the ArtifactList found at least one combinable
  // pair of artifacts, not only combine it away (as done above), but also
  // follow the def-use chain from there to combine everything that can be
  // combined within this def-use chain of artifacts.
  while (!UpdatedDefs.empty()) {
    Register NewDef = UpdatedDefs.pop_back_val();
    assert(NewDef.isVirtual() && "Unexpected redefinition of a physreg");
    for (MachineInstr &Use : MRI.use_instructions(Reg: NewDef)) {
      switch (Use.getOpcode()) {
      // Keep this list in sync with the list of all artifact combines.
      case TargetOpcode::G_ANYEXT:
      case TargetOpcode::G_ZEXT:
      case TargetOpcode::G_SEXT:
      case TargetOpcode::G_UNMERGE_VALUES:
      case TargetOpcode::G_EXTRACT:
      case TargetOpcode::G_TRUNC:
      case TargetOpcode::G_BUILD_VECTOR:
        // Adding Use to ArtifactList.
        WrapperObserver.changedInstr(MI&: Use);
        break;
      case TargetOpcode::G_ASSERT_SEXT:
      case TargetOpcode::G_ASSERT_ZEXT:
      case TargetOpcode::G_ASSERT_ALIGN:
      case TargetOpcode::COPY: {
        // Copies and hint instructions forward the value; queue their defs
        // so the real users get revisited too.
        Register Copy = Use.getOperand(i: 0).getReg();
        if (Copy.isVirtual())
          UpdatedDefs.push_back(Elt: Copy);
        break;
      }
      default:
        // If we do not have an artifact combine for the opcode, there is no
        // point in adding it to the ArtifactList as nothing interesting will
        // be done to it anyway.
        break;
      }
    }
  }
  return Changed;
}
1393 | |
1394 | private: |
1395 | static Register getArtifactSrcReg(const MachineInstr &MI) { |
1396 | switch (MI.getOpcode()) { |
1397 | case TargetOpcode::COPY: |
1398 | case TargetOpcode::G_TRUNC: |
1399 | case TargetOpcode::G_ZEXT: |
1400 | case TargetOpcode::G_ANYEXT: |
1401 | case TargetOpcode::G_SEXT: |
1402 | case TargetOpcode::G_EXTRACT: |
1403 | case TargetOpcode::G_ASSERT_SEXT: |
1404 | case TargetOpcode::G_ASSERT_ZEXT: |
1405 | case TargetOpcode::G_ASSERT_ALIGN: |
1406 | return MI.getOperand(i: 1).getReg(); |
1407 | case TargetOpcode::G_UNMERGE_VALUES: |
1408 | return MI.getOperand(i: MI.getNumOperands() - 1).getReg(); |
1409 | default: |
1410 | llvm_unreachable("Not a legalization artifact happen" ); |
1411 | } |
1412 | } |
1413 | |
/// Mark a def of one of MI's original operands, DefMI, as dead if changing MI
/// (either by killing it or changing operands) results in DefMI being dead
/// too. In-between COPYs or artifact-casts are also collected if they are
/// dead.
/// MI is not marked dead.
void markDefDead(MachineInstr &MI, MachineInstr &DefMI,
                 SmallVectorImpl<MachineInstr *> &DeadInsts,
                 unsigned DefIdx = 0) {
  // Collect all the copy instructions that are made dead, due to deleting
  // this instruction. Collect all of them until the Trunc(DefMI).
  // Eg,
  // %1(s1) = G_TRUNC %0(s32)
  // %2(s1) = COPY %1(s1)
  // %3(s1) = COPY %2(s1)
  // %4(s32) = G_ANYEXT %3(s1)
  // In this case, we would have replaced %4 with a copy of %0,
  // and as a result, %3, %2, %1 are dead.
  MachineInstr *PrevMI = &MI;
  while (PrevMI != &DefMI) {
    Register PrevRegSrc = getArtifactSrcReg(MI: *PrevMI);

    MachineInstr *TmpDef = MRI.getVRegDef(Reg: PrevRegSrc);
    // Only walk further while each link in the chain has a single user;
    // otherwise the intermediate instruction stays live and we stop.
    if (MRI.hasOneUse(RegNo: PrevRegSrc)) {
      if (TmpDef != &DefMI) {
        assert((TmpDef->getOpcode() == TargetOpcode::COPY ||
                isArtifactCast(TmpDef->getOpcode()) ||
                isPreISelGenericOptimizationHint(TmpDef->getOpcode())) &&
               "Expecting copy or artifact cast here");

        DeadInsts.push_back(Elt: TmpDef);
      }
    } else
      break;
    PrevMI = TmpDef;
  }

  // Only if the walk reached DefMI itself can DefMI become dead.
  if (PrevMI == &DefMI) {
    unsigned I = 0;
    bool IsDead = true;
    for (MachineOperand &Def : DefMI.defs()) {
      if (I != DefIdx) {
        // Any other def that still has users keeps DefMI alive.
        if (!MRI.use_empty(RegNo: Def.getReg())) {
          IsDead = false;
          break;
        }
      } else {
        // NOTE(review): when the def at DefIdx has more than one use we
        // break out with IsDead still true, so DefMI is still queued as
        // dead below — confirm this is the intended behavior.
        if (!MRI.hasOneUse(RegNo: DefMI.getOperand(i: DefIdx).getReg()))
          break;
      }

      ++I;
    }

    if (IsDead)
      DeadInsts.push_back(Elt: &DefMI);
  }
}
1471 | |
/// Mark MI as dead. If a def of one of MI's operands, DefMI, would also be
/// dead due to MI being killed, then mark DefMI as dead too.
/// Some of the combines (extends(trunc)), try to walk through redundant
/// copies in between the extends and the truncs, and this attempts to collect
/// the in between copies if they're dead.
void markInstAndDefDead(MachineInstr &MI, MachineInstr &DefMI,
                        SmallVectorImpl<MachineInstr *> &DeadInsts,
                        unsigned DefIdx = 0) {
  // MI itself is always queued for deletion; markDefDead decides about
  // DefMI and any single-use copies/casts in between.
  DeadInsts.push_back(Elt: &MI);
  markDefDead(MI, DefMI, DeadInsts, DefIdx);
}
1483 | |
1484 | /// Erase the dead instructions in the list and call the observer hooks. |
1485 | /// Normally the Legalizer will deal with erasing instructions that have been |
1486 | /// marked dead. However, for the trunc(ext(x)) cases we can end up trying to |
1487 | /// process instructions which have been marked dead, but otherwise break the |
1488 | /// MIR by introducing multiple vreg defs. For those cases, allow the combines |
1489 | /// to explicitly delete the instructions before we run into trouble. |
1490 | void deleteMarkedDeadInsts(SmallVectorImpl<MachineInstr *> &DeadInsts, |
1491 | GISelObserverWrapper &WrapperObserver) { |
1492 | for (auto *DeadMI : DeadInsts) { |
1493 | LLVM_DEBUG(dbgs() << *DeadMI << "Is dead, eagerly deleting\n" ); |
1494 | WrapperObserver.erasingInstr(MI&: *DeadMI); |
1495 | DeadMI->eraseFromParent(); |
1496 | } |
1497 | DeadInsts.clear(); |
1498 | } |
1499 | |
1500 | /// Checks if the target legalizer info has specified anything about the |
1501 | /// instruction, or if unsupported. |
1502 | bool isInstUnsupported(const LegalityQuery &Query) const { |
1503 | using namespace LegalizeActions; |
1504 | auto Step = LI.getAction(Query); |
1505 | return Step.Action == Unsupported || Step.Action == NotFound; |
1506 | } |
1507 | |
1508 | bool isInstLegal(const LegalityQuery &Query) const { |
1509 | return LI.getAction(Query).Action == LegalizeActions::Legal; |
1510 | } |
1511 | |
1512 | bool isConstantUnsupported(LLT Ty) const { |
1513 | if (!Ty.isVector()) |
1514 | return isInstUnsupported(Query: {TargetOpcode::G_CONSTANT, {Ty}}); |
1515 | |
1516 | LLT EltTy = Ty.getElementType(); |
1517 | return isInstUnsupported(Query: {TargetOpcode::G_CONSTANT, {EltTy}}) || |
1518 | isInstUnsupported(Query: {TargetOpcode::G_BUILD_VECTOR, {Ty, EltTy}}); |
1519 | } |
1520 | |
1521 | /// Looks through copy instructions and returns the actual |
1522 | /// source register. |
1523 | Register lookThroughCopyInstrs(Register Reg) { |
1524 | Register TmpReg = getSrcRegIgnoringCopies(Reg, MRI); |
1525 | return TmpReg.isValid() ? TmpReg : Reg; |
1526 | } |
1527 | }; |
1528 | |
1529 | } // namespace llvm |
1530 | |
1531 | #endif // LLVM_CODEGEN_GLOBALISEL_LEGALIZATIONARTIFACTCOMBINER_H |
1532 | |