1//===-- llvm/CodeGen/GlobalISel/LegalizerHelper.cpp -----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file This file implements the LegalizerHelper class to legalize
10/// individual instructions and the LegalizeMachineIR wrapper pass for the
11/// primary legalization.
12//
13//===----------------------------------------------------------------------===//
14
15#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
16#include "llvm/CodeGen/GlobalISel/CallLowering.h"
17#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
18#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
19#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
20#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
21#include "llvm/CodeGen/GlobalISel/LostDebugLocObserver.h"
22#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
23#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
24#include "llvm/CodeGen/GlobalISel/Utils.h"
25#include "llvm/CodeGen/MachineConstantPool.h"
26#include "llvm/CodeGen/MachineFrameInfo.h"
27#include "llvm/CodeGen/MachineRegisterInfo.h"
28#include "llvm/CodeGen/RuntimeLibcalls.h"
29#include "llvm/CodeGen/TargetFrameLowering.h"
30#include "llvm/CodeGen/TargetInstrInfo.h"
31#include "llvm/CodeGen/TargetLowering.h"
32#include "llvm/CodeGen/TargetOpcodes.h"
33#include "llvm/CodeGen/TargetSubtargetInfo.h"
34#include "llvm/IR/Instructions.h"
35#include "llvm/Support/Debug.h"
36#include "llvm/Support/MathExtras.h"
37#include "llvm/Support/raw_ostream.h"
38#include "llvm/Target/TargetMachine.h"
39#include <numeric>
40#include <optional>
41
42#define DEBUG_TYPE "legalizer"
43
44using namespace llvm;
45using namespace LegalizeActions;
46using namespace MIPatternMatch;
47
48/// Try to break down \p OrigTy into \p NarrowTy sized pieces.
49///
50/// Returns the number of \p NarrowTy elements needed to reconstruct \p OrigTy,
51/// with any leftover piece as type \p LeftoverTy
52///
53/// Returns -1 in the first element of the pair if the breakdown is not
54/// satisfiable.
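///
/// For example, breaking an s88 value into s32 pieces yields {2, 1}: two s32
/// parts plus one s24 leftover, with \p LeftoverTy set to s24.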
55static std::pair<int, int>
56getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy) {
57 assert(!LeftoverTy.isValid() && "this is an out argument");
58
59 unsigned Size = OrigTy.getSizeInBits();
60 unsigned NarrowSize = NarrowTy.getSizeInBits();
61 unsigned NumParts = Size / NarrowSize;
62 unsigned LeftoverSize = Size - NumParts * NarrowSize;
63 assert(Size > NarrowSize);
64
65 if (LeftoverSize == 0)
66 return {NumParts, 0};
67
68 if (NarrowTy.isVector()) {
69 unsigned EltSize = OrigTy.getScalarSizeInBits();
70 if (LeftoverSize % EltSize != 0)
71 return {-1, -1};
72 LeftoverTy = LLT::scalarOrVector(
73 EC: ElementCount::getFixed(MinVal: LeftoverSize / EltSize), ScalarSize: EltSize);
74 } else {
75 LeftoverTy = LLT::scalar(SizeInBits: LeftoverSize);
76 }
77
78 int NumLeftover = LeftoverSize / LeftoverTy.getSizeInBits();
79 return std::make_pair(x&: NumParts, y&: NumLeftover);
80}
81
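/// Map a scalar LLT to the corresponding IR floating-point type, e.g. s32 to
/// float and s80 to x86_fp80. Returns nullptr for non-scalar or unsupported
/// sizes.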
82static Type *getFloatTypeForLLT(LLVMContext &Ctx, LLT Ty) {
83
84 if (!Ty.isScalar())
85 return nullptr;
86
87 switch (Ty.getSizeInBits()) {
88 case 16:
89 return Type::getHalfTy(C&: Ctx);
90 case 32:
91 return Type::getFloatTy(C&: Ctx);
92 case 64:
93 return Type::getDoubleTy(C&: Ctx);
94 case 80:
95 return Type::getX86_FP80Ty(C&: Ctx);
96 case 128:
97 return Type::getFP128Ty(C&: Ctx);
98 default:
99 return nullptr;
100 }
101}
102
103LegalizerHelper::LegalizerHelper(MachineFunction &MF,
104 GISelChangeObserver &Observer,
105 MachineIRBuilder &Builder)
106 : MIRBuilder(Builder), Observer(Observer), MRI(MF.getRegInfo()),
107 LI(*MF.getSubtarget().getLegalizerInfo()),
108 TLI(*MF.getSubtarget().getTargetLowering()), KB(nullptr) {}
109
110LegalizerHelper::LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI,
111 GISelChangeObserver &Observer,
112 MachineIRBuilder &B, GISelKnownBits *KB)
113 : MIRBuilder(B), Observer(Observer), MRI(MF.getRegInfo()), LI(LI),
114 TLI(*MF.getSubtarget().getTargetLowering()), KB(KB) {}
115
116LegalizerHelper::LegalizeResult
117LegalizerHelper::legalizeInstrStep(MachineInstr &MI,
118 LostDebugLocObserver &LocObserver) {
119 LLVM_DEBUG(dbgs() << "Legalizing: " << MI);
120
121 MIRBuilder.setInstrAndDebugLoc(MI);
122
123 if (isa<GIntrinsic>(Val: MI))
124 return LI.legalizeIntrinsic(Helper&: *this, MI) ? Legalized : UnableToLegalize;
125 auto Step = LI.getAction(MI, MRI);
126 switch (Step.Action) {
127 case Legal:
128 LLVM_DEBUG(dbgs() << ".. Already legal\n");
129 return AlreadyLegal;
130 case Libcall:
131 LLVM_DEBUG(dbgs() << ".. Convert to libcall\n");
132 return libcall(MI, LocObserver);
133 case NarrowScalar:
134 LLVM_DEBUG(dbgs() << ".. Narrow scalar\n");
135 return narrowScalar(MI, TypeIdx: Step.TypeIdx, NarrowTy: Step.NewType);
136 case WidenScalar:
137 LLVM_DEBUG(dbgs() << ".. Widen scalar\n");
138 return widenScalar(MI, TypeIdx: Step.TypeIdx, WideTy: Step.NewType);
139 case Bitcast:
140 LLVM_DEBUG(dbgs() << ".. Bitcast type\n");
141 return bitcast(MI, TypeIdx: Step.TypeIdx, Ty: Step.NewType);
142 case Lower:
143 LLVM_DEBUG(dbgs() << ".. Lower\n");
144 return lower(MI, TypeIdx: Step.TypeIdx, Ty: Step.NewType);
145 case FewerElements:
146 LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n");
147 return fewerElementsVector(MI, TypeIdx: Step.TypeIdx, NarrowTy: Step.NewType);
148 case MoreElements:
149 LLVM_DEBUG(dbgs() << ".. Increase number of elements\n");
150 return moreElementsVector(MI, TypeIdx: Step.TypeIdx, MoreTy: Step.NewType);
151 case Custom:
152 LLVM_DEBUG(dbgs() << ".. Custom legalization\n");
153 return LI.legalizeCustom(Helper&: *this, MI, LocObserver) ? Legalized
154 : UnableToLegalize;
155 default:
156 LLVM_DEBUG(dbgs() << ".. Unable to legalize\n");
157 return UnableToLegalize;
158 }
159}
160
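/// Assemble \p DstReg (of type \p ResultTy) from the pieces in \p PartRegs
/// (each of type \p PartTy), plus any \p LeftoverRegs of type \p LeftoverTy
/// covering the remaining bits.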
161void LegalizerHelper::insertParts(Register DstReg,
162 LLT ResultTy, LLT PartTy,
163 ArrayRef<Register> PartRegs,
164 LLT LeftoverTy,
165 ArrayRef<Register> LeftoverRegs) {
166 if (!LeftoverTy.isValid()) {
167 assert(LeftoverRegs.empty());
168
169 if (!ResultTy.isVector()) {
170 MIRBuilder.buildMergeLikeInstr(Res: DstReg, Ops: PartRegs);
171 return;
172 }
173
174 if (PartTy.isVector())
175 MIRBuilder.buildConcatVectors(Res: DstReg, Ops: PartRegs);
176 else
177 MIRBuilder.buildBuildVector(Res: DstReg, Ops: PartRegs);
178 return;
179 }
180
  // Merge sub-vectors with different numbers of elements and insert them
  // into DstReg.
182 if (ResultTy.isVector()) {
183 assert(LeftoverRegs.size() == 1 && "Expected one leftover register");
184 SmallVector<Register, 8> AllRegs;
185 for (auto Reg : concat<const Register>(Ranges&: PartRegs, Ranges&: LeftoverRegs))
186 AllRegs.push_back(Elt: Reg);
187 return mergeMixedSubvectors(DstReg, PartRegs: AllRegs);
188 }
189
190 SmallVector<Register> GCDRegs;
191 LLT GCDTy = getGCDType(OrigTy: getGCDType(OrigTy: ResultTy, TargetTy: LeftoverTy), TargetTy: PartTy);
192 for (auto PartReg : concat<const Register>(Ranges&: PartRegs, Ranges&: LeftoverRegs))
193 extractGCDType(Parts&: GCDRegs, GCDTy, SrcReg: PartReg);
194 LLT ResultLCMTy = buildLCMMergePieces(DstTy: ResultTy, NarrowTy: LeftoverTy, GCDTy, VRegs&: GCDRegs);
195 buildWidenedRemergeToDst(DstReg, LCMTy: ResultLCMTy, RemergeRegs: GCDRegs);
196}
197
198void LegalizerHelper::appendVectorElts(SmallVectorImpl<Register> &Elts,
199 Register Reg) {
200 LLT Ty = MRI.getType(Reg);
201 SmallVector<Register, 8> RegElts;
202 extractParts(Reg, Ty: Ty.getScalarType(), NumParts: Ty.getNumElements(), VRegs&: RegElts,
203 MIRBuilder, MRI);
204 Elts.append(RHS: RegElts);
205}
206
207/// Merge \p PartRegs with different types into \p DstReg.
208void LegalizerHelper::mergeMixedSubvectors(Register DstReg,
209 ArrayRef<Register> PartRegs) {
210 SmallVector<Register, 8> AllElts;
211 for (unsigned i = 0; i < PartRegs.size() - 1; ++i)
212 appendVectorElts(Elts&: AllElts, Reg: PartRegs[i]);
213
214 Register Leftover = PartRegs[PartRegs.size() - 1];
215 if (MRI.getType(Reg: Leftover).isScalar())
216 AllElts.push_back(Elt: Leftover);
217 else
218 appendVectorElts(Elts&: AllElts, Reg: Leftover);
219
220 MIRBuilder.buildMergeLikeInstr(Res: DstReg, Ops: AllElts);
221}
222
223/// Append the result registers of G_UNMERGE_VALUES \p MI to \p Regs.
224static void getUnmergeResults(SmallVectorImpl<Register> &Regs,
225 const MachineInstr &MI) {
226 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);
227
228 const int StartIdx = Regs.size();
229 const int NumResults = MI.getNumOperands() - 1;
230 Regs.resize(N: Regs.size() + NumResults);
231 for (int I = 0; I != NumResults; ++I)
232 Regs[StartIdx + I] = MI.getOperand(i: I).getReg();
233}
234
235void LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts,
236 LLT GCDTy, Register SrcReg) {
237 LLT SrcTy = MRI.getType(Reg: SrcReg);
238 if (SrcTy == GCDTy) {
239 // If the source already evenly divides the result type, we don't need to do
240 // anything.
241 Parts.push_back(Elt: SrcReg);
242 } else {
243 // Need to split into common type sized pieces.
244 auto Unmerge = MIRBuilder.buildUnmerge(Res: GCDTy, Op: SrcReg);
245 getUnmergeResults(Regs&: Parts, MI: *Unmerge);
246 }
247}
248
249LLT LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts, LLT DstTy,
250 LLT NarrowTy, Register SrcReg) {
251 LLT SrcTy = MRI.getType(Reg: SrcReg);
252 LLT GCDTy = getGCDType(OrigTy: getGCDType(OrigTy: SrcTy, TargetTy: NarrowTy), TargetTy: DstTy);
253 extractGCDType(Parts, GCDTy, SrcReg);
254 return GCDTy;
255}
256
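/// Merge the GCD-typed registers in \p VRegs into pieces of type \p NarrowTy
/// covering the least common multiple of \p DstTy and \p NarrowTy. If the
/// sources do not cover the LCM, the tail is padded according to
/// \p PadStrategy (G_ANYEXT, G_ZEXT or G_SEXT). On return \p VRegs holds the
/// NarrowTy-typed pieces and the LCM type is returned.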
257LLT LegalizerHelper::buildLCMMergePieces(LLT DstTy, LLT NarrowTy, LLT GCDTy,
258 SmallVectorImpl<Register> &VRegs,
259 unsigned PadStrategy) {
260 LLT LCMTy = getLCMType(OrigTy: DstTy, TargetTy: NarrowTy);
261
262 int NumParts = LCMTy.getSizeInBits() / NarrowTy.getSizeInBits();
263 int NumSubParts = NarrowTy.getSizeInBits() / GCDTy.getSizeInBits();
264 int NumOrigSrc = VRegs.size();
265
266 Register PadReg;
267
268 // Get a value we can use to pad the source value if the sources won't evenly
269 // cover the result type.
270 if (NumOrigSrc < NumParts * NumSubParts) {
271 if (PadStrategy == TargetOpcode::G_ZEXT)
272 PadReg = MIRBuilder.buildConstant(Res: GCDTy, Val: 0).getReg(Idx: 0);
273 else if (PadStrategy == TargetOpcode::G_ANYEXT)
274 PadReg = MIRBuilder.buildUndef(Res: GCDTy).getReg(Idx: 0);
275 else {
276 assert(PadStrategy == TargetOpcode::G_SEXT);
277
278 // Shift the sign bit of the low register through the high register.
279 auto ShiftAmt =
280 MIRBuilder.buildConstant(Res: LLT::scalar(SizeInBits: 64), Val: GCDTy.getSizeInBits() - 1);
281 PadReg = MIRBuilder.buildAShr(Dst: GCDTy, Src0: VRegs.back(), Src1: ShiftAmt).getReg(Idx: 0);
282 }
283 }
284
285 // Registers for the final merge to be produced.
286 SmallVector<Register, 4> Remerge(NumParts);
287
288 // Registers needed for intermediate merges, which will be merged into a
289 // source for Remerge.
290 SmallVector<Register, 4> SubMerge(NumSubParts);
291
292 // Once we've fully read off the end of the original source bits, we can reuse
293 // the same high bits for remaining padding elements.
294 Register AllPadReg;
295
296 // Build merges to the LCM type to cover the original result type.
297 for (int I = 0; I != NumParts; ++I) {
298 bool AllMergePartsArePadding = true;
299
300 // Build the requested merges to the requested type.
301 for (int J = 0; J != NumSubParts; ++J) {
302 int Idx = I * NumSubParts + J;
303 if (Idx >= NumOrigSrc) {
304 SubMerge[J] = PadReg;
305 continue;
306 }
307
308 SubMerge[J] = VRegs[Idx];
309
310 // There are meaningful bits here we can't reuse later.
311 AllMergePartsArePadding = false;
312 }
313
314 // If we've filled up a complete piece with padding bits, we can directly
315 // emit the natural sized constant if applicable, rather than a merge of
316 // smaller constants.
317 if (AllMergePartsArePadding && !AllPadReg) {
318 if (PadStrategy == TargetOpcode::G_ANYEXT)
319 AllPadReg = MIRBuilder.buildUndef(Res: NarrowTy).getReg(Idx: 0);
320 else if (PadStrategy == TargetOpcode::G_ZEXT)
321 AllPadReg = MIRBuilder.buildConstant(Res: NarrowTy, Val: 0).getReg(Idx: 0);
322
323 // If this is a sign extension, we can't materialize a trivial constant
324 // with the right type and have to produce a merge.
325 }
326
327 if (AllPadReg) {
328 // Avoid creating additional instructions if we're just adding additional
329 // copies of padding bits.
330 Remerge[I] = AllPadReg;
331 continue;
332 }
333
334 if (NumSubParts == 1)
335 Remerge[I] = SubMerge[0];
336 else
337 Remerge[I] = MIRBuilder.buildMergeLikeInstr(Res: NarrowTy, Ops: SubMerge).getReg(Idx: 0);
338
339 // In the sign extend padding case, re-use the first all-signbit merge.
340 if (AllMergePartsArePadding && !AllPadReg)
341 AllPadReg = Remerge[I];
342 }
343
344 VRegs = std::move(Remerge);
345 return LCMTy;
346}
347
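/// Merge \p RemergeRegs into a single value of type \p LCMTy, then truncate
/// (for scalars) or unmerge (for vectors) the result to fill \p DstReg.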
348void LegalizerHelper::buildWidenedRemergeToDst(Register DstReg, LLT LCMTy,
349 ArrayRef<Register> RemergeRegs) {
350 LLT DstTy = MRI.getType(Reg: DstReg);
351
352 // Create the merge to the widened source, and extract the relevant bits into
353 // the result.
354
355 if (DstTy == LCMTy) {
356 MIRBuilder.buildMergeLikeInstr(Res: DstReg, Ops: RemergeRegs);
357 return;
358 }
359
360 auto Remerge = MIRBuilder.buildMergeLikeInstr(Res: LCMTy, Ops: RemergeRegs);
361 if (DstTy.isScalar() && LCMTy.isScalar()) {
362 MIRBuilder.buildTrunc(Res: DstReg, Op: Remerge);
363 return;
364 }
365
366 if (LCMTy.isVector()) {
367 unsigned NumDefs = LCMTy.getSizeInBits() / DstTy.getSizeInBits();
368 SmallVector<Register, 8> UnmergeDefs(NumDefs);
369 UnmergeDefs[0] = DstReg;
370 for (unsigned I = 1; I != NumDefs; ++I)
371 UnmergeDefs[I] = MRI.createGenericVirtualRegister(Ty: DstTy);
372
373 MIRBuilder.buildUnmerge(Res: UnmergeDefs,
374 Op: MIRBuilder.buildMergeLikeInstr(Res: LCMTy, Ops: RemergeRegs));
375 return;
376 }
377
378 llvm_unreachable("unhandled case");
379}
380
381static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
382#define RTLIBCASE_INT(LibcallPrefix) \
383 do { \
384 switch (Size) { \
385 case 32: \
386 return RTLIB::LibcallPrefix##32; \
387 case 64: \
388 return RTLIB::LibcallPrefix##64; \
389 case 128: \
390 return RTLIB::LibcallPrefix##128; \
391 default: \
392 llvm_unreachable("unexpected size"); \
393 } \
394 } while (0)
395
396#define RTLIBCASE(LibcallPrefix) \
397 do { \
398 switch (Size) { \
399 case 32: \
400 return RTLIB::LibcallPrefix##32; \
401 case 64: \
402 return RTLIB::LibcallPrefix##64; \
403 case 80: \
404 return RTLIB::LibcallPrefix##80; \
405 case 128: \
406 return RTLIB::LibcallPrefix##128; \
407 default: \
408 llvm_unreachable("unexpected size"); \
409 } \
410 } while (0)
411
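  // For example, G_FADD with Size == 64 selects RTLIB::ADD_F64, and G_MUL
  // with Size == 32 selects RTLIB::MUL_I32.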
412 switch (Opcode) {
413 case TargetOpcode::G_MUL:
414 RTLIBCASE_INT(MUL_I);
415 case TargetOpcode::G_SDIV:
416 RTLIBCASE_INT(SDIV_I);
417 case TargetOpcode::G_UDIV:
418 RTLIBCASE_INT(UDIV_I);
419 case TargetOpcode::G_SREM:
420 RTLIBCASE_INT(SREM_I);
421 case TargetOpcode::G_UREM:
422 RTLIBCASE_INT(UREM_I);
423 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
424 RTLIBCASE_INT(CTLZ_I);
425 case TargetOpcode::G_FADD:
426 RTLIBCASE(ADD_F);
427 case TargetOpcode::G_FSUB:
428 RTLIBCASE(SUB_F);
429 case TargetOpcode::G_FMUL:
430 RTLIBCASE(MUL_F);
431 case TargetOpcode::G_FDIV:
432 RTLIBCASE(DIV_F);
433 case TargetOpcode::G_FEXP:
434 RTLIBCASE(EXP_F);
435 case TargetOpcode::G_FEXP2:
436 RTLIBCASE(EXP2_F);
437 case TargetOpcode::G_FEXP10:
438 RTLIBCASE(EXP10_F);
439 case TargetOpcode::G_FREM:
440 RTLIBCASE(REM_F);
441 case TargetOpcode::G_FPOW:
442 RTLIBCASE(POW_F);
443 case TargetOpcode::G_FPOWI:
444 RTLIBCASE(POWI_F);
445 case TargetOpcode::G_FMA:
446 RTLIBCASE(FMA_F);
447 case TargetOpcode::G_FSIN:
448 RTLIBCASE(SIN_F);
449 case TargetOpcode::G_FCOS:
450 RTLIBCASE(COS_F);
451 case TargetOpcode::G_FLOG10:
452 RTLIBCASE(LOG10_F);
453 case TargetOpcode::G_FLOG:
454 RTLIBCASE(LOG_F);
455 case TargetOpcode::G_FLOG2:
456 RTLIBCASE(LOG2_F);
457 case TargetOpcode::G_FLDEXP:
458 RTLIBCASE(LDEXP_F);
459 case TargetOpcode::G_FCEIL:
460 RTLIBCASE(CEIL_F);
461 case TargetOpcode::G_FFLOOR:
462 RTLIBCASE(FLOOR_F);
463 case TargetOpcode::G_FMINNUM:
464 RTLIBCASE(FMIN_F);
465 case TargetOpcode::G_FMAXNUM:
466 RTLIBCASE(FMAX_F);
467 case TargetOpcode::G_FSQRT:
468 RTLIBCASE(SQRT_F);
469 case TargetOpcode::G_FRINT:
470 RTLIBCASE(RINT_F);
471 case TargetOpcode::G_FNEARBYINT:
472 RTLIBCASE(NEARBYINT_F);
473 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
474 RTLIBCASE(ROUNDEVEN_F);
475 case TargetOpcode::G_INTRINSIC_LRINT:
476 RTLIBCASE(LRINT_F);
477 case TargetOpcode::G_INTRINSIC_LLRINT:
478 RTLIBCASE(LLRINT_F);
479 }
480 llvm_unreachable("Unknown libcall function");
481}
482
483/// True if an instruction is in tail position in its caller. Intended for
484/// legalizing libcalls as tail calls when possible.
485static bool isLibCallInTailPosition(const CallLowering::ArgInfo &Result,
486 MachineInstr &MI,
487 const TargetInstrInfo &TII,
488 MachineRegisterInfo &MRI) {
489 MachineBasicBlock &MBB = *MI.getParent();
490 const Function &F = MBB.getParent()->getFunction();
491
492 // Conservatively require the attributes of the call to match those of
493 // the return. Ignore NoAlias and NonNull because they don't affect the
494 // call sequence.
495 AttributeList CallerAttrs = F.getAttributes();
496 if (AttrBuilder(F.getContext(), CallerAttrs.getRetAttrs())
497 .removeAttribute(Attribute::NoAlias)
498 .removeAttribute(Attribute::NonNull)
499 .hasAttributes())
500 return false;
501
502 // It's not safe to eliminate the sign / zero extension of the return value.
503 if (CallerAttrs.hasRetAttr(Attribute::ZExt) ||
504 CallerAttrs.hasRetAttr(Attribute::SExt))
505 return false;
506
507 // Only tail call if the following instruction is a standard return or if we
508 // have a `thisreturn` callee, and a sequence like:
509 //
510 // G_MEMCPY %0, %1, %2
511 // $x0 = COPY %0
512 // RET_ReallyLR implicit $x0
513 auto Next = next_nodbg(It: MI.getIterator(), End: MBB.instr_end());
514 if (Next != MBB.instr_end() && Next->isCopy()) {
515 if (MI.getOpcode() == TargetOpcode::G_BZERO)
516 return false;
517
    // For MEMCPY/MEMMOVE/MEMSET these will be the first use (the dst), as the
    // memcpy/etc. routines return the same parameter. For others it will be
    // the returned value.
521 Register VReg = MI.getOperand(i: 0).getReg();
522 if (!VReg.isVirtual() || VReg != Next->getOperand(i: 1).getReg())
523 return false;
524
525 Register PReg = Next->getOperand(i: 0).getReg();
526 if (!PReg.isPhysical())
527 return false;
528
529 auto Ret = next_nodbg(It: Next, End: MBB.instr_end());
530 if (Ret == MBB.instr_end() || !Ret->isReturn())
531 return false;
532
533 if (Ret->getNumImplicitOperands() != 1)
534 return false;
535
536 if (!Ret->getOperand(i: 0).isReg() || PReg != Ret->getOperand(i: 0).getReg())
537 return false;
538
539 // Skip over the COPY that we just validated.
540 Next = Ret;
541 }
542
543 if (Next == MBB.instr_end() || TII.isTailCall(Inst: *Next) || !Next->isReturn())
544 return false;
545
546 return true;
547}
548
549LegalizerHelper::LegalizeResult
550llvm::createLibcall(MachineIRBuilder &MIRBuilder, const char *Name,
551 const CallLowering::ArgInfo &Result,
552 ArrayRef<CallLowering::ArgInfo> Args,
553 const CallingConv::ID CC, LostDebugLocObserver &LocObserver,
554 MachineInstr *MI) {
555 auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
556
557 CallLowering::CallLoweringInfo Info;
558 Info.CallConv = CC;
559 Info.Callee = MachineOperand::CreateES(SymName: Name);
560 Info.OrigRet = Result;
561 if (MI)
562 Info.IsTailCall =
563 (Result.Ty->isVoidTy() ||
564 Result.Ty == MIRBuilder.getMF().getFunction().getReturnType()) &&
565 isLibCallInTailPosition(Result, MI&: *MI, TII: MIRBuilder.getTII(),
566 MRI&: *MIRBuilder.getMRI());
567
568 std::copy(first: Args.begin(), last: Args.end(), result: std::back_inserter(x&: Info.OrigArgs));
569 if (!CLI.lowerCall(MIRBuilder, Info))
570 return LegalizerHelper::UnableToLegalize;
571
572 if (MI && Info.LoweredTailCall) {
573 assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");
574
575 // Check debug locations before removing the return.
576 LocObserver.checkpoint(CheckDebugLocs: true);
577
578 // We must have a return following the call (or debug insts) to get past
579 // isLibCallInTailPosition.
580 do {
581 MachineInstr *Next = MI->getNextNode();
582 assert(Next &&
583 (Next->isCopy() || Next->isReturn() || Next->isDebugInstr()) &&
584 "Expected instr following MI to be return or debug inst?");
585 // We lowered a tail call, so the call is now the return from the block.
586 // Delete the old return.
587 Next->eraseFromParent();
588 } while (MI->getNextNode());
589
590 // We expect to lose the debug location from the return.
591 LocObserver.checkpoint(CheckDebugLocs: false);
592 }
593 return LegalizerHelper::Legalized;
594}
595
596LegalizerHelper::LegalizeResult
597llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall,
598 const CallLowering::ArgInfo &Result,
599 ArrayRef<CallLowering::ArgInfo> Args,
600 LostDebugLocObserver &LocObserver, MachineInstr *MI) {
601 auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
602 const char *Name = TLI.getLibcallName(Call: Libcall);
603 if (!Name)
604 return LegalizerHelper::UnableToLegalize;
605 const CallingConv::ID CC = TLI.getLibcallCallingConv(Call: Libcall);
606 return createLibcall(MIRBuilder, Name, Result, Args, CC, LocObserver, MI);
607}
608
609// Useful for libcalls where all operands have the same type.
610static LegalizerHelper::LegalizeResult
611simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size,
612 Type *OpType, LostDebugLocObserver &LocObserver) {
613 auto Libcall = getRTLibDesc(Opcode: MI.getOpcode(), Size);
614
615 // FIXME: What does the original arg index mean here?
616 SmallVector<CallLowering::ArgInfo, 3> Args;
617 for (const MachineOperand &MO : llvm::drop_begin(RangeOrContainer: MI.operands()))
618 Args.push_back(Elt: {MO.getReg(), OpType, 0});
619 return createLibcall(MIRBuilder, Libcall,
620 Result: {MI.getOperand(i: 0).getReg(), OpType, 0}, Args,
621 LocObserver, MI: &MI);
622}
623
624LegalizerHelper::LegalizeResult
625llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
626 MachineInstr &MI, LostDebugLocObserver &LocObserver) {
627 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
628
629 SmallVector<CallLowering::ArgInfo, 3> Args;
630 // Add all the args, except for the last which is an imm denoting 'tail'.
631 for (unsigned i = 0; i < MI.getNumOperands() - 1; ++i) {
632 Register Reg = MI.getOperand(i).getReg();
633
    // Need to derive an IR type for call lowering.
635 LLT OpLLT = MRI.getType(Reg);
636 Type *OpTy = nullptr;
637 if (OpLLT.isPointer())
638 OpTy = PointerType::get(C&: Ctx, AddressSpace: OpLLT.getAddressSpace());
639 else
640 OpTy = IntegerType::get(C&: Ctx, NumBits: OpLLT.getSizeInBits());
641 Args.push_back(Elt: {Reg, OpTy, 0});
642 }
643
644 auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
645 auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
646 RTLIB::Libcall RTLibcall;
647 unsigned Opc = MI.getOpcode();
648 switch (Opc) {
649 case TargetOpcode::G_BZERO:
650 RTLibcall = RTLIB::BZERO;
651 break;
652 case TargetOpcode::G_MEMCPY:
653 RTLibcall = RTLIB::MEMCPY;
654 Args[0].Flags[0].setReturned();
655 break;
656 case TargetOpcode::G_MEMMOVE:
657 RTLibcall = RTLIB::MEMMOVE;
658 Args[0].Flags[0].setReturned();
659 break;
660 case TargetOpcode::G_MEMSET:
661 RTLibcall = RTLIB::MEMSET;
662 Args[0].Flags[0].setReturned();
663 break;
664 default:
665 llvm_unreachable("unsupported opcode");
666 }
667 const char *Name = TLI.getLibcallName(Call: RTLibcall);
668
669 // Unsupported libcall on the target.
670 if (!Name) {
671 LLVM_DEBUG(dbgs() << ".. .. Could not find libcall name for "
672 << MIRBuilder.getTII().getName(Opc) << "\n");
673 return LegalizerHelper::UnableToLegalize;
674 }
675
676 CallLowering::CallLoweringInfo Info;
677 Info.CallConv = TLI.getLibcallCallingConv(Call: RTLibcall);
678 Info.Callee = MachineOperand::CreateES(SymName: Name);
679 Info.OrigRet = CallLowering::ArgInfo({0}, Type::getVoidTy(C&: Ctx), 0);
680 Info.IsTailCall =
681 MI.getOperand(i: MI.getNumOperands() - 1).getImm() &&
682 isLibCallInTailPosition(Result: Info.OrigRet, MI, TII: MIRBuilder.getTII(), MRI);
683
684 std::copy(first: Args.begin(), last: Args.end(), result: std::back_inserter(x&: Info.OrigArgs));
685 if (!CLI.lowerCall(MIRBuilder, Info))
686 return LegalizerHelper::UnableToLegalize;
687
688 if (Info.LoweredTailCall) {
689 assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");
690
691 // Check debug locations before removing the return.
692 LocObserver.checkpoint(CheckDebugLocs: true);
693
694 // We must have a return following the call (or debug insts) to get past
695 // isLibCallInTailPosition.
696 do {
697 MachineInstr *Next = MI.getNextNode();
698 assert(Next &&
699 (Next->isCopy() || Next->isReturn() || Next->isDebugInstr()) &&
700 "Expected instr following MI to be return or debug inst?");
701 // We lowered a tail call, so the call is now the return from the block.
702 // Delete the old return.
703 Next->eraseFromParent();
704 } while (MI.getNextNode());
705
706 // We expect to lose the debug location from the return.
707 LocObserver.checkpoint(CheckDebugLocs: false);
708 }
709
710 return LegalizerHelper::Legalized;
711}
712
713static RTLIB::Libcall getOutlineAtomicLibcall(MachineInstr &MI) {
714 unsigned Opc = MI.getOpcode();
715 auto &AtomicMI = cast<GMemOperation>(Val&: MI);
716 auto &MMO = AtomicMI.getMMO();
717 auto Ordering = MMO.getMergedOrdering();
718 LLT MemType = MMO.getMemoryType();
719 uint64_t MemSize = MemType.getSizeInBytes();
720 if (MemType.isVector())
721 return RTLIB::UNKNOWN_LIBCALL;
722
723#define LCALLS(A, B) \
724 { A##B##_RELAX, A##B##_ACQ, A##B##_REL, A##B##_ACQ_REL }
725#define LCALL5(A) \
726 LCALLS(A, 1), LCALLS(A, 2), LCALLS(A, 4), LCALLS(A, 8), LCALLS(A, 16)
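  // LCALL5 expands to a 5x4 table of libcalls indexed by memory size
  // (1, 2, 4, 8 or 16 bytes) and memory ordering (relaxed, acquire, release,
  // acq_rel); getOutlineAtomicHelper selects the matching entry.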
727 switch (Opc) {
728 case TargetOpcode::G_ATOMIC_CMPXCHG:
729 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
730 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_CAS)};
731 return getOutlineAtomicHelper(LC, Order: Ordering, MemSize);
732 }
733 case TargetOpcode::G_ATOMICRMW_XCHG: {
734 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_SWP)};
735 return getOutlineAtomicHelper(LC, Order: Ordering, MemSize);
736 }
737 case TargetOpcode::G_ATOMICRMW_ADD:
738 case TargetOpcode::G_ATOMICRMW_SUB: {
739 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDADD)};
740 return getOutlineAtomicHelper(LC, Order: Ordering, MemSize);
741 }
742 case TargetOpcode::G_ATOMICRMW_AND: {
743 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDCLR)};
744 return getOutlineAtomicHelper(LC, Order: Ordering, MemSize);
745 }
746 case TargetOpcode::G_ATOMICRMW_OR: {
747 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDSET)};
748 return getOutlineAtomicHelper(LC, Order: Ordering, MemSize);
749 }
750 case TargetOpcode::G_ATOMICRMW_XOR: {
751 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDEOR)};
752 return getOutlineAtomicHelper(LC, Order: Ordering, MemSize);
753 }
754 default:
755 return RTLIB::UNKNOWN_LIBCALL;
756 }
757#undef LCALLS
758#undef LCALL5
759}
760
761static LegalizerHelper::LegalizeResult
762createAtomicLibcall(MachineIRBuilder &MIRBuilder, MachineInstr &MI) {
763 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
764
765 Type *RetTy;
766 SmallVector<Register> RetRegs;
767 SmallVector<CallLowering::ArgInfo, 3> Args;
768 unsigned Opc = MI.getOpcode();
769 switch (Opc) {
770 case TargetOpcode::G_ATOMIC_CMPXCHG:
771 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
772 Register Success;
773 LLT SuccessLLT;
774 auto [Ret, RetLLT, Mem, MemLLT, Cmp, CmpLLT, New, NewLLT] =
775 MI.getFirst4RegLLTs();
776 RetRegs.push_back(Elt: Ret);
777 RetTy = IntegerType::get(C&: Ctx, NumBits: RetLLT.getSizeInBits());
778 if (Opc == TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS) {
779 std::tie(args&: Ret, args&: RetLLT, args&: Success, args&: SuccessLLT, args&: Mem, args&: MemLLT, args&: Cmp, args&: CmpLLT, args&: New,
780 args&: NewLLT) = MI.getFirst5RegLLTs();
781 RetRegs.push_back(Elt: Success);
782 RetTy = StructType::get(
783 Context&: Ctx, Elements: {RetTy, IntegerType::get(C&: Ctx, NumBits: SuccessLLT.getSizeInBits())});
784 }
785 Args.push_back(Elt: {Cmp, IntegerType::get(C&: Ctx, NumBits: CmpLLT.getSizeInBits()), 0});
786 Args.push_back(Elt: {New, IntegerType::get(C&: Ctx, NumBits: NewLLT.getSizeInBits()), 0});
787 Args.push_back(Elt: {Mem, PointerType::get(C&: Ctx, AddressSpace: MemLLT.getAddressSpace()), 0});
788 break;
789 }
790 case TargetOpcode::G_ATOMICRMW_XCHG:
791 case TargetOpcode::G_ATOMICRMW_ADD:
792 case TargetOpcode::G_ATOMICRMW_SUB:
793 case TargetOpcode::G_ATOMICRMW_AND:
794 case TargetOpcode::G_ATOMICRMW_OR:
795 case TargetOpcode::G_ATOMICRMW_XOR: {
796 auto [Ret, RetLLT, Mem, MemLLT, Val, ValLLT] = MI.getFirst3RegLLTs();
797 RetRegs.push_back(Elt: Ret);
798 RetTy = IntegerType::get(C&: Ctx, NumBits: RetLLT.getSizeInBits());
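    // The outline atomic helpers only implement SWP/LDADD/LDCLR/LDSET/LDEOR,
    // so G_ATOMICRMW_AND is emitted as LDCLR of the inverted value and
    // G_ATOMICRMW_SUB as LDADD of the negated value.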
799 if (Opc == TargetOpcode::G_ATOMICRMW_AND)
800 Val =
801 MIRBuilder.buildXor(Dst: ValLLT, Src0: MIRBuilder.buildConstant(Res: ValLLT, Val: -1), Src1: Val)
802 .getReg(Idx: 0);
803 else if (Opc == TargetOpcode::G_ATOMICRMW_SUB)
804 Val =
805 MIRBuilder.buildSub(Dst: ValLLT, Src0: MIRBuilder.buildConstant(Res: ValLLT, Val: 0), Src1: Val)
806 .getReg(Idx: 0);
807 Args.push_back(Elt: {Val, IntegerType::get(C&: Ctx, NumBits: ValLLT.getSizeInBits()), 0});
808 Args.push_back(Elt: {Mem, PointerType::get(C&: Ctx, AddressSpace: MemLLT.getAddressSpace()), 0});
809 break;
810 }
811 default:
812 llvm_unreachable("unsupported opcode");
813 }
814
815 auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
816 auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
817 RTLIB::Libcall RTLibcall = getOutlineAtomicLibcall(MI);
818 const char *Name = TLI.getLibcallName(Call: RTLibcall);
819
820 // Unsupported libcall on the target.
821 if (!Name) {
822 LLVM_DEBUG(dbgs() << ".. .. Could not find libcall name for "
823 << MIRBuilder.getTII().getName(Opc) << "\n");
824 return LegalizerHelper::UnableToLegalize;
825 }
826
827 CallLowering::CallLoweringInfo Info;
828 Info.CallConv = TLI.getLibcallCallingConv(Call: RTLibcall);
829 Info.Callee = MachineOperand::CreateES(SymName: Name);
830 Info.OrigRet = CallLowering::ArgInfo(RetRegs, RetTy, 0);
831
832 std::copy(first: Args.begin(), last: Args.end(), result: std::back_inserter(x&: Info.OrigArgs));
833 if (!CLI.lowerCall(MIRBuilder, Info))
834 return LegalizerHelper::UnableToLegalize;
835
836 return LegalizerHelper::Legalized;
837}
838
839static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType,
840 Type *FromType) {
841 auto ToMVT = MVT::getVT(Ty: ToType);
842 auto FromMVT = MVT::getVT(Ty: FromType);
843
844 switch (Opcode) {
845 case TargetOpcode::G_FPEXT:
846 return RTLIB::getFPEXT(OpVT: FromMVT, RetVT: ToMVT);
847 case TargetOpcode::G_FPTRUNC:
848 return RTLIB::getFPROUND(OpVT: FromMVT, RetVT: ToMVT);
849 case TargetOpcode::G_FPTOSI:
850 return RTLIB::getFPTOSINT(OpVT: FromMVT, RetVT: ToMVT);
851 case TargetOpcode::G_FPTOUI:
852 return RTLIB::getFPTOUINT(OpVT: FromMVT, RetVT: ToMVT);
853 case TargetOpcode::G_SITOFP:
854 return RTLIB::getSINTTOFP(OpVT: FromMVT, RetVT: ToMVT);
855 case TargetOpcode::G_UITOFP:
856 return RTLIB::getUINTTOFP(OpVT: FromMVT, RetVT: ToMVT);
857 }
858 llvm_unreachable("Unsupported libcall function");
859}
860
861static LegalizerHelper::LegalizeResult
862conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType,
863 Type *FromType, LostDebugLocObserver &LocObserver) {
864 RTLIB::Libcall Libcall = getConvRTLibDesc(Opcode: MI.getOpcode(), ToType, FromType);
865 return createLibcall(
866 MIRBuilder, Libcall, Result: {MI.getOperand(i: 0).getReg(), ToType, 0},
867 Args: {{MI.getOperand(i: 1).getReg(), FromType, 0}}, LocObserver, MI: &MI);
868}
869
870static RTLIB::Libcall
871getStateLibraryFunctionFor(MachineInstr &MI, const TargetLowering &TLI) {
872 RTLIB::Libcall RTLibcall;
873 switch (MI.getOpcode()) {
874 case TargetOpcode::G_GET_FPENV:
875 RTLibcall = RTLIB::FEGETENV;
876 break;
877 case TargetOpcode::G_SET_FPENV:
878 case TargetOpcode::G_RESET_FPENV:
879 RTLibcall = RTLIB::FESETENV;
880 break;
881 case TargetOpcode::G_GET_FPMODE:
882 RTLibcall = RTLIB::FEGETMODE;
883 break;
884 case TargetOpcode::G_SET_FPMODE:
885 case TargetOpcode::G_RESET_FPMODE:
886 RTLibcall = RTLIB::FESETMODE;
887 break;
888 default:
889 llvm_unreachable("Unexpected opcode");
890 }
891 return RTLibcall;
892}
893
894// Some library functions that read FP state (fegetmode, fegetenv) write the
895// state into a region in memory. IR intrinsics that do the same operations
// (get_fpmode, get_fpenv) return the state as an integer value. To implement
// these intrinsics via the library functions, we need to use a temporary
// variable, for example:
899//
900// %0:_(s32) = G_GET_FPMODE
901//
902// is transformed to:
903//
904// %1:_(p0) = G_FRAME_INDEX %stack.0
905// BL &fegetmode
// %0:_(s32) = G_LOAD %1
907//
908LegalizerHelper::LegalizeResult
909LegalizerHelper::createGetStateLibcall(MachineIRBuilder &MIRBuilder,
910 MachineInstr &MI,
911 LostDebugLocObserver &LocObserver) {
912 const DataLayout &DL = MIRBuilder.getDataLayout();
913 auto &MF = MIRBuilder.getMF();
914 auto &MRI = *MIRBuilder.getMRI();
915 auto &Ctx = MF.getFunction().getContext();
916
  // Create a temporary where the library function will put the read state.
918 Register Dst = MI.getOperand(i: 0).getReg();
919 LLT StateTy = MRI.getType(Reg: Dst);
920 TypeSize StateSize = StateTy.getSizeInBytes();
921 Align TempAlign = getStackTemporaryAlignment(Type: StateTy);
922 MachinePointerInfo TempPtrInfo;
923 auto Temp = createStackTemporary(Bytes: StateSize, Alignment: TempAlign, PtrInfo&: TempPtrInfo);
924
925 // Create a call to library function, with the temporary as an argument.
926 unsigned TempAddrSpace = DL.getAllocaAddrSpace();
927 Type *StatePtrTy = PointerType::get(C&: Ctx, AddressSpace: TempAddrSpace);
928 RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI);
929 auto Res =
930 createLibcall(MIRBuilder, Libcall: RTLibcall,
931 Result: CallLowering::ArgInfo({0}, Type::getVoidTy(C&: Ctx), 0),
932 Args: CallLowering::ArgInfo({Temp.getReg(Idx: 0), StatePtrTy, 0}),
933 LocObserver, MI: nullptr);
934 if (Res != LegalizerHelper::Legalized)
935 return Res;
936
937 // Create a load from the temporary.
938 MachineMemOperand *MMO = MF.getMachineMemOperand(
939 PtrInfo: TempPtrInfo, f: MachineMemOperand::MOLoad, MemTy: StateTy, base_alignment: TempAlign);
940 MIRBuilder.buildLoadInstr(Opcode: TargetOpcode::G_LOAD, Res: Dst, Addr: Temp, MMO&: *MMO);
941
942 return LegalizerHelper::Legalized;
943}
944
// Similar to `createGetStateLibcall`, this function calls a library function
// using transient stack space. In this case the library function reads the
// content of the memory region rather than writing it.
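//
// For example:
//
// G_SET_FPMODE %0:_(s32)
//
// is transformed to:
//
// %1:_(p0) = G_FRAME_INDEX %stack.0
// G_STORE %0, %1
// BL &fesetmode
//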
948LegalizerHelper::LegalizeResult
949LegalizerHelper::createSetStateLibcall(MachineIRBuilder &MIRBuilder,
950 MachineInstr &MI,
951 LostDebugLocObserver &LocObserver) {
952 const DataLayout &DL = MIRBuilder.getDataLayout();
953 auto &MF = MIRBuilder.getMF();
954 auto &MRI = *MIRBuilder.getMRI();
955 auto &Ctx = MF.getFunction().getContext();
956
  // Create a temporary from which the library function will get the new state.
958 Register Src = MI.getOperand(i: 0).getReg();
959 LLT StateTy = MRI.getType(Reg: Src);
960 TypeSize StateSize = StateTy.getSizeInBytes();
961 Align TempAlign = getStackTemporaryAlignment(Type: StateTy);
962 MachinePointerInfo TempPtrInfo;
963 auto Temp = createStackTemporary(Bytes: StateSize, Alignment: TempAlign, PtrInfo&: TempPtrInfo);
964
965 // Put the new state into the temporary.
966 MachineMemOperand *MMO = MF.getMachineMemOperand(
967 PtrInfo: TempPtrInfo, f: MachineMemOperand::MOStore, MemTy: StateTy, base_alignment: TempAlign);
968 MIRBuilder.buildStore(Val: Src, Addr: Temp, MMO&: *MMO);
969
970 // Create a call to library function, with the temporary as an argument.
971 unsigned TempAddrSpace = DL.getAllocaAddrSpace();
972 Type *StatePtrTy = PointerType::get(C&: Ctx, AddressSpace: TempAddrSpace);
973 RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI);
974 return createLibcall(MIRBuilder, Libcall: RTLibcall,
975 Result: CallLowering::ArgInfo({0}, Type::getVoidTy(C&: Ctx), 0),
976 Args: CallLowering::ArgInfo({Temp.getReg(Idx: 0), StatePtrTy, 0}),
977 LocObserver, MI: nullptr);
978}
979
// This function is used to legalize operations that set the default
// environment state. In the C library a call like `fesetmode(FE_DFL_MODE)` is
// used for that. On most targets supported by glibc, FE_DFL_MODE is defined
// as `((const femode_t *) -1)`; that assumption is used here. If it does not
// hold for some target, the target must provide custom lowering.
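//
// For example, on a target with 64-bit pointers G_RESET_FPMODE becomes:
//
// %0:_(s64) = G_CONSTANT i64 -1
// %1:_(p0) = G_INTTOPTR %0
// BL &fesetmode
//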
985LegalizerHelper::LegalizeResult
986LegalizerHelper::createResetStateLibcall(MachineIRBuilder &MIRBuilder,
987 MachineInstr &MI,
988 LostDebugLocObserver &LocObserver) {
989 const DataLayout &DL = MIRBuilder.getDataLayout();
990 auto &MF = MIRBuilder.getMF();
991 auto &Ctx = MF.getFunction().getContext();
992
993 // Create an argument for the library function.
994 unsigned AddrSpace = DL.getDefaultGlobalsAddressSpace();
995 Type *StatePtrTy = PointerType::get(C&: Ctx, AddressSpace: AddrSpace);
996 unsigned PtrSize = DL.getPointerSizeInBits(AS: AddrSpace);
997 LLT MemTy = LLT::pointer(AddressSpace: AddrSpace, SizeInBits: PtrSize);
998 auto DefValue = MIRBuilder.buildConstant(Res: LLT::scalar(SizeInBits: PtrSize), Val: -1LL);
999 DstOp Dest(MRI.createGenericVirtualRegister(Ty: MemTy));
1000 MIRBuilder.buildIntToPtr(Dst: Dest, Src: DefValue);
1001
1002 RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI);
1003 return createLibcall(MIRBuilder, Libcall: RTLibcall,
1004 Result: CallLowering::ArgInfo({0}, Type::getVoidTy(C&: Ctx), 0),
1005 Args: CallLowering::ArgInfo({Dest.getReg(), StatePtrTy, 0}),
1006 LocObserver, MI: &MI);
1007}
1008
1009LegalizerHelper::LegalizeResult
1010LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) {
1011 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
1012
1013 switch (MI.getOpcode()) {
1014 default:
1015 return UnableToLegalize;
1016 case TargetOpcode::G_MUL:
1017 case TargetOpcode::G_SDIV:
1018 case TargetOpcode::G_UDIV:
1019 case TargetOpcode::G_SREM:
1020 case TargetOpcode::G_UREM:
1021 case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
1022 LLT LLTy = MRI.getType(Reg: MI.getOperand(i: 0).getReg());
1023 unsigned Size = LLTy.getSizeInBits();
1024 Type *HLTy = IntegerType::get(C&: Ctx, NumBits: Size);
1025 auto Status = simpleLibcall(MI, MIRBuilder, Size, OpType: HLTy, LocObserver);
1026 if (Status != Legalized)
1027 return Status;
1028 break;
1029 }
1030 case TargetOpcode::G_FADD:
1031 case TargetOpcode::G_FSUB:
1032 case TargetOpcode::G_FMUL:
1033 case TargetOpcode::G_FDIV:
1034 case TargetOpcode::G_FMA:
1035 case TargetOpcode::G_FPOW:
1036 case TargetOpcode::G_FREM:
1037 case TargetOpcode::G_FCOS:
1038 case TargetOpcode::G_FSIN:
1039 case TargetOpcode::G_FLOG10:
1040 case TargetOpcode::G_FLOG:
1041 case TargetOpcode::G_FLOG2:
1042 case TargetOpcode::G_FLDEXP:
1043 case TargetOpcode::G_FEXP:
1044 case TargetOpcode::G_FEXP2:
1045 case TargetOpcode::G_FEXP10:
1046 case TargetOpcode::G_FCEIL:
1047 case TargetOpcode::G_FFLOOR:
1048 case TargetOpcode::G_FMINNUM:
1049 case TargetOpcode::G_FMAXNUM:
1050 case TargetOpcode::G_FSQRT:
1051 case TargetOpcode::G_FRINT:
1052 case TargetOpcode::G_FNEARBYINT:
1053 case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
1054 LLT LLTy = MRI.getType(Reg: MI.getOperand(i: 0).getReg());
1055 unsigned Size = LLTy.getSizeInBits();
1056 Type *HLTy = getFloatTypeForLLT(Ctx, Ty: LLTy);
1057 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1058 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1059 return UnableToLegalize;
1060 }
1061 auto Status = simpleLibcall(MI, MIRBuilder, Size, OpType: HLTy, LocObserver);
1062 if (Status != Legalized)
1063 return Status;
1064 break;
1065 }
1066 case TargetOpcode::G_INTRINSIC_LRINT:
1067 case TargetOpcode::G_INTRINSIC_LLRINT: {
1068 LLT LLTy = MRI.getType(Reg: MI.getOperand(i: 1).getReg());
1069 unsigned Size = LLTy.getSizeInBits();
1070 Type *HLTy = getFloatTypeForLLT(Ctx, Ty: LLTy);
1071 Type *ITy = IntegerType::get(
1072 C&: Ctx, NumBits: MRI.getType(Reg: MI.getOperand(i: 0).getReg()).getSizeInBits());
1073 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1074 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1075 return UnableToLegalize;
1076 }
1077 auto Libcall = getRTLibDesc(Opcode: MI.getOpcode(), Size);
1078 LegalizeResult Status =
1079 createLibcall(MIRBuilder, Libcall, Result: {MI.getOperand(i: 0).getReg(), ITy, 0},
1080 Args: {{MI.getOperand(i: 1).getReg(), HLTy, 0}}, LocObserver, MI: &MI);
1081 if (Status != Legalized)
1082 return Status;
1083 MI.eraseFromParent();
1084 return Legalized;
1085 }
1086 case TargetOpcode::G_FPOWI: {
1087 LLT LLTy = MRI.getType(Reg: MI.getOperand(i: 0).getReg());
1088 unsigned Size = LLTy.getSizeInBits();
1089 Type *HLTy = getFloatTypeForLLT(Ctx, Ty: LLTy);
1090 Type *ITy = IntegerType::get(
1091 C&: Ctx, NumBits: MRI.getType(Reg: MI.getOperand(i: 2).getReg()).getSizeInBits());
1092 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1093 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1094 return UnableToLegalize;
1095 }
1096 auto Libcall = getRTLibDesc(Opcode: MI.getOpcode(), Size);
1097 std::initializer_list<CallLowering::ArgInfo> Args = {
1098 {MI.getOperand(i: 1).getReg(), HLTy, 0},
1099 {MI.getOperand(i: 2).getReg(), ITy, 1}};
1100 LegalizeResult Status =
1101 createLibcall(MIRBuilder, Libcall, Result: {MI.getOperand(i: 0).getReg(), HLTy, 0},
1102 Args, LocObserver, MI: &MI);
1103 if (Status != Legalized)
1104 return Status;
1105 break;
1106 }
1107 case TargetOpcode::G_FPEXT:
1108 case TargetOpcode::G_FPTRUNC: {
1109 Type *FromTy = getFloatTypeForLLT(Ctx, Ty: MRI.getType(Reg: MI.getOperand(i: 1).getReg()));
1110 Type *ToTy = getFloatTypeForLLT(Ctx, Ty: MRI.getType(Reg: MI.getOperand(i: 0).getReg()));
1111 if (!FromTy || !ToTy)
1112 return UnableToLegalize;
1113 LegalizeResult Status =
1114 conversionLibcall(MI, MIRBuilder, ToType: ToTy, FromType: FromTy, LocObserver);
1115 if (Status != Legalized)
1116 return Status;
1117 break;
1118 }
1119 case TargetOpcode::G_FPTOSI:
1120 case TargetOpcode::G_FPTOUI: {
1121 // FIXME: Support other types
1122 unsigned FromSize = MRI.getType(Reg: MI.getOperand(i: 1).getReg()).getSizeInBits();
1123 unsigned ToSize = MRI.getType(Reg: MI.getOperand(i: 0).getReg()).getSizeInBits();
1124 if ((ToSize != 32 && ToSize != 64) || (FromSize != 32 && FromSize != 64))
1125 return UnableToLegalize;
1126 LegalizeResult Status = conversionLibcall(
1127 MI, MIRBuilder,
1128 ToType: ToSize == 32 ? Type::getInt32Ty(C&: Ctx) : Type::getInt64Ty(C&: Ctx),
1129 FromType: FromSize == 64 ? Type::getDoubleTy(C&: Ctx) : Type::getFloatTy(C&: Ctx),
1130 LocObserver);
1131 if (Status != Legalized)
1132 return Status;
1133 break;
1134 }
1135 case TargetOpcode::G_SITOFP:
1136 case TargetOpcode::G_UITOFP: {
1137 // FIXME: Support other types
1138 unsigned FromSize = MRI.getType(Reg: MI.getOperand(i: 1).getReg()).getSizeInBits();
1139 unsigned ToSize = MRI.getType(Reg: MI.getOperand(i: 0).getReg()).getSizeInBits();
1140 if ((FromSize != 32 && FromSize != 64) || (ToSize != 32 && ToSize != 64))
1141 return UnableToLegalize;
1142 LegalizeResult Status = conversionLibcall(
1143 MI, MIRBuilder,
1144 ToType: ToSize == 64 ? Type::getDoubleTy(C&: Ctx) : Type::getFloatTy(C&: Ctx),
1145 FromType: FromSize == 32 ? Type::getInt32Ty(C&: Ctx) : Type::getInt64Ty(C&: Ctx),
1146 LocObserver);
1147 if (Status != Legalized)
1148 return Status;
1149 break;
1150 }
1151 case TargetOpcode::G_ATOMICRMW_XCHG:
1152 case TargetOpcode::G_ATOMICRMW_ADD:
1153 case TargetOpcode::G_ATOMICRMW_SUB:
1154 case TargetOpcode::G_ATOMICRMW_AND:
1155 case TargetOpcode::G_ATOMICRMW_OR:
1156 case TargetOpcode::G_ATOMICRMW_XOR:
1157 case TargetOpcode::G_ATOMIC_CMPXCHG:
1158 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
1159 auto Status = createAtomicLibcall(MIRBuilder, MI);
1160 if (Status != Legalized)
1161 return Status;
1162 break;
1163 }
1164 case TargetOpcode::G_BZERO:
1165 case TargetOpcode::G_MEMCPY:
1166 case TargetOpcode::G_MEMMOVE:
1167 case TargetOpcode::G_MEMSET: {
1168 LegalizeResult Result =
1169 createMemLibcall(MIRBuilder, MRI&: *MIRBuilder.getMRI(), MI, LocObserver);
1170 if (Result != Legalized)
1171 return Result;
1172 MI.eraseFromParent();
1173 return Result;
1174 }
1175 case TargetOpcode::G_GET_FPENV:
1176 case TargetOpcode::G_GET_FPMODE: {
1177 LegalizeResult Result = createGetStateLibcall(MIRBuilder, MI, LocObserver);
1178 if (Result != Legalized)
1179 return Result;
1180 break;
1181 }
1182 case TargetOpcode::G_SET_FPENV:
1183 case TargetOpcode::G_SET_FPMODE: {
1184 LegalizeResult Result = createSetStateLibcall(MIRBuilder, MI, LocObserver);
1185 if (Result != Legalized)
1186 return Result;
1187 break;
1188 }
1189 case TargetOpcode::G_RESET_FPENV:
1190 case TargetOpcode::G_RESET_FPMODE: {
1191 LegalizeResult Result =
1192 createResetStateLibcall(MIRBuilder, MI, LocObserver);
1193 if (Result != Legalized)
1194 return Result;
1195 break;
1196 }
1197 }
1198
1199 MI.eraseFromParent();
1200 return Legalized;
1201}
1202
1203LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
1204 unsigned TypeIdx,
1205 LLT NarrowTy) {
1206 uint64_t SizeOp0 = MRI.getType(Reg: MI.getOperand(i: 0).getReg()).getSizeInBits();
1207 uint64_t NarrowSize = NarrowTy.getSizeInBits();
1208
1209 switch (MI.getOpcode()) {
1210 default:
1211 return UnableToLegalize;
1212 case TargetOpcode::G_IMPLICIT_DEF: {
1213 Register DstReg = MI.getOperand(i: 0).getReg();
1214 LLT DstTy = MRI.getType(Reg: DstReg);
1215
1216 // If SizeOp0 is not an exact multiple of NarrowSize, emit
1217 // G_ANYEXT(G_IMPLICIT_DEF). Cast result to vector if needed.
1218 // FIXME: Although this would also be legal for the general case, it causes
1219 // a lot of regressions in the emitted code (superfluous COPYs, artifact
1220 // combines not being hit). This seems to be a problem related to the
1221 // artifact combiner.
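    // For example, narrowing an s96 G_IMPLICIT_DEF with an s64 NarrowTy emits
    // an s64 G_IMPLICIT_DEF followed by a G_ANYEXT to s96, whereas an s128
    // G_IMPLICIT_DEF is split into two s64 G_IMPLICIT_DEFs that are merged
    // back together.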
1222 if (SizeOp0 % NarrowSize != 0) {
1223 LLT ImplicitTy = NarrowTy;
1224 if (DstTy.isVector())
1225 ImplicitTy = LLT::vector(EC: DstTy.getElementCount(), ScalarTy: ImplicitTy);
1226
1227 Register ImplicitReg = MIRBuilder.buildUndef(Res: ImplicitTy).getReg(Idx: 0);
1228 MIRBuilder.buildAnyExt(Res: DstReg, Op: ImplicitReg);
1229
1230 MI.eraseFromParent();
1231 return Legalized;
1232 }
1233
1234 int NumParts = SizeOp0 / NarrowSize;
1235
1236 SmallVector<Register, 2> DstRegs;
1237 for (int i = 0; i < NumParts; ++i)
1238 DstRegs.push_back(Elt: MIRBuilder.buildUndef(Res: NarrowTy).getReg(Idx: 0));
1239
1240 if (DstTy.isVector())
1241 MIRBuilder.buildBuildVector(Res: DstReg, Ops: DstRegs);
1242 else
1243 MIRBuilder.buildMergeLikeInstr(Res: DstReg, Ops: DstRegs);
1244 MI.eraseFromParent();
1245 return Legalized;
1246 }
1247 case TargetOpcode::G_CONSTANT: {
1248 LLT Ty = MRI.getType(Reg: MI.getOperand(i: 0).getReg());
1249 const APInt &Val = MI.getOperand(i: 1).getCImm()->getValue();
1250 unsigned TotalSize = Ty.getSizeInBits();
1251 unsigned NarrowSize = NarrowTy.getSizeInBits();
1252 int NumParts = TotalSize / NarrowSize;
1253
1254 SmallVector<Register, 4> PartRegs;
1255 for (int I = 0; I != NumParts; ++I) {
1256 unsigned Offset = I * NarrowSize;
1257 auto K = MIRBuilder.buildConstant(Res: NarrowTy,
1258 Val: Val.lshr(shiftAmt: Offset).trunc(width: NarrowSize));
1259 PartRegs.push_back(Elt: K.getReg(Idx: 0));
1260 }
1261
1262 LLT LeftoverTy;
1263 unsigned LeftoverBits = TotalSize - NumParts * NarrowSize;
1264 SmallVector<Register, 1> LeftoverRegs;
1265 if (LeftoverBits != 0) {
1266 LeftoverTy = LLT::scalar(SizeInBits: LeftoverBits);
1267 auto K = MIRBuilder.buildConstant(
1268 Res: LeftoverTy,
1269 Val: Val.lshr(shiftAmt: NumParts * NarrowSize).trunc(width: LeftoverBits));
1270 LeftoverRegs.push_back(Elt: K.getReg(Idx: 0));
1271 }
1272
1273 insertParts(DstReg: MI.getOperand(i: 0).getReg(),
1274 ResultTy: Ty, PartTy: NarrowTy, PartRegs, LeftoverTy, LeftoverRegs);
1275
1276 MI.eraseFromParent();
1277 return Legalized;
1278 }
1279 case TargetOpcode::G_SEXT:
1280 case TargetOpcode::G_ZEXT:
1281 case TargetOpcode::G_ANYEXT:
1282 return narrowScalarExt(MI, TypeIdx, Ty: NarrowTy);
1283 case TargetOpcode::G_TRUNC: {
1284 if (TypeIdx != 1)
1285 return UnableToLegalize;
1286
1287 uint64_t SizeOp1 = MRI.getType(Reg: MI.getOperand(i: 1).getReg()).getSizeInBits();
1288 if (NarrowTy.getSizeInBits() * 2 != SizeOp1) {
1289 LLVM_DEBUG(dbgs() << "Can't narrow trunc to type " << NarrowTy << "\n");
1290 return UnableToLegalize;
1291 }
1292
1293 auto Unmerge = MIRBuilder.buildUnmerge(Res: NarrowTy, Op: MI.getOperand(i: 1));
1294 MIRBuilder.buildCopy(Res: MI.getOperand(i: 0), Op: Unmerge.getReg(Idx: 0));
1295 MI.eraseFromParent();
1296 return Legalized;
1297 }
1298
1299 case TargetOpcode::G_FREEZE: {
1300 if (TypeIdx != 0)
1301 return UnableToLegalize;
1302
1303 LLT Ty = MRI.getType(Reg: MI.getOperand(i: 0).getReg());
1304 // Should widen scalar first
1305 if (Ty.getSizeInBits() % NarrowTy.getSizeInBits() != 0)
1306 return UnableToLegalize;
1307
1308 auto Unmerge = MIRBuilder.buildUnmerge(Res: NarrowTy, Op: MI.getOperand(i: 1).getReg());
1309 SmallVector<Register, 8> Parts;
1310 for (unsigned i = 0; i < Unmerge->getNumDefs(); ++i) {
1311 Parts.push_back(
1312 Elt: MIRBuilder.buildFreeze(Dst: NarrowTy, Src: Unmerge.getReg(Idx: i)).getReg(Idx: 0));
1313 }
1314
1315 MIRBuilder.buildMergeLikeInstr(Res: MI.getOperand(i: 0).getReg(), Ops: Parts);
1316 MI.eraseFromParent();
1317 return Legalized;
1318 }
1319 case TargetOpcode::G_ADD:
1320 case TargetOpcode::G_SUB:
1321 case TargetOpcode::G_SADDO:
1322 case TargetOpcode::G_SSUBO:
1323 case TargetOpcode::G_SADDE:
1324 case TargetOpcode::G_SSUBE:
1325 case TargetOpcode::G_UADDO:
1326 case TargetOpcode::G_USUBO:
1327 case TargetOpcode::G_UADDE:
1328 case TargetOpcode::G_USUBE:
1329 return narrowScalarAddSub(MI, TypeIdx, NarrowTy);
1330 case TargetOpcode::G_MUL:
1331 case TargetOpcode::G_UMULH:
1332 return narrowScalarMul(MI, Ty: NarrowTy);
1333 case TargetOpcode::G_EXTRACT:
1334 return narrowScalarExtract(MI, TypeIdx, Ty: NarrowTy);
1335 case TargetOpcode::G_INSERT:
1336 return narrowScalarInsert(MI, TypeIdx, Ty: NarrowTy);
1337 case TargetOpcode::G_LOAD: {
1338 auto &LoadMI = cast<GLoad>(Val&: MI);
1339 Register DstReg = LoadMI.getDstReg();
1340 LLT DstTy = MRI.getType(Reg: DstReg);
1341 if (DstTy.isVector())
1342 return UnableToLegalize;
1343
1344 if (8 * LoadMI.getMemSize().getValue() != DstTy.getSizeInBits()) {
1345 Register TmpReg = MRI.createGenericVirtualRegister(Ty: NarrowTy);
1346 MIRBuilder.buildLoad(Res: TmpReg, Addr: LoadMI.getPointerReg(), MMO&: LoadMI.getMMO());
1347 MIRBuilder.buildAnyExt(Res: DstReg, Op: TmpReg);
1348 LoadMI.eraseFromParent();
1349 return Legalized;
1350 }
1351
1352 return reduceLoadStoreWidth(MI&: LoadMI, TypeIdx, NarrowTy);
1353 }
1354 case TargetOpcode::G_ZEXTLOAD:
1355 case TargetOpcode::G_SEXTLOAD: {
1356 auto &LoadMI = cast<GExtLoad>(Val&: MI);
1357 Register DstReg = LoadMI.getDstReg();
1358 Register PtrReg = LoadMI.getPointerReg();
1359
1360 Register TmpReg = MRI.createGenericVirtualRegister(Ty: NarrowTy);
1361 auto &MMO = LoadMI.getMMO();
1362 unsigned MemSize = MMO.getSizeInBits().getValue();
1363
1364 if (MemSize == NarrowSize) {
1365 MIRBuilder.buildLoad(Res: TmpReg, Addr: PtrReg, MMO);
1366 } else if (MemSize < NarrowSize) {
1367 MIRBuilder.buildLoadInstr(Opcode: LoadMI.getOpcode(), Res: TmpReg, Addr: PtrReg, MMO);
1368 } else if (MemSize > NarrowSize) {
1369 // FIXME: Need to split the load.
1370 return UnableToLegalize;
1371 }
1372
1373 if (isa<GZExtLoad>(Val: LoadMI))
1374 MIRBuilder.buildZExt(Res: DstReg, Op: TmpReg);
1375 else
1376 MIRBuilder.buildSExt(Res: DstReg, Op: TmpReg);
1377
1378 LoadMI.eraseFromParent();
1379 return Legalized;
1380 }
1381 case TargetOpcode::G_STORE: {
1382 auto &StoreMI = cast<GStore>(Val&: MI);
1383
1384 Register SrcReg = StoreMI.getValueReg();
1385 LLT SrcTy = MRI.getType(Reg: SrcReg);
1386 if (SrcTy.isVector())
1387 return UnableToLegalize;
1388
1389 int NumParts = SizeOp0 / NarrowSize;
1390 unsigned HandledSize = NumParts * NarrowTy.getSizeInBits();
1391 unsigned LeftoverBits = SrcTy.getSizeInBits() - HandledSize;
1392 if (SrcTy.isVector() && LeftoverBits != 0)
1393 return UnableToLegalize;
1394
1395 if (8 * StoreMI.getMemSize().getValue() != SrcTy.getSizeInBits()) {
1396 Register TmpReg = MRI.createGenericVirtualRegister(Ty: NarrowTy);
1397 MIRBuilder.buildTrunc(Res: TmpReg, Op: SrcReg);
1398 MIRBuilder.buildStore(Val: TmpReg, Addr: StoreMI.getPointerReg(), MMO&: StoreMI.getMMO());
1399 StoreMI.eraseFromParent();
1400 return Legalized;
1401 }
1402
1403 return reduceLoadStoreWidth(MI&: StoreMI, TypeIdx: 0, NarrowTy);
1404 }
1405 case TargetOpcode::G_SELECT:
1406 return narrowScalarSelect(MI, TypeIdx, Ty: NarrowTy);
1407 case TargetOpcode::G_AND:
1408 case TargetOpcode::G_OR:
1409 case TargetOpcode::G_XOR: {
1410 // Legalize bitwise operation:
1411 // A = BinOp<Ty> B, C
1412 // into:
1413 // B1, ..., BN = G_UNMERGE_VALUES B
1414 // C1, ..., CN = G_UNMERGE_VALUES C
    // A1 = BinOp<Ty/N> B1, C1
1416 // ...
1417 // AN = BinOp<Ty/N> BN, CN
1418 // A = G_MERGE_VALUES A1, ..., AN
1419 return narrowScalarBasic(MI, TypeIdx, Ty: NarrowTy);
1420 }
1421 case TargetOpcode::G_SHL:
1422 case TargetOpcode::G_LSHR:
1423 case TargetOpcode::G_ASHR:
1424 return narrowScalarShift(MI, TypeIdx, Ty: NarrowTy);
1425 case TargetOpcode::G_CTLZ:
1426 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1427 case TargetOpcode::G_CTTZ:
1428 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1429 case TargetOpcode::G_CTPOP:
1430 if (TypeIdx == 1)
1431 switch (MI.getOpcode()) {
1432 case TargetOpcode::G_CTLZ:
1433 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1434 return narrowScalarCTLZ(MI, TypeIdx, Ty: NarrowTy);
1435 case TargetOpcode::G_CTTZ:
1436 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1437 return narrowScalarCTTZ(MI, TypeIdx, Ty: NarrowTy);
1438 case TargetOpcode::G_CTPOP:
1439 return narrowScalarCTPOP(MI, TypeIdx, Ty: NarrowTy);
1440 default:
1441 return UnableToLegalize;
1442 }
1443
1444 Observer.changingInstr(MI);
1445 narrowScalarDst(MI, NarrowTy, OpIdx: 0, ExtOpcode: TargetOpcode::G_ZEXT);
1446 Observer.changedInstr(MI);
1447 return Legalized;
1448 case TargetOpcode::G_INTTOPTR:
1449 if (TypeIdx != 1)
1450 return UnableToLegalize;
1451
1452 Observer.changingInstr(MI);
1453 narrowScalarSrc(MI, NarrowTy, OpIdx: 1);
1454 Observer.changedInstr(MI);
1455 return Legalized;
1456 case TargetOpcode::G_PTRTOINT:
1457 if (TypeIdx != 0)
1458 return UnableToLegalize;
1459
1460 Observer.changingInstr(MI);
1461 narrowScalarDst(MI, NarrowTy, OpIdx: 0, ExtOpcode: TargetOpcode::G_ZEXT);
1462 Observer.changedInstr(MI);
1463 return Legalized;
1464 case TargetOpcode::G_PHI: {
1465 // FIXME: add support for when SizeOp0 isn't an exact multiple of
1466 // NarrowSize.
1467 if (SizeOp0 % NarrowSize != 0)
1468 return UnableToLegalize;
1469
1470 unsigned NumParts = SizeOp0 / NarrowSize;
1471 SmallVector<Register, 2> DstRegs(NumParts);
1472 SmallVector<SmallVector<Register, 2>, 2> SrcRegs(MI.getNumOperands() / 2);
1473 Observer.changingInstr(MI);
1474 for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
1475 MachineBasicBlock &OpMBB = *MI.getOperand(i: i + 1).getMBB();
1476 MIRBuilder.setInsertPt(MBB&: OpMBB, II: OpMBB.getFirstTerminatorForward());
1477 extractParts(Reg: MI.getOperand(i).getReg(), Ty: NarrowTy, NumParts,
1478 VRegs&: SrcRegs[i / 2], MIRBuilder, MRI);
1479 }
1480 MachineBasicBlock &MBB = *MI.getParent();
1481 MIRBuilder.setInsertPt(MBB, II: MI);
1482 for (unsigned i = 0; i < NumParts; ++i) {
1483 DstRegs[i] = MRI.createGenericVirtualRegister(Ty: NarrowTy);
1484 MachineInstrBuilder MIB =
1485 MIRBuilder.buildInstr(Opcode: TargetOpcode::G_PHI).addDef(RegNo: DstRegs[i]);
1486 for (unsigned j = 1; j < MI.getNumOperands(); j += 2)
1487 MIB.addUse(RegNo: SrcRegs[j / 2][i]).add(MO: MI.getOperand(i: j + 1));
1488 }
1489 MIRBuilder.setInsertPt(MBB, II: MBB.getFirstNonPHI());
1490 MIRBuilder.buildMergeLikeInstr(Res: MI.getOperand(i: 0), Ops: DstRegs);
1491 Observer.changedInstr(MI);
1492 MI.eraseFromParent();
1493 return Legalized;
1494 }
1495 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
1496 case TargetOpcode::G_INSERT_VECTOR_ELT: {
1497 if (TypeIdx != 2)
1498 return UnableToLegalize;
1499
1500 int OpIdx = MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
1501 Observer.changingInstr(MI);
1502 narrowScalarSrc(MI, NarrowTy, OpIdx);
1503 Observer.changedInstr(MI);
1504 return Legalized;
1505 }
1506 case TargetOpcode::G_ICMP: {
1507 Register LHS = MI.getOperand(i: 2).getReg();
1508 LLT SrcTy = MRI.getType(Reg: LHS);
1509 uint64_t SrcSize = SrcTy.getSizeInBits();
1510 CmpInst::Predicate Pred =
1511 static_cast<CmpInst::Predicate>(MI.getOperand(i: 1).getPredicate());
1512
1513 // TODO: Handle the non-equality case for weird sizes.
1514 if (NarrowSize * 2 != SrcSize && !ICmpInst::isEquality(P: Pred))
1515 return UnableToLegalize;
1516
1517 LLT LeftoverTy; // Example: s88 -> s64 (NarrowTy) + s24 (leftover)
1518 SmallVector<Register, 4> LHSPartRegs, LHSLeftoverRegs;
1519 if (!extractParts(Reg: LHS, RegTy: SrcTy, MainTy: NarrowTy, LeftoverTy, VRegs&: LHSPartRegs,
1520 LeftoverVRegs&: LHSLeftoverRegs, MIRBuilder, MRI))
1521 return UnableToLegalize;
1522
1523 LLT Unused; // Matches LeftoverTy; G_ICMP LHS and RHS are the same type.
1524 SmallVector<Register, 4> RHSPartRegs, RHSLeftoverRegs;
1525 if (!extractParts(Reg: MI.getOperand(i: 3).getReg(), RegTy: SrcTy, MainTy: NarrowTy, LeftoverTy&: Unused,
1526 VRegs&: RHSPartRegs, LeftoverVRegs&: RHSLeftoverRegs, MIRBuilder, MRI))
1527 return UnableToLegalize;
1528
1529 // We now have the LHS and RHS of the compare split into narrow-type
1530 // registers, plus potentially some leftover type.
1531 Register Dst = MI.getOperand(i: 0).getReg();
1532 LLT ResTy = MRI.getType(Reg: Dst);
1533 if (ICmpInst::isEquality(P: Pred)) {
1534 // For each part on the LHS and RHS, keep track of the result of XOR-ing
1535 // them together. For each equal part, the result should be all 0s. For
1536 // each non-equal part, we'll get at least one 1.
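          // For example, an s64 equality compare narrowed with s32 becomes,
          // roughly:
          //   %x0:_(s32) = G_XOR %lhs0, %rhs0
          //   %x1:_(s32) = G_XOR %lhs1, %rhs1
          //   %or:_(s32) = G_OR %x0, %x1
          //   %dst:_(s1) = G_ICMP eq %or, 0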
1537 auto Zero = MIRBuilder.buildConstant(Res: NarrowTy, Val: 0);
1538 SmallVector<Register, 4> Xors;
1539 for (auto LHSAndRHS : zip(t&: LHSPartRegs, u&: RHSPartRegs)) {
1540 auto LHS = std::get<0>(t&: LHSAndRHS);
1541 auto RHS = std::get<1>(t&: LHSAndRHS);
1542 auto Xor = MIRBuilder.buildXor(Dst: NarrowTy, Src0: LHS, Src1: RHS).getReg(Idx: 0);
1543 Xors.push_back(Elt: Xor);
1544 }
1545
1546 // Build a G_XOR for each leftover register. Each G_XOR must be widened
1547 // to the desired narrow type so that we can OR them together later.
1548 SmallVector<Register, 4> WidenedXors;
1549 for (auto LHSAndRHS : zip(t&: LHSLeftoverRegs, u&: RHSLeftoverRegs)) {
1550 auto LHS = std::get<0>(t&: LHSAndRHS);
1551 auto RHS = std::get<1>(t&: LHSAndRHS);
1552 auto Xor = MIRBuilder.buildXor(Dst: LeftoverTy, Src0: LHS, Src1: RHS).getReg(Idx: 0);
1553 LLT GCDTy = extractGCDType(Parts&: WidenedXors, DstTy: NarrowTy, NarrowTy: LeftoverTy, SrcReg: Xor);
1554 buildLCMMergePieces(DstTy: LeftoverTy, NarrowTy, GCDTy, VRegs&: WidenedXors,
1555 /* PadStrategy = */ TargetOpcode::G_ZEXT);
1556 Xors.insert(I: Xors.end(), From: WidenedXors.begin(), To: WidenedXors.end());
1557 }
1558
1559 // Now, for each part we broke up, we know if they are equal/not equal
1560 // based off the G_XOR. We can OR these all together and compare against
1561 // 0 to get the result.
1562 assert(Xors.size() >= 2 && "Should have gotten at least two Xors?");
1563 auto Or = MIRBuilder.buildOr(Dst: NarrowTy, Src0: Xors[0], Src1: Xors[1]);
1564 for (unsigned I = 2, E = Xors.size(); I < E; ++I)
1565 Or = MIRBuilder.buildOr(Dst: NarrowTy, Src0: Or, Src1: Xors[I]);
1566 MIRBuilder.buildICmp(Pred, Res: Dst, Op0: Or, Op1: Zero);
1567 } else {
1568 // TODO: Handle non-power-of-two types.
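          // Compare the high parts with the original predicate and the low
          // parts with its unsigned form, then select:
          //   Dst = (LHSH == RHSH) ? icmp(unsigned Pred, LHSL, RHSL)
          //                        : icmp(Pred, LHSH, RHSH)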
1569 assert(LHSPartRegs.size() == 2 && "Expected exactly 2 LHS part regs?");
1570 assert(RHSPartRegs.size() == 2 && "Expected exactly 2 RHS part regs?");
1571 Register LHSL = LHSPartRegs[0];
1572 Register LHSH = LHSPartRegs[1];
1573 Register RHSL = RHSPartRegs[0];
1574 Register RHSH = RHSPartRegs[1];
1575 MachineInstrBuilder CmpH = MIRBuilder.buildICmp(Pred, Res: ResTy, Op0: LHSH, Op1: RHSH);
1576 MachineInstrBuilder CmpHEQ =
1577 MIRBuilder.buildICmp(Pred: CmpInst::Predicate::ICMP_EQ, Res: ResTy, Op0: LHSH, Op1: RHSH);
1578 MachineInstrBuilder CmpLU = MIRBuilder.buildICmp(
1579 Pred: ICmpInst::getUnsignedPredicate(pred: Pred), Res: ResTy, Op0: LHSL, Op1: RHSL);
1580 MIRBuilder.buildSelect(Res: Dst, Tst: CmpHEQ, Op0: CmpLU, Op1: CmpH);
1581 }
1582 MI.eraseFromParent();
1583 return Legalized;
1584 }
1585 case TargetOpcode::G_FCMP:
1586 if (TypeIdx != 0)
1587 return UnableToLegalize;
1588
1589 Observer.changingInstr(MI);
1590 narrowScalarDst(MI, NarrowTy, OpIdx: 0, ExtOpcode: TargetOpcode::G_ZEXT);
1591 Observer.changedInstr(MI);
1592 return Legalized;
1593
1594 case TargetOpcode::G_SEXT_INREG: {
1595 if (TypeIdx != 0)
1596 return UnableToLegalize;
1597
1598 int64_t SizeInBits = MI.getOperand(i: 2).getImm();
1599
1600    // So long as the new type has more bits than the bits we're extending, we
1601 // don't need to break it apart.
1602 if (NarrowTy.getScalarSizeInBits() > SizeInBits) {
1603 Observer.changingInstr(MI);
1604 // We don't lose any non-extension bits by truncating the src and
1605 // sign-extending the dst.
1606 MachineOperand &MO1 = MI.getOperand(i: 1);
1607 auto TruncMIB = MIRBuilder.buildTrunc(Res: NarrowTy, Op: MO1);
1608 MO1.setReg(TruncMIB.getReg(Idx: 0));
1609
1610 MachineOperand &MO2 = MI.getOperand(i: 0);
1611 Register DstExt = MRI.createGenericVirtualRegister(Ty: NarrowTy);
1612 MIRBuilder.setInsertPt(MBB&: MIRBuilder.getMBB(), II: ++MIRBuilder.getInsertPt());
1613 MIRBuilder.buildSExt(Res: MO2, Op: DstExt);
1614 MO2.setReg(DstExt);
1615 Observer.changedInstr(MI);
1616 return Legalized;
1617 }
1618
1619 // Break it apart. Components below the extension point are unmodified. The
1620 // component containing the extension point becomes a narrower SEXT_INREG.
1621 // Components above it are ashr'd from the component containing the
1622 // extension point.
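        // For example, splitting s64 = G_SEXT_INREG %src, 8 with NarrowTy s32:
        //   %lo:_(s32), %unused:_(s32) = G_UNMERGE_VALUES %src
        //   %lo_ext:_(s32) = G_SEXT_INREG %lo, 8
        //   %hi:_(s32) = G_ASHR %lo_ext, 31
        //   %dst:_(s64) = G_MERGE_VALUES %lo_ext, %hi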
1623 if (SizeOp0 % NarrowSize != 0)
1624 return UnableToLegalize;
1625 int NumParts = SizeOp0 / NarrowSize;
1626
1627 // List the registers where the destination will be scattered.
1628 SmallVector<Register, 2> DstRegs;
1629 // List the registers where the source will be split.
1630 SmallVector<Register, 2> SrcRegs;
1631
1632 // Create all the temporary registers.
1633 for (int i = 0; i < NumParts; ++i) {
1634 Register SrcReg = MRI.createGenericVirtualRegister(Ty: NarrowTy);
1635
1636 SrcRegs.push_back(Elt: SrcReg);
1637 }
1638
1639 // Explode the big arguments into smaller chunks.
1640 MIRBuilder.buildUnmerge(Res: SrcRegs, Op: MI.getOperand(i: 1));
1641
1642 Register AshrCstReg =
1643 MIRBuilder.buildConstant(Res: NarrowTy, Val: NarrowTy.getScalarSizeInBits() - 1)
1644 .getReg(Idx: 0);
1645 Register FullExtensionReg;
1646 Register PartialExtensionReg;
1647
1648 // Do the operation on each small part.
1649 for (int i = 0; i < NumParts; ++i) {
1650 if ((i + 1) * NarrowTy.getScalarSizeInBits() <= SizeInBits) {
1651 DstRegs.push_back(Elt: SrcRegs[i]);
1652 PartialExtensionReg = DstRegs.back();
1653 } else if (i * NarrowTy.getScalarSizeInBits() >= SizeInBits) {
1654 assert(PartialExtensionReg &&
1655 "Expected to visit partial extension before full");
1656 if (FullExtensionReg) {
1657 DstRegs.push_back(Elt: FullExtensionReg);
1658 continue;
1659 }
1660 DstRegs.push_back(
1661 Elt: MIRBuilder.buildAShr(Dst: NarrowTy, Src0: PartialExtensionReg, Src1: AshrCstReg)
1662 .getReg(Idx: 0));
1663 FullExtensionReg = DstRegs.back();
1664 } else {
1665 DstRegs.push_back(
1666 Elt: MIRBuilder
1667 .buildInstr(
1668 Opc: TargetOpcode::G_SEXT_INREG, DstOps: {NarrowTy},
1669 SrcOps: {SrcRegs[i], SizeInBits % NarrowTy.getScalarSizeInBits()})
1670 .getReg(Idx: 0));
1671 PartialExtensionReg = DstRegs.back();
1672 }
1673 }
1674
1675 // Gather the destination registers into the final destination.
1676 Register DstReg = MI.getOperand(i: 0).getReg();
1677 MIRBuilder.buildMergeLikeInstr(Res: DstReg, Ops: DstRegs);
1678 MI.eraseFromParent();
1679 return Legalized;
1680 }
1681 case TargetOpcode::G_BSWAP:
1682 case TargetOpcode::G_BITREVERSE: {
1683 if (SizeOp0 % NarrowSize != 0)
1684 return UnableToLegalize;
1685
1686 Observer.changingInstr(MI);
1687 SmallVector<Register, 2> SrcRegs, DstRegs;
1688 unsigned NumParts = SizeOp0 / NarrowSize;
1689 extractParts(Reg: MI.getOperand(i: 1).getReg(), Ty: NarrowTy, NumParts, VRegs&: SrcRegs,
1690 MIRBuilder, MRI);
1691
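        // Byte-swap / bit-reverse each narrow part, then reassemble the parts
        // in reverse order: the whole-width operation also swaps which end of
        // the value each part ends up in.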
1692 for (unsigned i = 0; i < NumParts; ++i) {
1693 auto DstPart = MIRBuilder.buildInstr(Opc: MI.getOpcode(), DstOps: {NarrowTy},
1694 SrcOps: {SrcRegs[NumParts - 1 - i]});
1695 DstRegs.push_back(Elt: DstPart.getReg(Idx: 0));
1696 }
1697
1698 MIRBuilder.buildMergeLikeInstr(Res: MI.getOperand(i: 0), Ops: DstRegs);
1699
1700 Observer.changedInstr(MI);
1701 MI.eraseFromParent();
1702 return Legalized;
1703 }
1704 case TargetOpcode::G_PTR_ADD:
1705 case TargetOpcode::G_PTRMASK: {
1706 if (TypeIdx != 1)
1707 return UnableToLegalize;
1708 Observer.changingInstr(MI);
1709 narrowScalarSrc(MI, NarrowTy, OpIdx: 2);
1710 Observer.changedInstr(MI);
1711 return Legalized;
1712 }
1713 case TargetOpcode::G_FPTOUI:
1714 case TargetOpcode::G_FPTOSI:
1715 return narrowScalarFPTOI(MI, TypeIdx, Ty: NarrowTy);
1716 case TargetOpcode::G_FPEXT:
1717 if (TypeIdx != 0)
1718 return UnableToLegalize;
1719 Observer.changingInstr(MI);
1720 narrowScalarDst(MI, NarrowTy, OpIdx: 0, ExtOpcode: TargetOpcode::G_FPEXT);
1721 Observer.changedInstr(MI);
1722 return Legalized;
1723 case TargetOpcode::G_FLDEXP:
1724 case TargetOpcode::G_STRICT_FLDEXP:
1725 return narrowScalarFLDEXP(MI, TypeIdx, Ty: NarrowTy);
1726 case TargetOpcode::G_VSCALE: {
1727 Register Dst = MI.getOperand(i: 0).getReg();
1728 LLT Ty = MRI.getType(Reg: Dst);
1729
1730 // Assume VSCALE(1) fits into a legal integer
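        // Compute G_VSCALE(1) in NarrowTy, zero-extend it to the result type,
        // and multiply by the requested constant there.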
1731 const APInt One(NarrowTy.getSizeInBits(), 1);
1732 auto VScaleBase = MIRBuilder.buildVScale(Res: NarrowTy, MinElts: One);
1733 auto ZExt = MIRBuilder.buildZExt(Res: Ty, Op: VScaleBase);
1734 auto C = MIRBuilder.buildConstant(Res: Ty, Val: *MI.getOperand(i: 1).getCImm());
1735 MIRBuilder.buildMul(Dst, Src0: ZExt, Src1: C);
1736
1737 MI.eraseFromParent();
1738 return Legalized;
1739 }
1740 }
1741}
1742
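    /// Coerce \p Val to an equally-sized scalar: pointers are converted with
    /// G_PTRTOINT and vectors with G_BITCAST (pointer vectors go through
    /// G_PTRTOINT first). Returns an invalid Register for pointers in a
    /// non-integral address space.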
1743Register LegalizerHelper::coerceToScalar(Register Val) {
1744 LLT Ty = MRI.getType(Reg: Val);
1745 if (Ty.isScalar())
1746 return Val;
1747
1748 const DataLayout &DL = MIRBuilder.getDataLayout();
1749 LLT NewTy = LLT::scalar(SizeInBits: Ty.getSizeInBits());
1750 if (Ty.isPointer()) {
1751 if (DL.isNonIntegralAddressSpace(AddrSpace: Ty.getAddressSpace()))
1752 return Register();
1753 return MIRBuilder.buildPtrToInt(Dst: NewTy, Src: Val).getReg(Idx: 0);
1754 }
1755
1756 Register NewVal = Val;
1757
1758 assert(Ty.isVector());
1759 if (Ty.isPointerVector())
1760 NewVal = MIRBuilder.buildPtrToInt(Dst: NewTy, Src: NewVal).getReg(Idx: 0);
1761 return MIRBuilder.buildBitcast(Dst: NewTy, Src: NewVal).getReg(Idx: 0);
1762}
1763
1764void LegalizerHelper::widenScalarSrc(MachineInstr &MI, LLT WideTy,
1765 unsigned OpIdx, unsigned ExtOpcode) {
1766 MachineOperand &MO = MI.getOperand(i: OpIdx);
1767 auto ExtB = MIRBuilder.buildInstr(Opc: ExtOpcode, DstOps: {WideTy}, SrcOps: {MO});
1768 MO.setReg(ExtB.getReg(Idx: 0));
1769}
1770
1771void LegalizerHelper::narrowScalarSrc(MachineInstr &MI, LLT NarrowTy,
1772 unsigned OpIdx) {
1773 MachineOperand &MO = MI.getOperand(i: OpIdx);
1774 auto ExtB = MIRBuilder.buildTrunc(Res: NarrowTy, Op: MO);
1775 MO.setReg(ExtB.getReg(Idx: 0));
1776}
1777
1778void LegalizerHelper::widenScalarDst(MachineInstr &MI, LLT WideTy,
1779 unsigned OpIdx, unsigned TruncOpcode) {
1780 MachineOperand &MO = MI.getOperand(i: OpIdx);
1781 Register DstExt = MRI.createGenericVirtualRegister(Ty: WideTy);
1782 MIRBuilder.setInsertPt(MBB&: MIRBuilder.getMBB(), II: ++MIRBuilder.getInsertPt());
1783 MIRBuilder.buildInstr(Opc: TruncOpcode, DstOps: {MO}, SrcOps: {DstExt});
1784 MO.setReg(DstExt);
1785}
1786
1787void LegalizerHelper::narrowScalarDst(MachineInstr &MI, LLT NarrowTy,
1788 unsigned OpIdx, unsigned ExtOpcode) {
1789 MachineOperand &MO = MI.getOperand(i: OpIdx);
1790 Register DstTrunc = MRI.createGenericVirtualRegister(Ty: NarrowTy);
1791 MIRBuilder.setInsertPt(MBB&: MIRBuilder.getMBB(), II: ++MIRBuilder.getInsertPt());
1792 MIRBuilder.buildInstr(Opc: ExtOpcode, DstOps: {MO}, SrcOps: {DstTrunc});
1793 MO.setReg(DstTrunc);
1794}
1795
1796void LegalizerHelper::moreElementsVectorDst(MachineInstr &MI, LLT WideTy,
1797 unsigned OpIdx) {
1798 MachineOperand &MO = MI.getOperand(i: OpIdx);
1799 MIRBuilder.setInsertPt(MBB&: MIRBuilder.getMBB(), II: ++MIRBuilder.getInsertPt());
1800 Register Dst = MO.getReg();
1801 Register DstExt = MRI.createGenericVirtualRegister(Ty: WideTy);
1802 MO.setReg(DstExt);
1803 MIRBuilder.buildDeleteTrailingVectorElements(Res: Dst, Op0: DstExt);
1804}
1805
1806void LegalizerHelper::moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy,
1807 unsigned OpIdx) {
1808 MachineOperand &MO = MI.getOperand(i: OpIdx);
1809 SmallVector<Register, 8> Regs;
1810 MO.setReg(MIRBuilder.buildPadVectorWithUndefElements(Res: MoreTy, Op0: MO).getReg(Idx: 0));
1811}
1812
1813void LegalizerHelper::bitcastSrc(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
1814 MachineOperand &Op = MI.getOperand(i: OpIdx);
1815 Op.setReg(MIRBuilder.buildBitcast(Dst: CastTy, Src: Op).getReg(Idx: 0));
1816}
1817
1818void LegalizerHelper::bitcastDst(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
1819 MachineOperand &MO = MI.getOperand(i: OpIdx);
1820 Register CastDst = MRI.createGenericVirtualRegister(Ty: CastTy);
1821 MIRBuilder.setInsertPt(MBB&: MIRBuilder.getMBB(), II: ++MIRBuilder.getInsertPt());
1822 MIRBuilder.buildBitcast(Dst: MO, Src: CastDst);
1823 MO.setReg(CastDst);
1824}
1825
1826LegalizerHelper::LegalizeResult
1827LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
1828 LLT WideTy) {
1829 if (TypeIdx != 1)
1830 return UnableToLegalize;
1831
1832 auto [DstReg, DstTy, Src1Reg, Src1Ty] = MI.getFirst2RegLLTs();
1833 if (DstTy.isVector())
1834 return UnableToLegalize;
1835
1836 LLT SrcTy = MRI.getType(Reg: Src1Reg);
1837 const int DstSize = DstTy.getSizeInBits();
1838 const int SrcSize = SrcTy.getSizeInBits();
1839 const int WideSize = WideTy.getSizeInBits();
1840 const int NumMerge = (DstSize + WideSize - 1) / WideSize;
1841
1842 unsigned NumOps = MI.getNumOperands();
1843 unsigned NumSrc = MI.getNumOperands() - 1;
1844 unsigned PartSize = DstTy.getSizeInBits() / NumSrc;
1845
1846 if (WideSize >= DstSize) {
1847 // Directly pack the bits in the target type.
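        // For example, %d:_(s16) = G_MERGE_VALUES %a:_(s8), %b:_(s8) with a
        // requested s32:
        //   %za:_(s32) = G_ZEXT %a
        //   %zb:_(s32) = G_ZEXT %b
        //   %sh:_(s32) = G_SHL %zb, 8
        //   %or:_(s32) = G_OR %za, %sh
        //   %d:_(s16) = G_TRUNC %or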
1848 Register ResultReg = MIRBuilder.buildZExt(Res: WideTy, Op: Src1Reg).getReg(Idx: 0);
1849
1850 for (unsigned I = 2; I != NumOps; ++I) {
1851 const unsigned Offset = (I - 1) * PartSize;
1852
1853 Register SrcReg = MI.getOperand(i: I).getReg();
1854 assert(MRI.getType(SrcReg) == LLT::scalar(PartSize));
1855
1856 auto ZextInput = MIRBuilder.buildZExt(Res: WideTy, Op: SrcReg);
1857
1858 Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
1859 MRI.createGenericVirtualRegister(Ty: WideTy);
1860
1861 auto ShiftAmt = MIRBuilder.buildConstant(Res: WideTy, Val: Offset);
1862 auto Shl = MIRBuilder.buildShl(Dst: WideTy, Src0: ZextInput, Src1: ShiftAmt);
1863 MIRBuilder.buildOr(Dst: NextResult, Src0: ResultReg, Src1: Shl);
1864 ResultReg = NextResult;
1865 }
1866
1867 if (WideSize > DstSize)
1868 MIRBuilder.buildTrunc(Res: DstReg, Op: ResultReg);
1869 else if (DstTy.isPointer())
1870 MIRBuilder.buildIntToPtr(Dst: DstReg, Src: ResultReg);
1871
1872 MI.eraseFromParent();
1873 return Legalized;
1874 }
1875
1876 // Unmerge the original values to the GCD type, and recombine to the next
1877 // multiple greater than the original type.
1878 //
1879 // %3:_(s12) = G_MERGE_VALUES %0:_(s4), %1:_(s4), %2:_(s4) -> s6
1880 // %4:_(s2), %5:_(s2) = G_UNMERGE_VALUES %0
1881 // %6:_(s2), %7:_(s2) = G_UNMERGE_VALUES %1
1882 // %8:_(s2), %9:_(s2) = G_UNMERGE_VALUES %2
1883 // %10:_(s6) = G_MERGE_VALUES %4, %5, %6
1884 // %11:_(s6) = G_MERGE_VALUES %7, %8, %9
1885 // %12:_(s12) = G_MERGE_VALUES %10, %11
1886 //
1887 // Padding with undef if necessary:
1888 //
1889 // %2:_(s8) = G_MERGE_VALUES %0:_(s4), %1:_(s4) -> s6
1890 // %3:_(s2), %4:_(s2) = G_UNMERGE_VALUES %0
1891 // %5:_(s2), %6:_(s2) = G_UNMERGE_VALUES %1
1892 // %7:_(s2) = G_IMPLICIT_DEF
1893 // %8:_(s6) = G_MERGE_VALUES %3, %4, %5
1894 // %9:_(s6) = G_MERGE_VALUES %6, %7, %7
1895 // %10:_(s12) = G_MERGE_VALUES %8, %9
1896
1897 const int GCD = std::gcd(m: SrcSize, n: WideSize);
1898 LLT GCDTy = LLT::scalar(SizeInBits: GCD);
1899
1900 SmallVector<Register, 8> Parts;
1901 SmallVector<Register, 8> NewMergeRegs;
1902 SmallVector<Register, 8> Unmerges;
1903 LLT WideDstTy = LLT::scalar(SizeInBits: NumMerge * WideSize);
1904
1905 // Decompose the original operands if they don't evenly divide.
1906 for (const MachineOperand &MO : llvm::drop_begin(RangeOrContainer: MI.operands())) {
1907 Register SrcReg = MO.getReg();
1908 if (GCD == SrcSize) {
1909 Unmerges.push_back(Elt: SrcReg);
1910 } else {
1911 auto Unmerge = MIRBuilder.buildUnmerge(Res: GCDTy, Op: SrcReg);
1912 for (int J = 0, JE = Unmerge->getNumOperands() - 1; J != JE; ++J)
1913 Unmerges.push_back(Elt: Unmerge.getReg(Idx: J));
1914 }
1915 }
1916
1917 // Pad with undef to the next size that is a multiple of the requested size.
1918 if (static_cast<int>(Unmerges.size()) != NumMerge * WideSize) {
1919 Register UndefReg = MIRBuilder.buildUndef(Res: GCDTy).getReg(Idx: 0);
1920 for (int I = Unmerges.size(); I != NumMerge * WideSize; ++I)
1921 Unmerges.push_back(Elt: UndefReg);
1922 }
1923
1924 const int PartsPerGCD = WideSize / GCD;
1925
1926 // Build merges of each piece.
1927 ArrayRef<Register> Slicer(Unmerges);
1928 for (int I = 0; I != NumMerge; ++I, Slicer = Slicer.drop_front(N: PartsPerGCD)) {
1929 auto Merge =
1930 MIRBuilder.buildMergeLikeInstr(Res: WideTy, Ops: Slicer.take_front(N: PartsPerGCD));
1931 NewMergeRegs.push_back(Elt: Merge.getReg(Idx: 0));
1932 }
1933
1934 // A truncate may be necessary if the requested type doesn't evenly divide the
1935 // original result type.
1936 if (DstTy.getSizeInBits() == WideDstTy.getSizeInBits()) {
1937 MIRBuilder.buildMergeLikeInstr(Res: DstReg, Ops: NewMergeRegs);
1938 } else {
1939 auto FinalMerge = MIRBuilder.buildMergeLikeInstr(Res: WideDstTy, Ops: NewMergeRegs);
1940 MIRBuilder.buildTrunc(Res: DstReg, Op: FinalMerge.getReg(Idx: 0));
1941 }
1942
1943 MI.eraseFromParent();
1944 return Legalized;
1945}
1946
1947LegalizerHelper::LegalizeResult
1948LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx,
1949 LLT WideTy) {
1950 if (TypeIdx != 0)
1951 return UnableToLegalize;
1952
1953 int NumDst = MI.getNumOperands() - 1;
1954 Register SrcReg = MI.getOperand(i: NumDst).getReg();
1955 LLT SrcTy = MRI.getType(Reg: SrcReg);
1956 if (SrcTy.isVector())
1957 return UnableToLegalize;
1958
1959 Register Dst0Reg = MI.getOperand(i: 0).getReg();
1960 LLT DstTy = MRI.getType(Reg: Dst0Reg);
1961 if (!DstTy.isScalar())
1962 return UnableToLegalize;
1963
1964 if (WideTy.getSizeInBits() >= SrcTy.getSizeInBits()) {
1965 if (SrcTy.isPointer()) {
1966 const DataLayout &DL = MIRBuilder.getDataLayout();
1967 if (DL.isNonIntegralAddressSpace(AddrSpace: SrcTy.getAddressSpace())) {
1968 LLVM_DEBUG(
1969 dbgs() << "Not casting non-integral address space integer\n");
1970 return UnableToLegalize;
1971 }
1972
1973 SrcTy = LLT::scalar(SizeInBits: SrcTy.getSizeInBits());
1974 SrcReg = MIRBuilder.buildPtrToInt(Dst: SrcTy, Src: SrcReg).getReg(Idx: 0);
1975 }
1976
1977 // Widen SrcTy to WideTy. This does not affect the result, but since the
1978 // user requested this size, it is probably better handled than SrcTy and
1979 // should reduce the total number of legalization artifacts.
1980 if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
1981 SrcTy = WideTy;
1982 SrcReg = MIRBuilder.buildAnyExt(Res: WideTy, Op: SrcReg).getReg(Idx: 0);
1983 }
1984
1985    // There's no unmerge type to target. Directly extract the bits from the
1986    // source type.
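        // For example, %a:_(s8), %b:_(s8) = G_UNMERGE_VALUES %x:_(s16) with a
        // requested s32:
        //   %w:_(s32) = G_ANYEXT %x
        //   %a:_(s8) = G_TRUNC %w
        //   %sh:_(s32) = G_LSHR %w, 8
        //   %b:_(s8) = G_TRUNC %sh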
1987 unsigned DstSize = DstTy.getSizeInBits();
1988
1989 MIRBuilder.buildTrunc(Res: Dst0Reg, Op: SrcReg);
1990 for (int I = 1; I != NumDst; ++I) {
1991 auto ShiftAmt = MIRBuilder.buildConstant(Res: SrcTy, Val: DstSize * I);
1992 auto Shr = MIRBuilder.buildLShr(Dst: SrcTy, Src0: SrcReg, Src1: ShiftAmt);
1993 MIRBuilder.buildTrunc(Res: MI.getOperand(i: I), Op: Shr);
1994 }
1995
1996 MI.eraseFromParent();
1997 return Legalized;
1998 }
1999
2000 // Extend the source to a wider type.
2001 LLT LCMTy = getLCMType(OrigTy: SrcTy, TargetTy: WideTy);
2002
2003 Register WideSrc = SrcReg;
2004 if (LCMTy.getSizeInBits() != SrcTy.getSizeInBits()) {
2005 // TODO: If this is an integral address space, cast to integer and anyext.
2006 if (SrcTy.isPointer()) {
2007 LLVM_DEBUG(dbgs() << "Widening pointer source types not implemented\n");
2008 return UnableToLegalize;
2009 }
2010
2011 WideSrc = MIRBuilder.buildAnyExt(Res: LCMTy, Op: WideSrc).getReg(Idx: 0);
2012 }
2013
2014 auto Unmerge = MIRBuilder.buildUnmerge(Res: WideTy, Op: WideSrc);
2015
2016 // Create a sequence of unmerges and merges to the original results. Since we
2017 // may have widened the source, we will need to pad the results with dead defs
2018 // to cover the source register.
2019 // e.g. widen s48 to s64:
2020 // %1:_(s48), %2:_(s48) = G_UNMERGE_VALUES %0:_(s96)
2021 //
2022 // =>
2023 // %4:_(s192) = G_ANYEXT %0:_(s96)
2024 // %5:_(s64), %6, %7 = G_UNMERGE_VALUES %4 ; Requested unmerge
2025 // ; unpack to GCD type, with extra dead defs
2026 // %8:_(s16), %9, %10, %11 = G_UNMERGE_VALUES %5:_(s64)
2027 // %12:_(s16), %13, dead %14, dead %15 = G_UNMERGE_VALUES %6:_(s64)
2028  //  dead %16:_(s16), dead %17, dead %18, dead %19 = G_UNMERGE_VALUES %7:_(s64)
2029 // %1:_(s48) = G_MERGE_VALUES %8:_(s16), %9, %10 ; Remerge to destination
2030 // %2:_(s48) = G_MERGE_VALUES %11:_(s16), %12, %13 ; Remerge to destination
2031 const LLT GCDTy = getGCDType(OrigTy: WideTy, TargetTy: DstTy);
2032 const int NumUnmerge = Unmerge->getNumOperands() - 1;
2033 const int PartsPerRemerge = DstTy.getSizeInBits() / GCDTy.getSizeInBits();
2034
2035 // Directly unmerge to the destination without going through a GCD type
2036 // if possible
2037 if (PartsPerRemerge == 1) {
2038 const int PartsPerUnmerge = WideTy.getSizeInBits() / DstTy.getSizeInBits();
2039
2040 for (int I = 0; I != NumUnmerge; ++I) {
2041 auto MIB = MIRBuilder.buildInstr(Opcode: TargetOpcode::G_UNMERGE_VALUES);
2042
2043 for (int J = 0; J != PartsPerUnmerge; ++J) {
2044 int Idx = I * PartsPerUnmerge + J;
2045 if (Idx < NumDst)
2046 MIB.addDef(RegNo: MI.getOperand(i: Idx).getReg());
2047 else {
2048 // Create dead def for excess components.
2049 MIB.addDef(RegNo: MRI.createGenericVirtualRegister(Ty: DstTy));
2050 }
2051 }
2052
2053 MIB.addUse(RegNo: Unmerge.getReg(Idx: I));
2054 }
2055 } else {
2056 SmallVector<Register, 16> Parts;
2057 for (int J = 0; J != NumUnmerge; ++J)
2058 extractGCDType(Parts, GCDTy, SrcReg: Unmerge.getReg(Idx: J));
2059
2060 SmallVector<Register, 8> RemergeParts;
2061 for (int I = 0; I != NumDst; ++I) {
2062 for (int J = 0; J < PartsPerRemerge; ++J) {
2063 const int Idx = I * PartsPerRemerge + J;
2064 RemergeParts.emplace_back(Args&: Parts[Idx]);
2065 }
2066
2067 MIRBuilder.buildMergeLikeInstr(Res: MI.getOperand(i: I).getReg(), Ops: RemergeParts);
2068 RemergeParts.clear();
2069 }
2070 }
2071
2072 MI.eraseFromParent();
2073 return Legalized;
2074}
2075
2076LegalizerHelper::LegalizeResult
2077LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx,
2078 LLT WideTy) {
2079 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
2080 unsigned Offset = MI.getOperand(i: 2).getImm();
2081
2082 if (TypeIdx == 0) {
2083 if (SrcTy.isVector() || DstTy.isVector())
2084 return UnableToLegalize;
2085
2086 SrcOp Src(SrcReg);
2087 if (SrcTy.isPointer()) {
2088 // Extracts from pointers can be handled only if they are really just
2089 // simple integers.
2090 const DataLayout &DL = MIRBuilder.getDataLayout();
2091 if (DL.isNonIntegralAddressSpace(AddrSpace: SrcTy.getAddressSpace()))
2092 return UnableToLegalize;
2093
2094 LLT SrcAsIntTy = LLT::scalar(SizeInBits: SrcTy.getSizeInBits());
2095 Src = MIRBuilder.buildPtrToInt(Dst: SrcAsIntTy, Src);
2096 SrcTy = SrcAsIntTy;
2097 }
2098
2099 if (DstTy.isPointer())
2100 return UnableToLegalize;
2101
2102 if (Offset == 0) {
2103 // Avoid a shift in the degenerate case.
2104 MIRBuilder.buildTrunc(Res: DstReg,
2105 Op: MIRBuilder.buildAnyExtOrTrunc(Res: WideTy, Op: Src));
2106 MI.eraseFromParent();
2107 return Legalized;
2108 }
2109
2110 // Do a shift in the source type.
2111 LLT ShiftTy = SrcTy;
2112 if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
2113 Src = MIRBuilder.buildAnyExt(Res: WideTy, Op: Src);
2114 ShiftTy = WideTy;
2115 }
2116
2117 auto LShr = MIRBuilder.buildLShr(
2118 Dst: ShiftTy, Src0: Src, Src1: MIRBuilder.buildConstant(Res: ShiftTy, Val: Offset));
2119 MIRBuilder.buildTrunc(Res: DstReg, Op: LShr);
2120 MI.eraseFromParent();
2121 return Legalized;
2122 }
2123
2124 if (SrcTy.isScalar()) {
2125 Observer.changingInstr(MI);
2126 widenScalarSrc(MI, WideTy, OpIdx: 1, ExtOpcode: TargetOpcode::G_ANYEXT);
2127 Observer.changedInstr(MI);
2128 return Legalized;
2129 }
2130
2131 if (!SrcTy.isVector())
2132 return UnableToLegalize;
2133
2134 if (DstTy != SrcTy.getElementType())
2135 return UnableToLegalize;
2136
2137 if (Offset % SrcTy.getScalarSizeInBits() != 0)
2138 return UnableToLegalize;
2139
2140 Observer.changingInstr(MI);
2141 widenScalarSrc(MI, WideTy, OpIdx: 1, ExtOpcode: TargetOpcode::G_ANYEXT);
2142
2143 MI.getOperand(i: 2).setImm((WideTy.getSizeInBits() / SrcTy.getSizeInBits()) *
2144 Offset);
2145 widenScalarDst(MI, WideTy: WideTy.getScalarType(), OpIdx: 0);
2146 Observer.changedInstr(MI);
2147 return Legalized;
2148}
2149
2150LegalizerHelper::LegalizeResult
2151LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx,
2152 LLT WideTy) {
2153 if (TypeIdx != 0 || WideTy.isVector())
2154 return UnableToLegalize;
2155 Observer.changingInstr(MI);
2156 widenScalarSrc(MI, WideTy, OpIdx: 1, ExtOpcode: TargetOpcode::G_ANYEXT);
2157 widenScalarDst(MI, WideTy);
2158 Observer.changedInstr(MI);
2159 return Legalized;
2160}
2161
2162LegalizerHelper::LegalizeResult
2163LegalizerHelper::widenScalarAddSubOverflow(MachineInstr &MI, unsigned TypeIdx,
2164 LLT WideTy) {
2165 unsigned Opcode;
2166 unsigned ExtOpcode;
2167 std::optional<Register> CarryIn;
2168 switch (MI.getOpcode()) {
2169 default:
2170 llvm_unreachable("Unexpected opcode!");
2171 case TargetOpcode::G_SADDO:
2172 Opcode = TargetOpcode::G_ADD;
2173 ExtOpcode = TargetOpcode::G_SEXT;
2174 break;
2175 case TargetOpcode::G_SSUBO:
2176 Opcode = TargetOpcode::G_SUB;
2177 ExtOpcode = TargetOpcode::G_SEXT;
2178 break;
2179 case TargetOpcode::G_UADDO:
2180 Opcode = TargetOpcode::G_ADD;
2181 ExtOpcode = TargetOpcode::G_ZEXT;
2182 break;
2183 case TargetOpcode::G_USUBO:
2184 Opcode = TargetOpcode::G_SUB;
2185 ExtOpcode = TargetOpcode::G_ZEXT;
2186 break;
2187 case TargetOpcode::G_SADDE:
2188 Opcode = TargetOpcode::G_UADDE;
2189 ExtOpcode = TargetOpcode::G_SEXT;
2190 CarryIn = MI.getOperand(i: 4).getReg();
2191 break;
2192 case TargetOpcode::G_SSUBE:
2193 Opcode = TargetOpcode::G_USUBE;
2194 ExtOpcode = TargetOpcode::G_SEXT;
2195 CarryIn = MI.getOperand(i: 4).getReg();
2196 break;
2197 case TargetOpcode::G_UADDE:
2198 Opcode = TargetOpcode::G_UADDE;
2199 ExtOpcode = TargetOpcode::G_ZEXT;
2200 CarryIn = MI.getOperand(i: 4).getReg();
2201 break;
2202 case TargetOpcode::G_USUBE:
2203 Opcode = TargetOpcode::G_USUBE;
2204 ExtOpcode = TargetOpcode::G_ZEXT;
2205 CarryIn = MI.getOperand(i: 4).getReg();
2206 break;
2207 }
2208
2209 if (TypeIdx == 1) {
2210 unsigned BoolExtOp = MIRBuilder.getBoolExtOp(IsVec: WideTy.isVector(), IsFP: false);
2211
2212 Observer.changingInstr(MI);
2213 if (CarryIn)
2214 widenScalarSrc(MI, WideTy, OpIdx: 4, ExtOpcode: BoolExtOp);
2215 widenScalarDst(MI, WideTy, OpIdx: 1);
2216
2217 Observer.changedInstr(MI);
2218 return Legalized;
2219 }
2220
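      // For example, %r:_(s8), %o:_(s1) = G_UADDO %a, %b widened to s32:
      //   %s:_(s32) = G_ADD (G_ZEXT %a), (G_ZEXT %b)
      //   %o:_(s1) = G_ICMP ne %s, (G_ZEXT (G_TRUNC %s to s8))
      //   %r:_(s8) = G_TRUNC %s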
2221 auto LHSExt = MIRBuilder.buildInstr(Opc: ExtOpcode, DstOps: {WideTy}, SrcOps: {MI.getOperand(i: 2)});
2222 auto RHSExt = MIRBuilder.buildInstr(Opc: ExtOpcode, DstOps: {WideTy}, SrcOps: {MI.getOperand(i: 3)});
2223 // Do the arithmetic in the larger type.
2224 Register NewOp;
2225 if (CarryIn) {
2226 LLT CarryOutTy = MRI.getType(Reg: MI.getOperand(i: 1).getReg());
2227 NewOp = MIRBuilder
2228 .buildInstr(Opc: Opcode, DstOps: {WideTy, CarryOutTy},
2229 SrcOps: {LHSExt, RHSExt, *CarryIn})
2230 .getReg(Idx: 0);
2231 } else {
2232 NewOp = MIRBuilder.buildInstr(Opc: Opcode, DstOps: {WideTy}, SrcOps: {LHSExt, RHSExt}).getReg(Idx: 0);
2233 }
2234 LLT OrigTy = MRI.getType(Reg: MI.getOperand(i: 0).getReg());
2235 auto TruncOp = MIRBuilder.buildTrunc(Res: OrigTy, Op: NewOp);
2236 auto ExtOp = MIRBuilder.buildInstr(Opc: ExtOpcode, DstOps: {WideTy}, SrcOps: {TruncOp});
2237 // There is no overflow if the ExtOp is the same as NewOp.
2238 MIRBuilder.buildICmp(Pred: CmpInst::ICMP_NE, Res: MI.getOperand(i: 1), Op0: NewOp, Op1: ExtOp);
2239 // Now trunc the NewOp to the original result.
2240 MIRBuilder.buildTrunc(Res: MI.getOperand(i: 0), Op: NewOp);
2241 MI.eraseFromParent();
2242 return Legalized;
2243}
2244
2245LegalizerHelper::LegalizeResult
2246LegalizerHelper::widenScalarAddSubShlSat(MachineInstr &MI, unsigned TypeIdx,
2247 LLT WideTy) {
2248 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SADDSAT ||
2249 MI.getOpcode() == TargetOpcode::G_SSUBSAT ||
2250 MI.getOpcode() == TargetOpcode::G_SSHLSAT;
2251 bool IsShift = MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
2252 MI.getOpcode() == TargetOpcode::G_USHLSAT;
2253 // We can convert this to:
2254 // 1. Any extend iN to iM
2255 // 2. SHL by M-N
2256 // 3. [US][ADD|SUB|SHL]SAT
2257 // 4. L/ASHR by M-N
2258 //
2259 // It may be more efficient to lower this to a min and a max operation in
2260 // the higher precision arithmetic if the promoted operation isn't legal,
2261 // but this decision is up to the target's lowering request.
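      // For example, %d:_(s8) = G_UADDSAT %a, %b widened to s32 (SHLAmount 24):
      //   %la:_(s32) = G_SHL (G_ANYEXT %a), 24
      //   %lb:_(s32) = G_SHL (G_ANYEXT %b), 24
      //   %s:_(s32) = G_UADDSAT %la, %lb
      //   %d:_(s8) = G_TRUNC (G_LSHR %s, 24)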
2262 Register DstReg = MI.getOperand(i: 0).getReg();
2263
2264 unsigned NewBits = WideTy.getScalarSizeInBits();
2265 unsigned SHLAmount = NewBits - MRI.getType(Reg: DstReg).getScalarSizeInBits();
2266
2267 // Shifts must zero-extend the RHS to preserve the unsigned quantity, and
2268 // must not left shift the RHS to preserve the shift amount.
2269 auto LHS = MIRBuilder.buildAnyExt(Res: WideTy, Op: MI.getOperand(i: 1));
2270 auto RHS = IsShift ? MIRBuilder.buildZExt(Res: WideTy, Op: MI.getOperand(i: 2))
2271 : MIRBuilder.buildAnyExt(Res: WideTy, Op: MI.getOperand(i: 2));
2272 auto ShiftK = MIRBuilder.buildConstant(Res: WideTy, Val: SHLAmount);
2273 auto ShiftL = MIRBuilder.buildShl(Dst: WideTy, Src0: LHS, Src1: ShiftK);
2274 auto ShiftR = IsShift ? RHS : MIRBuilder.buildShl(Dst: WideTy, Src0: RHS, Src1: ShiftK);
2275
2276 auto WideInst = MIRBuilder.buildInstr(Opc: MI.getOpcode(), DstOps: {WideTy},
2277 SrcOps: {ShiftL, ShiftR}, Flags: MI.getFlags());
2278
2279 // Use a shift that will preserve the number of sign bits when the trunc is
2280 // folded away.
2281 auto Result = IsSigned ? MIRBuilder.buildAShr(Dst: WideTy, Src0: WideInst, Src1: ShiftK)
2282 : MIRBuilder.buildLShr(Dst: WideTy, Src0: WideInst, Src1: ShiftK);
2283
2284 MIRBuilder.buildTrunc(Res: DstReg, Op: Result);
2285 MI.eraseFromParent();
2286 return Legalized;
2287}
2288
2289LegalizerHelper::LegalizeResult
2290LegalizerHelper::widenScalarMulo(MachineInstr &MI, unsigned TypeIdx,
2291 LLT WideTy) {
2292 if (TypeIdx == 1) {
2293 Observer.changingInstr(MI);
2294 widenScalarDst(MI, WideTy, OpIdx: 1);
2295 Observer.changedInstr(MI);
2296 return Legalized;
2297 }
2298
2299 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULO;
2300 auto [Result, OriginalOverflow, LHS, RHS] = MI.getFirst4Regs();
2301 LLT SrcTy = MRI.getType(Reg: LHS);
2302 LLT OverflowTy = MRI.getType(Reg: OriginalOverflow);
2303 unsigned SrcBitWidth = SrcTy.getScalarSizeInBits();
2304
2305 // To determine if the result overflowed in the larger type, we extend the
2306 // input to the larger type, do the multiply (checking if it overflows),
2307 // then also check the high bits of the result to see if overflow happened
2308 // there.
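      // For example, %r:_(s16), %o:_(s1) = G_UMULO %a, %b widened to s32:
      //   %m:_(s32) = G_MUL (G_ZEXT %a), (G_ZEXT %b) ; cannot overflow in s32
      //   %r:_(s16) = G_TRUNC %m
      //   %o:_(s1) = G_ICMP ne %m, (zext of the low 16 bits of %m)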
2309 unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
2310 auto LeftOperand = MIRBuilder.buildInstr(Opc: ExtOp, DstOps: {WideTy}, SrcOps: {LHS});
2311 auto RightOperand = MIRBuilder.buildInstr(Opc: ExtOp, DstOps: {WideTy}, SrcOps: {RHS});
2312
2313 // Multiplication cannot overflow if the WideTy is >= 2 * original width,
2314  // so we don't need to check the overflow result of the larger-type Mulo.
2315 bool WideMulCanOverflow = WideTy.getScalarSizeInBits() < 2 * SrcBitWidth;
2316
2317 unsigned MulOpc =
2318 WideMulCanOverflow ? MI.getOpcode() : (unsigned)TargetOpcode::G_MUL;
2319
2320 MachineInstrBuilder Mulo;
2321 if (WideMulCanOverflow)
2322 Mulo = MIRBuilder.buildInstr(Opc: MulOpc, DstOps: {WideTy, OverflowTy},
2323 SrcOps: {LeftOperand, RightOperand});
2324 else
2325 Mulo = MIRBuilder.buildInstr(Opc: MulOpc, DstOps: {WideTy}, SrcOps: {LeftOperand, RightOperand});
2326
2327 auto Mul = Mulo->getOperand(i: 0);
2328 MIRBuilder.buildTrunc(Res: Result, Op: Mul);
2329
2330 MachineInstrBuilder ExtResult;
2331 // Overflow occurred if it occurred in the larger type, or if the high part
2332 // of the result does not zero/sign-extend the low part. Check this second
2333 // possibility first.
2334 if (IsSigned) {
2335 // For signed, overflow occurred when the high part does not sign-extend
2336 // the low part.
2337 ExtResult = MIRBuilder.buildSExtInReg(Res: WideTy, Op: Mul, ImmOp: SrcBitWidth);
2338 } else {
2339 // Unsigned overflow occurred when the high part does not zero-extend the
2340 // low part.
2341 ExtResult = MIRBuilder.buildZExtInReg(Res: WideTy, Op: Mul, ImmOp: SrcBitWidth);
2342 }
2343
2344 if (WideMulCanOverflow) {
2345 auto Overflow =
2346 MIRBuilder.buildICmp(Pred: CmpInst::ICMP_NE, Res: OverflowTy, Op0: Mul, Op1: ExtResult);
2347 // Finally check if the multiplication in the larger type itself overflowed.
2348 MIRBuilder.buildOr(Dst: OriginalOverflow, Src0: Mulo->getOperand(i: 1), Src1: Overflow);
2349 } else {
2350 MIRBuilder.buildICmp(Pred: CmpInst::ICMP_NE, Res: OriginalOverflow, Op0: Mul, Op1: ExtResult);
2351 }
2352 MI.eraseFromParent();
2353 return Legalized;
2354}
2355
2356LegalizerHelper::LegalizeResult
2357LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
2358 switch (MI.getOpcode()) {
2359 default:
2360 return UnableToLegalize;
2361 case TargetOpcode::G_ATOMICRMW_XCHG:
2362 case TargetOpcode::G_ATOMICRMW_ADD:
2363 case TargetOpcode::G_ATOMICRMW_SUB:
2364 case TargetOpcode::G_ATOMICRMW_AND:
2365 case TargetOpcode::G_ATOMICRMW_OR:
2366 case TargetOpcode::G_ATOMICRMW_XOR:
2367 case TargetOpcode::G_ATOMICRMW_MIN:
2368 case TargetOpcode::G_ATOMICRMW_MAX:
2369 case TargetOpcode::G_ATOMICRMW_UMIN:
2370 case TargetOpcode::G_ATOMICRMW_UMAX:
2371 assert(TypeIdx == 0 && "atomicrmw with second scalar type");
2372 Observer.changingInstr(MI);
2373 widenScalarSrc(MI, WideTy, OpIdx: 2, ExtOpcode: TargetOpcode::G_ANYEXT);
2374 widenScalarDst(MI, WideTy, OpIdx: 0);
2375 Observer.changedInstr(MI);
2376 return Legalized;
2377 case TargetOpcode::G_ATOMIC_CMPXCHG:
2378 assert(TypeIdx == 0 && "G_ATOMIC_CMPXCHG with second scalar type");
2379 Observer.changingInstr(MI);
2380 widenScalarSrc(MI, WideTy, OpIdx: 2, ExtOpcode: TargetOpcode::G_ANYEXT);
2381 widenScalarSrc(MI, WideTy, OpIdx: 3, ExtOpcode: TargetOpcode::G_ANYEXT);
2382 widenScalarDst(MI, WideTy, OpIdx: 0);
2383 Observer.changedInstr(MI);
2384 return Legalized;
2385 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS:
2386 if (TypeIdx == 0) {
2387 Observer.changingInstr(MI);
2388 widenScalarSrc(MI, WideTy, OpIdx: 3, ExtOpcode: TargetOpcode::G_ANYEXT);
2389 widenScalarSrc(MI, WideTy, OpIdx: 4, ExtOpcode: TargetOpcode::G_ANYEXT);
2390 widenScalarDst(MI, WideTy, OpIdx: 0);
2391 Observer.changedInstr(MI);
2392 return Legalized;
2393 }
2394 assert(TypeIdx == 1 &&
2395 "G_ATOMIC_CMPXCHG_WITH_SUCCESS with third scalar type");
2396 Observer.changingInstr(MI);
2397 widenScalarDst(MI, WideTy, OpIdx: 1);
2398 Observer.changedInstr(MI);
2399 return Legalized;
2400 case TargetOpcode::G_EXTRACT:
2401 return widenScalarExtract(MI, TypeIdx, WideTy);
2402 case TargetOpcode::G_INSERT:
2403 return widenScalarInsert(MI, TypeIdx, WideTy);
2404 case TargetOpcode::G_MERGE_VALUES:
2405 return widenScalarMergeValues(MI, TypeIdx, WideTy);
2406 case TargetOpcode::G_UNMERGE_VALUES:
2407 return widenScalarUnmergeValues(MI, TypeIdx, WideTy);
2408 case TargetOpcode::G_SADDO:
2409 case TargetOpcode::G_SSUBO:
2410 case TargetOpcode::G_UADDO:
2411 case TargetOpcode::G_USUBO:
2412 case TargetOpcode::G_SADDE:
2413 case TargetOpcode::G_SSUBE:
2414 case TargetOpcode::G_UADDE:
2415 case TargetOpcode::G_USUBE:
2416 return widenScalarAddSubOverflow(MI, TypeIdx, WideTy);
2417 case TargetOpcode::G_UMULO:
2418 case TargetOpcode::G_SMULO:
2419 return widenScalarMulo(MI, TypeIdx, WideTy);
2420 case TargetOpcode::G_SADDSAT:
2421 case TargetOpcode::G_SSUBSAT:
2422 case TargetOpcode::G_SSHLSAT:
2423 case TargetOpcode::G_UADDSAT:
2424 case TargetOpcode::G_USUBSAT:
2425 case TargetOpcode::G_USHLSAT:
2426 return widenScalarAddSubShlSat(MI, TypeIdx, WideTy);
2427 case TargetOpcode::G_CTTZ:
2428 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
2429 case TargetOpcode::G_CTLZ:
2430 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
2431 case TargetOpcode::G_CTPOP: {
2432 if (TypeIdx == 0) {
2433 Observer.changingInstr(MI);
2434 widenScalarDst(MI, WideTy, OpIdx: 0);
2435 Observer.changedInstr(MI);
2436 return Legalized;
2437 }
2438
2439 Register SrcReg = MI.getOperand(i: 1).getReg();
2440
2441 // First extend the input.
2442 unsigned ExtOpc = MI.getOpcode() == TargetOpcode::G_CTTZ ||
2443 MI.getOpcode() == TargetOpcode::G_CTTZ_ZERO_UNDEF
2444 ? TargetOpcode::G_ANYEXT
2445 : TargetOpcode::G_ZEXT;
2446 auto MIBSrc = MIRBuilder.buildInstr(Opc: ExtOpc, DstOps: {WideTy}, SrcOps: {SrcReg});
2447 LLT CurTy = MRI.getType(Reg: SrcReg);
2448 unsigned NewOpc = MI.getOpcode();
2449 if (NewOpc == TargetOpcode::G_CTTZ) {
2450 // The count is the same in the larger type except if the original
2451 // value was zero. This can be handled by setting the bit just off
2452 // the top of the original type.
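          // For example, widening a G_CTTZ on s8 to s32 ORs in bit 8 (0x100),
          // so an all-zero input still yields 8 rather than 32.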
2453 auto TopBit =
2454 APInt::getOneBitSet(numBits: WideTy.getSizeInBits(), BitNo: CurTy.getSizeInBits());
2455 MIBSrc = MIRBuilder.buildOr(
2456 Dst: WideTy, Src0: MIBSrc, Src1: MIRBuilder.buildConstant(Res: WideTy, Val: TopBit));
2457      // Now we know the operand is non-zero, so use the more relaxed opcode.
2458 NewOpc = TargetOpcode::G_CTTZ_ZERO_UNDEF;
2459 }
2460
2461 // Perform the operation at the larger size.
2462 auto MIBNewOp = MIRBuilder.buildInstr(Opc: NewOpc, DstOps: {WideTy}, SrcOps: {MIBSrc});
2463 // This is already the correct result for CTPOP and CTTZs
2464 if (MI.getOpcode() == TargetOpcode::G_CTLZ ||
2465 MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
2466      // The correct result is NewOp - (difference between WideTy and CurTy).
2467 unsigned SizeDiff = WideTy.getSizeInBits() - CurTy.getSizeInBits();
2468 MIBNewOp = MIRBuilder.buildSub(
2469 Dst: WideTy, Src0: MIBNewOp, Src1: MIRBuilder.buildConstant(Res: WideTy, Val: SizeDiff));
2470 }
2471
2472 MIRBuilder.buildZExtOrTrunc(Res: MI.getOperand(i: 0), Op: MIBNewOp);
2473 MI.eraseFromParent();
2474 return Legalized;
2475 }
2476 case TargetOpcode::G_BSWAP: {
2477 Observer.changingInstr(MI);
2478 Register DstReg = MI.getOperand(i: 0).getReg();
2479
2480 Register ShrReg = MRI.createGenericVirtualRegister(Ty: WideTy);
2481 Register DstExt = MRI.createGenericVirtualRegister(Ty: WideTy);
2482 Register ShiftAmtReg = MRI.createGenericVirtualRegister(Ty: WideTy);
2483 widenScalarSrc(MI, WideTy, OpIdx: 1, ExtOpcode: TargetOpcode::G_ANYEXT);
2484
2485 MI.getOperand(i: 0).setReg(DstExt);
2486
2487 MIRBuilder.setInsertPt(MBB&: MIRBuilder.getMBB(), II: ++MIRBuilder.getInsertPt());
2488
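        // The G_BSWAP of the any-extended value leaves the original bytes at
        // the high end of the wide register, so shift them back down before
        // truncating to the original type.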
2489 LLT Ty = MRI.getType(Reg: DstReg);
2490 unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
2491 MIRBuilder.buildConstant(Res: ShiftAmtReg, Val: DiffBits);
2492 MIRBuilder.buildLShr(Dst: ShrReg, Src0: DstExt, Src1: ShiftAmtReg);
2493
2494 MIRBuilder.buildTrunc(Res: DstReg, Op: ShrReg);
2495 Observer.changedInstr(MI);
2496 return Legalized;
2497 }
2498 case TargetOpcode::G_BITREVERSE: {
2499 Observer.changingInstr(MI);
2500
2501 Register DstReg = MI.getOperand(i: 0).getReg();
2502 LLT Ty = MRI.getType(Reg: DstReg);
2503 unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
2504
2505 Register DstExt = MRI.createGenericVirtualRegister(Ty: WideTy);
2506 widenScalarSrc(MI, WideTy, OpIdx: 1, ExtOpcode: TargetOpcode::G_ANYEXT);
2507 MI.getOperand(i: 0).setReg(DstExt);
2508 MIRBuilder.setInsertPt(MBB&: MIRBuilder.getMBB(), II: ++MIRBuilder.getInsertPt());
2509
2510 auto ShiftAmt = MIRBuilder.buildConstant(Res: WideTy, Val: DiffBits);
2511 auto Shift = MIRBuilder.buildLShr(Dst: WideTy, Src0: DstExt, Src1: ShiftAmt);
2512 MIRBuilder.buildTrunc(Res: DstReg, Op: Shift);
2513 Observer.changedInstr(MI);
2514 return Legalized;
2515 }
2516 case TargetOpcode::G_FREEZE:
2517 Observer.changingInstr(MI);
2518 widenScalarSrc(MI, WideTy, OpIdx: 1, ExtOpcode: TargetOpcode::G_ANYEXT);
2519 widenScalarDst(MI, WideTy);
2520 Observer.changedInstr(MI);
2521 return Legalized;
2522
2523 case TargetOpcode::G_ABS:
2524 Observer.changingInstr(MI);
2525 widenScalarSrc(MI, WideTy, OpIdx: 1, ExtOpcode: TargetOpcode::G_SEXT);
2526 widenScalarDst(MI, WideTy);
2527 Observer.changedInstr(MI);
2528 return Legalized;
2529
2530 case TargetOpcode::G_ADD:
2531 case TargetOpcode::G_AND:
2532 case TargetOpcode::G_MUL:
2533 case TargetOpcode::G_OR:
2534 case TargetOpcode::G_XOR:
2535 case TargetOpcode::G_SUB:
2536 case TargetOpcode::G_SHUFFLE_VECTOR:
2537    // Perform operation at larger width (any extension is fine here, high bits
2538 // don't affect the result) and then truncate the result back to the
2539 // original type.
2540 Observer.changingInstr(MI);
2541 widenScalarSrc(MI, WideTy, OpIdx: 1, ExtOpcode: TargetOpcode::G_ANYEXT);
2542 widenScalarSrc(MI, WideTy, OpIdx: 2, ExtOpcode: TargetOpcode::G_ANYEXT);
2543 widenScalarDst(MI, WideTy);
2544 Observer.changedInstr(MI);
2545 return Legalized;
2546
2547 case TargetOpcode::G_SBFX:
2548 case TargetOpcode::G_UBFX:
2549 Observer.changingInstr(MI);
2550
2551 if (TypeIdx == 0) {
2552 widenScalarSrc(MI, WideTy, OpIdx: 1, ExtOpcode: TargetOpcode::G_ANYEXT);
2553 widenScalarDst(MI, WideTy);
2554 } else {
2555 widenScalarSrc(MI, WideTy, OpIdx: 2, ExtOpcode: TargetOpcode::G_ZEXT);
2556 widenScalarSrc(MI, WideTy, OpIdx: 3, ExtOpcode: TargetOpcode::G_ZEXT);
2557 }
2558
2559 Observer.changedInstr(MI);
2560 return Legalized;
2561
2562 case TargetOpcode::G_SHL:
2563 Observer.changingInstr(MI);
2564
2565 if (TypeIdx == 0) {
2566 widenScalarSrc(MI, WideTy, OpIdx: 1, ExtOpcode: TargetOpcode::G_ANYEXT);
2567 widenScalarDst(MI, WideTy);
2568 } else {
2569 assert(TypeIdx == 1);
2570 // The "number of bits to shift" operand must preserve its value as an
2571 // unsigned integer:
2572 widenScalarSrc(MI, WideTy, OpIdx: 2, ExtOpcode: TargetOpcode::G_ZEXT);
2573 }
2574
2575 Observer.changedInstr(MI);
2576 return Legalized;
2577
2578 case TargetOpcode::G_ROTR:
2579 case TargetOpcode::G_ROTL:
2580 if (TypeIdx != 1)
2581 return UnableToLegalize;
2582
2583 Observer.changingInstr(MI);
2584 widenScalarSrc(MI, WideTy, OpIdx: 2, ExtOpcode: TargetOpcode::G_ZEXT);
2585 Observer.changedInstr(MI);
2586 return Legalized;
2587
2588 case TargetOpcode::G_SDIV:
2589 case TargetOpcode::G_SREM:
2590 case TargetOpcode::G_SMIN:
2591 case TargetOpcode::G_SMAX:
2592 Observer.changingInstr(MI);
2593 widenScalarSrc(MI, WideTy, OpIdx: 1, ExtOpcode: TargetOpcode::G_SEXT);
2594 widenScalarSrc(MI, WideTy, OpIdx: 2, ExtOpcode: TargetOpcode::G_SEXT);
2595 widenScalarDst(MI, WideTy);
2596 Observer.changedInstr(MI);
2597 return Legalized;
2598
2599 case TargetOpcode::G_SDIVREM:
2600 Observer.changingInstr(MI);
2601 widenScalarSrc(MI, WideTy, OpIdx: 2, ExtOpcode: TargetOpcode::G_SEXT);
2602 widenScalarSrc(MI, WideTy, OpIdx: 3, ExtOpcode: TargetOpcode::G_SEXT);
2603 widenScalarDst(MI, WideTy);
2604 widenScalarDst(MI, WideTy, OpIdx: 1);
2605 Observer.changedInstr(MI);
2606 return Legalized;
2607
2608 case TargetOpcode::G_ASHR:
2609 case TargetOpcode::G_LSHR:
2610 Observer.changingInstr(MI);
2611
2612 if (TypeIdx == 0) {
2613 unsigned CvtOp = MI.getOpcode() == TargetOpcode::G_ASHR ?
2614 TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
2615
2616 widenScalarSrc(MI, WideTy, OpIdx: 1, ExtOpcode: CvtOp);
2617 widenScalarDst(MI, WideTy);
2618 } else {
2619 assert(TypeIdx == 1);
2620 // The "number of bits to shift" operand must preserve its value as an
2621 // unsigned integer:
2622 widenScalarSrc(MI, WideTy, OpIdx: 2, ExtOpcode: TargetOpcode::G_ZEXT);
2623 }
2624
2625 Observer.changedInstr(MI);
2626 return Legalized;
2627 case TargetOpcode::G_UDIV:
2628 case TargetOpcode::G_UREM:
2629 case TargetOpcode::G_UMIN:
2630 case TargetOpcode::G_UMAX:
2631 Observer.changingInstr(MI);
2632 widenScalarSrc(MI, WideTy, OpIdx: 1, ExtOpcode: TargetOpcode::G_ZEXT);
2633 widenScalarSrc(MI, WideTy, OpIdx: 2, ExtOpcode: TargetOpcode::G_ZEXT);
2634 widenScalarDst(MI, WideTy);
2635 Observer.changedInstr(MI);
2636 return Legalized;
2637
2638 case TargetOpcode::G_UDIVREM:
2639 Observer.changingInstr(MI);
2640 widenScalarSrc(MI, WideTy, OpIdx: 2, ExtOpcode: TargetOpcode::G_ZEXT);
2641 widenScalarSrc(MI, WideTy, OpIdx: 3, ExtOpcode: TargetOpcode::G_ZEXT);
2642 widenScalarDst(MI, WideTy);
2643 widenScalarDst(MI, WideTy, OpIdx: 1);
2644 Observer.changedInstr(MI);
2645 return Legalized;
2646
2647 case TargetOpcode::G_SELECT:
2648 Observer.changingInstr(MI);
2649 if (TypeIdx == 0) {
2650 // Perform operation at larger width (any extension is fine here, high
2651 // bits don't affect the result) and then truncate the result back to the
2652 // original type.
2653 widenScalarSrc(MI, WideTy, OpIdx: 2, ExtOpcode: TargetOpcode::G_ANYEXT);
2654 widenScalarSrc(MI, WideTy, OpIdx: 3, ExtOpcode: TargetOpcode::G_ANYEXT);
2655 widenScalarDst(MI, WideTy);
2656 } else {
2657 bool IsVec = MRI.getType(Reg: MI.getOperand(i: 1).getReg()).isVector();
2658 // Explicit extension is required here since high bits affect the result.
2659 widenScalarSrc(MI, WideTy, OpIdx: 1, ExtOpcode: MIRBuilder.getBoolExtOp(IsVec, IsFP: false));
2660 }
2661 Observer.changedInstr(MI);
2662 return Legalized;
2663
2664 case TargetOpcode::G_FPTOSI:
2665 case TargetOpcode::G_FPTOUI:
2666 case TargetOpcode::G_INTRINSIC_LRINT:
2667 case TargetOpcode::G_INTRINSIC_LLRINT:
2668 case TargetOpcode::G_IS_FPCLASS:
2669 Observer.changingInstr(MI);
2670
2671 if (TypeIdx == 0)
2672 widenScalarDst(MI, WideTy);
2673 else
2674 widenScalarSrc(MI, WideTy, OpIdx: 1, ExtOpcode: TargetOpcode::G_FPEXT);
2675
2676 Observer.changedInstr(MI);
2677 return Legalized;
2678 case TargetOpcode::G_SITOFP:
2679 Observer.changingInstr(MI);
2680
2681 if (TypeIdx == 0)
2682 widenScalarDst(MI, WideTy, OpIdx: 0, TruncOpcode: TargetOpcode::G_FPTRUNC);
2683 else
2684 widenScalarSrc(MI, WideTy, OpIdx: 1, ExtOpcode: TargetOpcode::G_SEXT);
2685
2686 Observer.changedInstr(MI);
2687 return Legalized;
2688 case TargetOpcode::G_UITOFP:
2689 Observer.changingInstr(MI);
2690
2691 if (TypeIdx == 0)
2692 widenScalarDst(MI, WideTy, OpIdx: 0, TruncOpcode: TargetOpcode::G_FPTRUNC);
2693 else
2694 widenScalarSrc(MI, WideTy, OpIdx: 1, ExtOpcode: TargetOpcode::G_ZEXT);
2695
2696 Observer.changedInstr(MI);
2697 return Legalized;
2698 case TargetOpcode::G_LOAD:
2699 case TargetOpcode::G_SEXTLOAD:
2700 case TargetOpcode::G_ZEXTLOAD:
2701 Observer.changingInstr(MI);
2702 widenScalarDst(MI, WideTy);
2703 Observer.changedInstr(MI);
2704 return Legalized;
2705
2706 case TargetOpcode::G_STORE: {
2707 if (TypeIdx != 0)
2708 return UnableToLegalize;
2709
2710 LLT Ty = MRI.getType(Reg: MI.getOperand(i: 0).getReg());
2711 if (!Ty.isScalar())
2712 return UnableToLegalize;
2713
2714 Observer.changingInstr(MI);
2715
2716 unsigned ExtType = Ty.getScalarSizeInBits() == 1 ?
2717 TargetOpcode::G_ZEXT : TargetOpcode::G_ANYEXT;
2718 widenScalarSrc(MI, WideTy, OpIdx: 0, ExtOpcode: ExtType);
2719
2720 Observer.changedInstr(MI);
2721 return Legalized;
2722 }
2723 case TargetOpcode::G_CONSTANT: {
2724 MachineOperand &SrcMO = MI.getOperand(i: 1);
2725 LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
2726 unsigned ExtOpc = LI.getExtOpcodeForWideningConstant(
2727 SmallTy: MRI.getType(Reg: MI.getOperand(i: 0).getReg()));
2728 assert((ExtOpc == TargetOpcode::G_ZEXT || ExtOpc == TargetOpcode::G_SEXT ||
2729 ExtOpc == TargetOpcode::G_ANYEXT) &&
2730 "Illegal Extend");
2731 const APInt &SrcVal = SrcMO.getCImm()->getValue();
2732 const APInt &Val = (ExtOpc == TargetOpcode::G_SEXT)
2733 ? SrcVal.sext(width: WideTy.getSizeInBits())
2734 : SrcVal.zext(width: WideTy.getSizeInBits());
2735 Observer.changingInstr(MI);
2736 SrcMO.setCImm(ConstantInt::get(Context&: Ctx, V: Val));
2737
2738 widenScalarDst(MI, WideTy);
2739 Observer.changedInstr(MI);
2740 return Legalized;
2741 }
2742 case TargetOpcode::G_FCONSTANT: {
2743 // To avoid changing the bits of the constant due to extension to a larger
2744 // type and then using G_FPTRUNC, we simply convert to a G_CONSTANT.
2745 MachineOperand &SrcMO = MI.getOperand(i: 1);
2746 APInt Val = SrcMO.getFPImm()->getValueAPF().bitcastToAPInt();
2747 MIRBuilder.setInstrAndDebugLoc(MI);
2748 auto IntCst = MIRBuilder.buildConstant(Res: MI.getOperand(i: 0).getReg(), Val);
2749 widenScalarDst(MI&: *IntCst, WideTy, OpIdx: 0, TruncOpcode: TargetOpcode::G_TRUNC);
2750 MI.eraseFromParent();
2751 return Legalized;
2752 }
2753 case TargetOpcode::G_IMPLICIT_DEF: {
2754 Observer.changingInstr(MI);
2755 widenScalarDst(MI, WideTy);
2756 Observer.changedInstr(MI);
2757 return Legalized;
2758 }
2759 case TargetOpcode::G_BRCOND:
2760 Observer.changingInstr(MI);
2761 widenScalarSrc(MI, WideTy, OpIdx: 0, ExtOpcode: MIRBuilder.getBoolExtOp(IsVec: false, IsFP: false));
2762 Observer.changedInstr(MI);
2763 return Legalized;
2764
2765 case TargetOpcode::G_FCMP:
2766 Observer.changingInstr(MI);
2767 if (TypeIdx == 0)
2768 widenScalarDst(MI, WideTy);
2769 else {
2770 widenScalarSrc(MI, WideTy, OpIdx: 2, ExtOpcode: TargetOpcode::G_FPEXT);
2771 widenScalarSrc(MI, WideTy, OpIdx: 3, ExtOpcode: TargetOpcode::G_FPEXT);
2772 }
2773 Observer.changedInstr(MI);
2774 return Legalized;
2775
2776 case TargetOpcode::G_ICMP:
2777 Observer.changingInstr(MI);
2778 if (TypeIdx == 0)
2779 widenScalarDst(MI, WideTy);
2780 else {
2781 unsigned ExtOpcode = CmpInst::isSigned(predicate: static_cast<CmpInst::Predicate>(
2782 MI.getOperand(i: 1).getPredicate()))
2783 ? TargetOpcode::G_SEXT
2784 : TargetOpcode::G_ZEXT;
2785 widenScalarSrc(MI, WideTy, OpIdx: 2, ExtOpcode);
2786 widenScalarSrc(MI, WideTy, OpIdx: 3, ExtOpcode);
2787 }
2788 Observer.changedInstr(MI);
2789 return Legalized;
2790
2791 case TargetOpcode::G_PTR_ADD:
2792 assert(TypeIdx == 1 && "unable to legalize pointer of G_PTR_ADD");
2793 Observer.changingInstr(MI);
2794 widenScalarSrc(MI, WideTy, OpIdx: 2, ExtOpcode: TargetOpcode::G_SEXT);
2795 Observer.changedInstr(MI);
2796 return Legalized;
2797
2798 case TargetOpcode::G_PHI: {
2799 assert(TypeIdx == 0 && "Expecting only Idx 0");
2800
2801 Observer.changingInstr(MI);
2802 for (unsigned I = 1; I < MI.getNumOperands(); I += 2) {
2803 MachineBasicBlock &OpMBB = *MI.getOperand(i: I + 1).getMBB();
2804 MIRBuilder.setInsertPt(MBB&: OpMBB, II: OpMBB.getFirstTerminatorForward());
2805 widenScalarSrc(MI, WideTy, OpIdx: I, ExtOpcode: TargetOpcode::G_ANYEXT);
2806 }
2807
2808 MachineBasicBlock &MBB = *MI.getParent();
2809 MIRBuilder.setInsertPt(MBB, II: --MBB.getFirstNonPHI());
2810 widenScalarDst(MI, WideTy);
2811 Observer.changedInstr(MI);
2812 return Legalized;
2813 }
2814 case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
2815 if (TypeIdx == 0) {
2816 Register VecReg = MI.getOperand(i: 1).getReg();
2817 LLT VecTy = MRI.getType(Reg: VecReg);
2818 Observer.changingInstr(MI);
2819
2820 widenScalarSrc(
2821 MI, WideTy: LLT::vector(EC: VecTy.getElementCount(), ScalarSizeInBits: WideTy.getSizeInBits()), OpIdx: 1,
2822 ExtOpcode: TargetOpcode::G_ANYEXT);
2823
2824 widenScalarDst(MI, WideTy, OpIdx: 0);
2825 Observer.changedInstr(MI);
2826 return Legalized;
2827 }
2828
2829 if (TypeIdx != 2)
2830 return UnableToLegalize;
2831 Observer.changingInstr(MI);
2832 // TODO: Probably should be zext
2833 widenScalarSrc(MI, WideTy, OpIdx: 2, ExtOpcode: TargetOpcode::G_SEXT);
2834 Observer.changedInstr(MI);
2835 return Legalized;
2836 }
2837 case TargetOpcode::G_INSERT_VECTOR_ELT: {
2838 if (TypeIdx == 0) {
2839 Observer.changingInstr(MI);
2840 const LLT WideEltTy = WideTy.getElementType();
2841
2842 widenScalarSrc(MI, WideTy, OpIdx: 1, ExtOpcode: TargetOpcode::G_ANYEXT);
2843 widenScalarSrc(MI, WideTy: WideEltTy, OpIdx: 2, ExtOpcode: TargetOpcode::G_ANYEXT);
2844 widenScalarDst(MI, WideTy, OpIdx: 0);
2845 Observer.changedInstr(MI);
2846 return Legalized;
2847 }
2848
2849 if (TypeIdx == 1) {
2850 Observer.changingInstr(MI);
2851
2852 Register VecReg = MI.getOperand(i: 1).getReg();
2853 LLT VecTy = MRI.getType(Reg: VecReg);
2854 LLT WideVecTy = LLT::vector(EC: VecTy.getElementCount(), ScalarTy: WideTy);
2855
2856 widenScalarSrc(MI, WideTy: WideVecTy, OpIdx: 1, ExtOpcode: TargetOpcode::G_ANYEXT);
2857 widenScalarSrc(MI, WideTy, OpIdx: 2, ExtOpcode: TargetOpcode::G_ANYEXT);
2858 widenScalarDst(MI, WideTy: WideVecTy, OpIdx: 0);
2859 Observer.changedInstr(MI);
2860 return Legalized;
2861 }
2862
2863 if (TypeIdx == 2) {
2864 Observer.changingInstr(MI);
2865 // TODO: Probably should be zext
2866 widenScalarSrc(MI, WideTy, OpIdx: 3, ExtOpcode: TargetOpcode::G_SEXT);
2867 Observer.changedInstr(MI);
2868 return Legalized;
2869 }
2870
2871 return UnableToLegalize;
2872 }
2873 case TargetOpcode::G_FADD:
2874 case TargetOpcode::G_FMUL:
2875 case TargetOpcode::G_FSUB:
2876 case TargetOpcode::G_FMA:
2877 case TargetOpcode::G_FMAD:
2878 case TargetOpcode::G_FNEG:
2879 case TargetOpcode::G_FABS:
2880 case TargetOpcode::G_FCANONICALIZE:
2881 case TargetOpcode::G_FMINNUM:
2882 case TargetOpcode::G_FMAXNUM:
2883 case TargetOpcode::G_FMINNUM_IEEE:
2884 case TargetOpcode::G_FMAXNUM_IEEE:
2885 case TargetOpcode::G_FMINIMUM:
2886 case TargetOpcode::G_FMAXIMUM:
2887 case TargetOpcode::G_FDIV:
2888 case TargetOpcode::G_FREM:
2889 case TargetOpcode::G_FCEIL:
2890 case TargetOpcode::G_FFLOOR:
2891 case TargetOpcode::G_FCOS:
2892 case TargetOpcode::G_FSIN:
2893 case TargetOpcode::G_FLOG10:
2894 case TargetOpcode::G_FLOG:
2895 case TargetOpcode::G_FLOG2:
2896 case TargetOpcode::G_FRINT:
2897 case TargetOpcode::G_FNEARBYINT:
2898 case TargetOpcode::G_FSQRT:
2899 case TargetOpcode::G_FEXP:
2900 case TargetOpcode::G_FEXP2:
2901 case TargetOpcode::G_FEXP10:
2902 case TargetOpcode::G_FPOW:
2903 case TargetOpcode::G_INTRINSIC_TRUNC:
2904 case TargetOpcode::G_INTRINSIC_ROUND:
2905 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
2906 assert(TypeIdx == 0);
2907 Observer.changingInstr(MI);
2908
2909 for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
2910 widenScalarSrc(MI, WideTy, OpIdx: I, ExtOpcode: TargetOpcode::G_FPEXT);
2911
2912 widenScalarDst(MI, WideTy, OpIdx: 0, TruncOpcode: TargetOpcode::G_FPTRUNC);
2913 Observer.changedInstr(MI);
2914 return Legalized;
2915 case TargetOpcode::G_FPOWI:
2916 case TargetOpcode::G_FLDEXP:
2917 case TargetOpcode::G_STRICT_FLDEXP: {
2918 if (TypeIdx == 0) {
2919 if (MI.getOpcode() == TargetOpcode::G_STRICT_FLDEXP)
2920 return UnableToLegalize;
2921
2922 Observer.changingInstr(MI);
2923 widenScalarSrc(MI, WideTy, OpIdx: 1, ExtOpcode: TargetOpcode::G_FPEXT);
2924 widenScalarDst(MI, WideTy, OpIdx: 0, TruncOpcode: TargetOpcode::G_FPTRUNC);
2925 Observer.changedInstr(MI);
2926 return Legalized;
2927 }
2928
2929 if (TypeIdx == 1) {
2930 // For some reason SelectionDAG tries to promote to a libcall without
2931 // actually changing the integer type for promotion.
2932 Observer.changingInstr(MI);
2933 widenScalarSrc(MI, WideTy, OpIdx: 2, ExtOpcode: TargetOpcode::G_SEXT);
2934 Observer.changedInstr(MI);
2935 return Legalized;
2936 }
2937
2938 return UnableToLegalize;
2939 }
2940 case TargetOpcode::G_FFREXP: {
2941 Observer.changingInstr(MI);
2942
2943 if (TypeIdx == 0) {
2944 widenScalarSrc(MI, WideTy, OpIdx: 2, ExtOpcode: TargetOpcode::G_FPEXT);
2945 widenScalarDst(MI, WideTy, OpIdx: 0, TruncOpcode: TargetOpcode::G_FPTRUNC);
2946 } else {
2947 widenScalarDst(MI, WideTy, OpIdx: 1);
2948 }
2949
2950 Observer.changedInstr(MI);
2951 return Legalized;
2952 }
2953 case TargetOpcode::G_INTTOPTR:
2954 if (TypeIdx != 1)
2955 return UnableToLegalize;
2956
2957 Observer.changingInstr(MI);
2958 widenScalarSrc(MI, WideTy, OpIdx: 1, ExtOpcode: TargetOpcode::G_ZEXT);
2959 Observer.changedInstr(MI);
2960 return Legalized;
2961 case TargetOpcode::G_PTRTOINT:
2962 if (TypeIdx != 0)
2963 return UnableToLegalize;
2964
2965 Observer.changingInstr(MI);
2966 widenScalarDst(MI, WideTy, OpIdx: 0);
2967 Observer.changedInstr(MI);
2968 return Legalized;
2969 case TargetOpcode::G_BUILD_VECTOR: {
2970 Observer.changingInstr(MI);
2971
2972 const LLT WideEltTy = TypeIdx == 1 ? WideTy : WideTy.getElementType();
2973 for (int I = 1, E = MI.getNumOperands(); I != E; ++I)
2974 widenScalarSrc(MI, WideTy: WideEltTy, OpIdx: I, ExtOpcode: TargetOpcode::G_ANYEXT);
2975
2976 // Avoid changing the result vector type if the source element type was
2977 // requested.
2978 if (TypeIdx == 1) {
2979 MI.setDesc(MIRBuilder.getTII().get(Opcode: TargetOpcode::G_BUILD_VECTOR_TRUNC));
2980 } else {
2981 widenScalarDst(MI, WideTy, OpIdx: 0);
2982 }
2983
2984 Observer.changedInstr(MI);
2985 return Legalized;
2986 }
2987 case TargetOpcode::G_SEXT_INREG:
2988 if (TypeIdx != 0)
2989 return UnableToLegalize;
2990
2991 Observer.changingInstr(MI);
2992 widenScalarSrc(MI, WideTy, OpIdx: 1, ExtOpcode: TargetOpcode::G_ANYEXT);
2993 widenScalarDst(MI, WideTy, OpIdx: 0, TruncOpcode: TargetOpcode::G_TRUNC);
2994 Observer.changedInstr(MI);
2995 return Legalized;
2996 case TargetOpcode::G_PTRMASK: {
2997 if (TypeIdx != 1)
2998 return UnableToLegalize;
2999 Observer.changingInstr(MI);
3000 widenScalarSrc(MI, WideTy, OpIdx: 2, ExtOpcode: TargetOpcode::G_ZEXT);
3001 Observer.changedInstr(MI);
3002 return Legalized;
3003 }
3004 case TargetOpcode::G_VECREDUCE_FADD:
3005 case TargetOpcode::G_VECREDUCE_FMUL:
3006 case TargetOpcode::G_VECREDUCE_FMIN:
3007 case TargetOpcode::G_VECREDUCE_FMAX:
3008 case TargetOpcode::G_VECREDUCE_FMINIMUM:
3009 case TargetOpcode::G_VECREDUCE_FMAXIMUM: {
3010 if (TypeIdx != 0)
3011 return UnableToLegalize;
3012 Observer.changingInstr(MI);
3013 Register VecReg = MI.getOperand(i: 1).getReg();
3014 LLT VecTy = MRI.getType(Reg: VecReg);
3015 LLT WideVecTy = VecTy.isVector()
3016 ? LLT::vector(EC: VecTy.getElementCount(), ScalarTy: WideTy)
3017 : WideTy;
3018 widenScalarSrc(MI, WideTy: WideVecTy, OpIdx: 1, ExtOpcode: TargetOpcode::G_FPEXT);
3019 widenScalarDst(MI, WideTy, OpIdx: 0, TruncOpcode: TargetOpcode::G_FPTRUNC);
3020 Observer.changedInstr(MI);
3021 return Legalized;
3022 }
3023 case TargetOpcode::G_VSCALE: {
3024 MachineOperand &SrcMO = MI.getOperand(i: 1);
3025 LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
3026 const APInt &SrcVal = SrcMO.getCImm()->getValue();
3027 // The CImm is always a signed value
3028 const APInt Val = SrcVal.sext(width: WideTy.getSizeInBits());
3029 Observer.changingInstr(MI);
3030 SrcMO.setCImm(ConstantInt::get(Context&: Ctx, V: Val));
3031 widenScalarDst(MI, WideTy);
3032 Observer.changedInstr(MI);
3033 return Legalized;
3034 }
3035 case TargetOpcode::G_SPLAT_VECTOR: {
3036 if (TypeIdx != 1)
3037 return UnableToLegalize;
3038
3039 Observer.changingInstr(MI);
3040 widenScalarSrc(MI, WideTy, OpIdx: 1, ExtOpcode: TargetOpcode::G_ANYEXT);
3041 Observer.changedInstr(MI);
3042 return Legalized;
3043 }
3044 }
3045}
3046
3047static void getUnmergePieces(SmallVectorImpl<Register> &Pieces,
3048 MachineIRBuilder &B, Register Src, LLT Ty) {
3049 auto Unmerge = B.buildUnmerge(Res: Ty, Op: Src);
3050 for (int I = 0, E = Unmerge->getNumOperands() - 1; I != E; ++I)
3051 Pieces.push_back(Elt: Unmerge.getReg(Idx: I));
3052}
3053
3054static void emitLoadFromConstantPool(Register DstReg, const Constant *ConstVal,
3055 MachineIRBuilder &MIRBuilder) {
3056 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
3057 MachineFunction &MF = MIRBuilder.getMF();
3058 const DataLayout &DL = MIRBuilder.getDataLayout();
3059 unsigned AddrSpace = DL.getDefaultGlobalsAddressSpace();
3060 LLT AddrPtrTy = LLT::pointer(AddressSpace: AddrSpace, SizeInBits: DL.getPointerSizeInBits(AS: AddrSpace));
3061 LLT DstLLT = MRI.getType(Reg: DstReg);
3062
3063 Align Alignment(DL.getABITypeAlign(Ty: ConstVal->getType()));
3064
3065 auto Addr = MIRBuilder.buildConstantPool(
3066 Res: AddrPtrTy,
3067 Idx: MF.getConstantPool()->getConstantPoolIndex(C: ConstVal, Alignment));
3068
3069 MachineMemOperand *MMO =
3070 MF.getMachineMemOperand(PtrInfo: MachinePointerInfo::getConstantPool(MF),
3071 f: MachineMemOperand::MOLoad, MemTy: DstLLT, base_alignment: Alignment);
3072
3073 MIRBuilder.buildLoadInstr(Opcode: TargetOpcode::G_LOAD, Res: DstReg, Addr, MMO&: *MMO);
3074}
3075
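// The two lowerings below (G_CONSTANT and G_FCONSTANT) materialize the
// immediate through a constant-pool load. Roughly, and with hypothetical
// virtual register names:
//   %0:_(s64) = G_FCONSTANT double 1.0
// becomes
//   %cp:_(p0) = G_CONSTANT_POOL %const.0
//   %0:_(s64) = G_LOAD %cp :: (load (s64) from constant-pool)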
3076LegalizerHelper::LegalizeResult
3077LegalizerHelper::lowerConstant(MachineInstr &MI) {
3078 const MachineOperand &ConstOperand = MI.getOperand(i: 1);
3079 const Constant *ConstantVal = ConstOperand.getCImm();
3080
3081 emitLoadFromConstantPool(DstReg: MI.getOperand(i: 0).getReg(), ConstVal: ConstantVal, MIRBuilder);
3082 MI.eraseFromParent();
3083
3084 return Legalized;
3085}
3086
3087LegalizerHelper::LegalizeResult
3088LegalizerHelper::lowerFConstant(MachineInstr &MI) {
3089 const MachineOperand &ConstOperand = MI.getOperand(i: 1);
3090 const Constant *ConstantVal = ConstOperand.getFPImm();
3091
3092 emitLoadFromConstantPool(DstReg: MI.getOperand(i: 0).getReg(), ConstVal: ConstantVal, MIRBuilder);
3093 MI.eraseFromParent();
3094
3095 return Legalized;
3096}
3097
3098LegalizerHelper::LegalizeResult
3099LegalizerHelper::lowerBitcast(MachineInstr &MI) {
3100 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
3101 if (SrcTy.isVector()) {
3102 LLT SrcEltTy = SrcTy.getElementType();
3103 SmallVector<Register, 8> SrcRegs;
3104
3105 if (DstTy.isVector()) {
3106 int NumDstElt = DstTy.getNumElements();
3107 int NumSrcElt = SrcTy.getNumElements();
3108
3109 LLT DstEltTy = DstTy.getElementType();
3110 LLT DstCastTy = DstEltTy; // Intermediate bitcast result type
3111 LLT SrcPartTy = SrcEltTy; // Original unmerge result type.
3112
3113 // If there's an element size mismatch, insert intermediate casts to match
3114 // the result element type.
3115 if (NumSrcElt < NumDstElt) { // Source element type is larger.
3116 // %1:_(<4 x s8>) = G_BITCAST %0:_(<2 x s16>)
3117 //
3118 // =>
3119 //
3120 // %2:_(s16), %3:_(s16) = G_UNMERGE_VALUES %0
3121 // %4:_(<2 x s8>) = G_BITCAST %2
3122 // %5:_(<2 x s8>) = G_BITCAST %3
3123 // %1:_(<4 x s8>) = G_CONCAT_VECTORS %4, %5
3124 DstCastTy = LLT::fixed_vector(NumElements: NumDstElt / NumSrcElt, ScalarTy: DstEltTy);
3125 SrcPartTy = SrcEltTy;
3126 } else if (NumSrcElt > NumDstElt) { // Source element type is smaller.
3127 //
3128 // %1:_(<2 x s16>) = G_BITCAST %0:_(<4 x s8>)
3129 //
3130 // =>
3131 //
3132 // %2:_(<2 x s8>), %3:_(<2 x s8>) = G_UNMERGE_VALUES %0
3133 // %4:_(s16) = G_BITCAST %2
3134 // %5:_(s16) = G_BITCAST %3
3135 // %1:_(<2 x s16>) = G_BUILD_VECTOR %4, %5
3136 SrcPartTy = LLT::fixed_vector(NumElements: NumSrcElt / NumDstElt, ScalarTy: SrcEltTy);
3137 DstCastTy = DstEltTy;
3138 }
3139
3140 getUnmergePieces(Pieces&: SrcRegs, B&: MIRBuilder, Src, Ty: SrcPartTy);
3141 for (Register &SrcReg : SrcRegs)
3142 SrcReg = MIRBuilder.buildBitcast(Dst: DstCastTy, Src: SrcReg).getReg(Idx: 0);
3143 } else
3144 getUnmergePieces(Pieces&: SrcRegs, B&: MIRBuilder, Src, Ty: SrcEltTy);
3145
3146 MIRBuilder.buildMergeLikeInstr(Res: Dst, Ops: SrcRegs);
3147 MI.eraseFromParent();
3148 return Legalized;
3149 }
3150
3151 if (DstTy.isVector()) {
3152 SmallVector<Register, 8> SrcRegs;
3153 getUnmergePieces(Pieces&: SrcRegs, B&: MIRBuilder, Src, Ty: DstTy.getElementType());
3154 MIRBuilder.buildMergeLikeInstr(Res: Dst, Ops: SrcRegs);
3155 MI.eraseFromParent();
3156 return Legalized;
3157 }
3158
3159 return UnableToLegalize;
3160}
3161
3162/// Figure out the bit offset into a register when coercing a vector index for
3163/// the wide element type. This is only for the case when promoting a vector to
3164/// one with larger elements.
3165///
3166///
3167/// %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
3168/// %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
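///
/// For example, when casting <8 x s8> to <2 x s32> (NewEltSize = 32,
/// OldEltSize = 8), an index of 6 gives %offset_idx = 6 & 3 = 2 and
/// %offset_bits = 2 << 3 = 16, i.e. the third byte within the wide element.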
3169static Register getBitcastWiderVectorElementOffset(MachineIRBuilder &B,
3170 Register Idx,
3171 unsigned NewEltSize,
3172 unsigned OldEltSize) {
3173 const unsigned Log2EltRatio = Log2_32(Value: NewEltSize / OldEltSize);
3174 LLT IdxTy = B.getMRI()->getType(Reg: Idx);
3175
3176 // Now figure out the amount we need to shift to get the target bits.
3177 auto OffsetMask = B.buildConstant(
3178 Res: IdxTy, Val: ~(APInt::getAllOnes(numBits: IdxTy.getSizeInBits()) << Log2EltRatio));
3179 auto OffsetIdx = B.buildAnd(Dst: IdxTy, Src0: Idx, Src1: OffsetMask);
3180 return B.buildShl(Dst: IdxTy, Src0: OffsetIdx,
3181 Src1: B.buildConstant(Res: IdxTy, Val: Log2_32(Value: OldEltSize))).getReg(Idx: 0);
3182}
3183
3184/// Perform a G_EXTRACT_VECTOR_ELT in a different sized vector element. If this
3185/// is casting to a vector with a smaller element size, perform multiple element
3186/// extracts and merge the results. If this is coercing to a vector with larger
3187/// elements, index the bitcasted vector and extract the target element with bit
3188/// operations. This is intended to force the indexing in the native register
3189/// size for architectures that can dynamically index the register file.
3190LegalizerHelper::LegalizeResult
3191LegalizerHelper::bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx,
3192 LLT CastTy) {
3193 if (TypeIdx != 1)
3194 return UnableToLegalize;
3195
3196 auto [Dst, DstTy, SrcVec, SrcVecTy, Idx, IdxTy] = MI.getFirst3RegLLTs();
3197
3198 LLT SrcEltTy = SrcVecTy.getElementType();
3199 unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
3200 unsigned OldNumElts = SrcVecTy.getNumElements();
3201
3202 LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
3203 Register CastVec = MIRBuilder.buildBitcast(Dst: CastTy, Src: SrcVec).getReg(Idx: 0);
3204
3205 const unsigned NewEltSize = NewEltTy.getSizeInBits();
3206 const unsigned OldEltSize = SrcEltTy.getSizeInBits();
3207 if (NewNumElts > OldNumElts) {
3208 // Decreasing the vector element size
3209 //
3210 // e.g. i64 = extract_vector_elt x:v2i64, y:i32
3211 // =>
3212 // v4i32:castx = bitcast x:v2i64
3213 //
3214 // i64 = bitcast
3215 // (v2i32 build_vector (i32 (extract_vector_elt castx, (2 * y))),
3216 //                       (i32 (extract_vector_elt castx, (2 * y + 1))))
3217 //
3218 if (NewNumElts % OldNumElts != 0)
3219 return UnableToLegalize;
3220
3221 // Type of the intermediate result vector.
3222 const unsigned NewEltsPerOldElt = NewNumElts / OldNumElts;
3223 LLT MidTy =
3224 LLT::scalarOrVector(EC: ElementCount::getFixed(MinVal: NewEltsPerOldElt), ScalarTy: NewEltTy);
3225
3226 auto NewEltsPerOldEltK = MIRBuilder.buildConstant(Res: IdxTy, Val: NewEltsPerOldElt);
3227
3228 SmallVector<Register, 8> NewOps(NewEltsPerOldElt);
3229 auto NewBaseIdx = MIRBuilder.buildMul(Dst: IdxTy, Src0: Idx, Src1: NewEltsPerOldEltK);
3230
3231 for (unsigned I = 0; I < NewEltsPerOldElt; ++I) {
3232 auto IdxOffset = MIRBuilder.buildConstant(Res: IdxTy, Val: I);
3233 auto TmpIdx = MIRBuilder.buildAdd(Dst: IdxTy, Src0: NewBaseIdx, Src1: IdxOffset);
3234 auto Elt = MIRBuilder.buildExtractVectorElement(Res: NewEltTy, Val: CastVec, Idx: TmpIdx);
3235 NewOps[I] = Elt.getReg(Idx: 0);
3236 }
3237
3238 auto NewVec = MIRBuilder.buildBuildVector(Res: MidTy, Ops: NewOps);
3239 MIRBuilder.buildBitcast(Dst, Src: NewVec);
3240 MI.eraseFromParent();
3241 return Legalized;
3242 }
3243
3244 if (NewNumElts < OldNumElts) {
3245 if (NewEltSize % OldEltSize != 0)
3246 return UnableToLegalize;
3247
3248 // This only depends on powers of 2 because we use bit tricks to figure out
3249 // the bit offset we need to shift to get the target element. A general
3250 // expansion could emit division/multiply.
3251 if (!isPowerOf2_32(Value: NewEltSize / OldEltSize))
3252 return UnableToLegalize;
3253
3254 // Increasing the vector element size.
3255 // %elt:_(small_elt) = G_EXTRACT_VECTOR_ELT %vec:_(<N x small_elt>), %idx
3256 //
3257 // =>
3258 //
3259 // %cast = G_BITCAST %vec
3260 // %scaled_idx = G_LSHR %idx, Log2(DstEltSize / SrcEltSize)
3261 // %wide_elt = G_EXTRACT_VECTOR_ELT %cast, %scaled_idx
3262 // %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
3263 // %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
3264 // %elt_bits = G_LSHR %wide_elt, %offset_bits
3265 // %elt = G_TRUNC %elt_bits
3266
3267 const unsigned Log2EltRatio = Log2_32(Value: NewEltSize / OldEltSize);
3268 auto Log2Ratio = MIRBuilder.buildConstant(Res: IdxTy, Val: Log2EltRatio);
3269
3270 // Divide to get the index in the wider element type.
3271 auto ScaledIdx = MIRBuilder.buildLShr(Dst: IdxTy, Src0: Idx, Src1: Log2Ratio);
3272
3273 Register WideElt = CastVec;
3274 if (CastTy.isVector()) {
3275 WideElt = MIRBuilder.buildExtractVectorElement(Res: NewEltTy, Val: CastVec,
3276 Idx: ScaledIdx).getReg(Idx: 0);
3277 }
3278
3279 // Compute the bit offset into the register of the target element.
3280 Register OffsetBits = getBitcastWiderVectorElementOffset(
3281 B&: MIRBuilder, Idx, NewEltSize, OldEltSize);
3282
3283 // Shift the wide element to get the target element.
3284 auto ExtractedBits = MIRBuilder.buildLShr(Dst: NewEltTy, Src0: WideElt, Src1: OffsetBits);
3285 MIRBuilder.buildTrunc(Res: Dst, Op: ExtractedBits);
3286 MI.eraseFromParent();
3287 return Legalized;
3288 }
3289
3290 return UnableToLegalize;
3291}
3292
3293/// Emit code to insert \p InsertReg into \p TargetReg at \p OffsetBits,
3294/// while preserving the other bits in \p TargetReg.
3295///
3296/// (InsertReg << Offset) | (TargetReg & ~(((1 << InsertReg.size()) - 1) << Offset))
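///
/// For example, inserting an s8 value into an s32 target at bit offset 16
/// builds the mask 0xFF, shifts it to 0x00FF0000, clears those bits in the
/// target, and ORs in the zero-extended insert value shifted left by 16.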
3297static Register buildBitFieldInsert(MachineIRBuilder &B,
3298 Register TargetReg, Register InsertReg,
3299 Register OffsetBits) {
3300 LLT TargetTy = B.getMRI()->getType(Reg: TargetReg);
3301 LLT InsertTy = B.getMRI()->getType(Reg: InsertReg);
3302 auto ZextVal = B.buildZExt(Res: TargetTy, Op: InsertReg);
3303 auto ShiftedInsertVal = B.buildShl(Dst: TargetTy, Src0: ZextVal, Src1: OffsetBits);
3304
3305 // Produce a bitmask of the value to insert
3306 auto EltMask = B.buildConstant(
3307 Res: TargetTy, Val: APInt::getLowBitsSet(numBits: TargetTy.getSizeInBits(),
3308 loBitsSet: InsertTy.getSizeInBits()));
3309 // Shift it into position
3310 auto ShiftedMask = B.buildShl(Dst: TargetTy, Src0: EltMask, Src1: OffsetBits);
3311 auto InvShiftedMask = B.buildNot(Dst: TargetTy, Src0: ShiftedMask);
3312
3313 // Clear out the bits in the wide element
3314 auto MaskedOldElt = B.buildAnd(Dst: TargetTy, Src0: TargetReg, Src1: InvShiftedMask);
3315
3316 // The value to insert has all zeros already, so stick it into the masked
3317 // wide element.
3318 return B.buildOr(Dst: TargetTy, Src0: MaskedOldElt, Src1: ShiftedInsertVal).getReg(Idx: 0);
3319}
3320
3321/// Perform a G_INSERT_VECTOR_ELT in a different sized vector element. If this
3322/// is increasing the element size, perform the indexing in the target element
3323/// type, and use bit operations to insert at the element position. This is
3324/// intended for architectures that can dynamically index the register file and
3325/// want to force indexing in the native register size.
3326LegalizerHelper::LegalizeResult
3327LegalizerHelper::bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx,
3328 LLT CastTy) {
3329 if (TypeIdx != 0)
3330 return UnableToLegalize;
3331
3332 auto [Dst, DstTy, SrcVec, SrcVecTy, Val, ValTy, Idx, IdxTy] =
3333 MI.getFirst4RegLLTs();
3334 LLT VecTy = DstTy;
3335
3336 LLT VecEltTy = VecTy.getElementType();
3337 LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
3338 const unsigned NewEltSize = NewEltTy.getSizeInBits();
3339 const unsigned OldEltSize = VecEltTy.getSizeInBits();
3340
3341 unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
3342 unsigned OldNumElts = VecTy.getNumElements();
3343
3344 Register CastVec = MIRBuilder.buildBitcast(Dst: CastTy, Src: SrcVec).getReg(Idx: 0);
3345 if (NewNumElts < OldNumElts) {
3346 if (NewEltSize % OldEltSize != 0)
3347 return UnableToLegalize;
3348
3349 // This only depends on powers of 2 because we use bit tricks to figure out
3350 // the bit offset we need to shift to get the target element. A general
3351 // expansion could emit division/multiply.
3352 if (!isPowerOf2_32(Value: NewEltSize / OldEltSize))
3353 return UnableToLegalize;
3354
3355 const unsigned Log2EltRatio = Log2_32(Value: NewEltSize / OldEltSize);
3356 auto Log2Ratio = MIRBuilder.buildConstant(Res: IdxTy, Val: Log2EltRatio);
3357
3358 // Divide to get the index in the wider element type.
3359 auto ScaledIdx = MIRBuilder.buildLShr(Dst: IdxTy, Src0: Idx, Src1: Log2Ratio);
3360
3361 Register ExtractedElt = CastVec;
3362 if (CastTy.isVector()) {
3363 ExtractedElt = MIRBuilder.buildExtractVectorElement(Res: NewEltTy, Val: CastVec,
3364 Idx: ScaledIdx).getReg(Idx: 0);
3365 }
3366
3367 // Compute the bit offset into the register of the target element.
3368 Register OffsetBits = getBitcastWiderVectorElementOffset(
3369 B&: MIRBuilder, Idx, NewEltSize, OldEltSize);
3370
3371 Register InsertedElt = buildBitFieldInsert(B&: MIRBuilder, TargetReg: ExtractedElt,
3372 InsertReg: Val, OffsetBits);
3373 if (CastTy.isVector()) {
3374 InsertedElt = MIRBuilder.buildInsertVectorElement(
3375 Res: CastTy, Val: CastVec, Elt: InsertedElt, Idx: ScaledIdx).getReg(Idx: 0);
3376 }
3377
3378 MIRBuilder.buildBitcast(Dst, Src: InsertedElt);
3379 MI.eraseFromParent();
3380 return Legalized;
3381 }
3382
3383 return UnableToLegalize;
3384}
3385
3386LegalizerHelper::LegalizeResult LegalizerHelper::lowerLoad(GAnyLoad &LoadMI) {
3387 // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT
3388 Register DstReg = LoadMI.getDstReg();
3389 Register PtrReg = LoadMI.getPointerReg();
3390 LLT DstTy = MRI.getType(Reg: DstReg);
3391 MachineMemOperand &MMO = LoadMI.getMMO();
3392 LLT MemTy = MMO.getMemoryType();
3393 MachineFunction &MF = MIRBuilder.getMF();
3394
3395 unsigned MemSizeInBits = MemTy.getSizeInBits();
3396 unsigned MemStoreSizeInBits = 8 * MemTy.getSizeInBytes();
3397
3398 if (MemSizeInBits != MemStoreSizeInBits) {
3399 if (MemTy.isVector())
3400 return UnableToLegalize;
3401
3402 // Promote to a byte-sized load if not loading an integral number of
3403 // bytes. For example, promote EXTLOAD:i20 -> EXTLOAD:i24.
3404 LLT WideMemTy = LLT::scalar(SizeInBits: MemStoreSizeInBits);
3405 MachineMemOperand *NewMMO =
3406 MF.getMachineMemOperand(MMO: &MMO, PtrInfo: MMO.getPointerInfo(), Ty: WideMemTy);
3407
3408 Register LoadReg = DstReg;
3409 LLT LoadTy = DstTy;
3410
3411 // If this wasn't already an extending load, we need to widen the result
3412 // register to avoid creating a load with a narrower result than the source.
3413 if (MemStoreSizeInBits > DstTy.getSizeInBits()) {
3414 LoadTy = WideMemTy;
3415 LoadReg = MRI.createGenericVirtualRegister(Ty: WideMemTy);
3416 }
3417
3418 if (isa<GSExtLoad>(Val: LoadMI)) {
3419 auto NewLoad = MIRBuilder.buildLoad(Res: LoadTy, Addr: PtrReg, MMO&: *NewMMO);
3420 MIRBuilder.buildSExtInReg(Res: LoadReg, Op: NewLoad, ImmOp: MemSizeInBits);
3421 } else if (isa<GZExtLoad>(Val: LoadMI) || WideMemTy == LoadTy) {
3422 auto NewLoad = MIRBuilder.buildLoad(Res: LoadTy, Addr: PtrReg, MMO&: *NewMMO);
3423 // The extra bits are guaranteed to be zero, since we stored them that way.
3424 // A zext load from the wide type thus automatically gives a zext from the memory type.
3425 MIRBuilder.buildAssertZExt(Res: LoadReg, Op: NewLoad, Size: MemSizeInBits);
3426 } else {
3427 MIRBuilder.buildLoad(Res: LoadReg, Addr: PtrReg, MMO&: *NewMMO);
3428 }
3429
3430 if (DstTy != LoadTy)
3431 MIRBuilder.buildTrunc(Res: DstReg, Op: LoadReg);
3432
3433 LoadMI.eraseFromParent();
3434 return Legalized;
3435 }
3436
3437 // Big endian lowering not implemented.
3438 if (MIRBuilder.getDataLayout().isBigEndian())
3439 return UnableToLegalize;
3440
3441 // This load needs splitting into power of 2 sized loads.
3442 //
3443 // Our strategy here is to generate anyextending loads for the smaller
3444 // types up to the next power-of-2 result type, and then combine the two larger
3445 // result values together, before truncating back down to the non-pow-2
3446 // type.
3447 // E.g. v1 = i24 load =>
3448 // v2 = i32 zextload (2 byte)
3449 // v3 = i32 load (1 byte)
3450 // v4 = i32 shl v3, 16
3451 // v5 = i32 or v4, v2
3452 // v1 = i24 trunc v5
3453 // By doing this we generate the correct truncate which should get
3454 // combined away as an artifact with a matching extend.
3455
3456 uint64_t LargeSplitSize, SmallSplitSize;
3457
3458 if (!isPowerOf2_32(Value: MemSizeInBits)) {
3459 // This load needs splitting into power of 2 sized loads.
3460 LargeSplitSize = llvm::bit_floor(Value: MemSizeInBits);
3461 SmallSplitSize = MemSizeInBits - LargeSplitSize;
3462 } else {
3463 // This is already a power of 2, but we still need to split this in half.
3464 //
3465 // Assume we're being asked to decompose an unaligned load.
3466 // TODO: If this requires multiple splits, handle them all at once.
3467 auto &Ctx = MF.getFunction().getContext();
3468 if (TLI.allowsMemoryAccess(Context&: Ctx, DL: MIRBuilder.getDataLayout(), Ty: MemTy, MMO))
3469 return UnableToLegalize;
3470
3471 SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
3472 }
3473
3474 if (MemTy.isVector()) {
3475 // TODO: Handle vector extloads
3476 if (MemTy != DstTy)
3477 return UnableToLegalize;
3478
3479 // TODO: We can do better than scalarizing the vector and at least split it
3480 // in half.
3481 return reduceLoadStoreWidth(MI&: LoadMI, TypeIdx: 0, NarrowTy: DstTy.getElementType());
3482 }
3483
3484 MachineMemOperand *LargeMMO =
3485 MF.getMachineMemOperand(MMO: &MMO, Offset: 0, Size: LargeSplitSize / 8);
3486 MachineMemOperand *SmallMMO =
3487 MF.getMachineMemOperand(MMO: &MMO, Offset: LargeSplitSize / 8, Size: SmallSplitSize / 8);
3488
3489 LLT PtrTy = MRI.getType(Reg: PtrReg);
3490 unsigned AnyExtSize = PowerOf2Ceil(A: DstTy.getSizeInBits());
3491 LLT AnyExtTy = LLT::scalar(SizeInBits: AnyExtSize);
3492 auto LargeLoad = MIRBuilder.buildLoadInstr(Opcode: TargetOpcode::G_ZEXTLOAD, Res: AnyExtTy,
3493 Addr: PtrReg, MMO&: *LargeMMO);
3494
3495 auto OffsetCst = MIRBuilder.buildConstant(Res: LLT::scalar(SizeInBits: PtrTy.getSizeInBits()),
3496 Val: LargeSplitSize / 8);
3497 Register PtrAddReg = MRI.createGenericVirtualRegister(Ty: PtrTy);
3498 auto SmallPtr = MIRBuilder.buildPtrAdd(Res: PtrAddReg, Op0: PtrReg, Op1: OffsetCst);
3499 auto SmallLoad = MIRBuilder.buildLoadInstr(Opcode: LoadMI.getOpcode(), Res: AnyExtTy,
3500 Addr: SmallPtr, MMO&: *SmallMMO);
3501
3502 auto ShiftAmt = MIRBuilder.buildConstant(Res: AnyExtTy, Val: LargeSplitSize);
3503 auto Shift = MIRBuilder.buildShl(Dst: AnyExtTy, Src0: SmallLoad, Src1: ShiftAmt);
3504
3505 if (AnyExtTy == DstTy)
3506 MIRBuilder.buildOr(Dst: DstReg, Src0: Shift, Src1: LargeLoad);
3507 else if (AnyExtTy.getSizeInBits() != DstTy.getSizeInBits()) {
3508 auto Or = MIRBuilder.buildOr(Dst: AnyExtTy, Src0: Shift, Src1: LargeLoad);
3509 MIRBuilder.buildTrunc(Res: DstReg, Op: {Or});
3510 } else {
3511 assert(DstTy.isPointer() && "expected pointer");
3512 auto Or = MIRBuilder.buildOr(Dst: AnyExtTy, Src0: Shift, Src1: LargeLoad);
3513
3514 // FIXME: We currently consider this to be illegal for non-integral address
3515 // spaces, but we still need a way to reinterpret the bits.
3516 MIRBuilder.buildIntToPtr(Dst: DstReg, Src: Or);
3517 }
3518
3519 LoadMI.eraseFromParent();
3520 return Legalized;
3521}
3522
3523LegalizerHelper::LegalizeResult LegalizerHelper::lowerStore(GStore &StoreMI) {
3524 // Lower a non-power of 2 store into multiple pow-2 stores.
3525 // E.g. split an i24 store into an i16 store + i8 store.
3526 // We do this by first extending the stored value to the next largest power
3527 // of 2 type, and then using truncating stores to store the components.
3528 // By doing this, as with G_LOAD, we generate an extend that can be
3529 // artifact-combined away instead of leaving behind extracts.
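  // As an illustration (mirroring the G_LOAD example), an i24 store is
  // expected to expand roughly as:
  //   v2 = i32 anyext v1
  //   i16 truncstore v2 (2 bytes at ptr)
  //   v3 = i32 lshr v2, 16
  //   i8 truncstore v3 (1 byte at ptr + 2)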
3530 Register SrcReg = StoreMI.getValueReg();
3531 Register PtrReg = StoreMI.getPointerReg();
3532 LLT SrcTy = MRI.getType(Reg: SrcReg);
3533 MachineFunction &MF = MIRBuilder.getMF();
3534 MachineMemOperand &MMO = **StoreMI.memoperands_begin();
3535 LLT MemTy = MMO.getMemoryType();
3536
3537 unsigned StoreWidth = MemTy.getSizeInBits();
3538 unsigned StoreSizeInBits = 8 * MemTy.getSizeInBytes();
3539
3540 if (StoreWidth != StoreSizeInBits) {
3541 if (SrcTy.isVector())
3542 return UnableToLegalize;
3543
3544 // Promote to a byte-sized store with upper bits zero if not
3545 // storing an integral number of bytes. For example, promote
3546 // TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1)
3547 LLT WideTy = LLT::scalar(SizeInBits: StoreSizeInBits);
3548
3549 if (StoreSizeInBits > SrcTy.getSizeInBits()) {
3550 // Avoid creating a store with a narrower source than result.
3551 SrcReg = MIRBuilder.buildAnyExt(Res: WideTy, Op: SrcReg).getReg(Idx: 0);
3552 SrcTy = WideTy;
3553 }
3554
3555 auto ZextInReg = MIRBuilder.buildZExtInReg(Res: SrcTy, Op: SrcReg, ImmOp: StoreWidth);
3556
3557 MachineMemOperand *NewMMO =
3558 MF.getMachineMemOperand(MMO: &MMO, PtrInfo: MMO.getPointerInfo(), Ty: WideTy);
3559 MIRBuilder.buildStore(Val: ZextInReg, Addr: PtrReg, MMO&: *NewMMO);
3560 StoreMI.eraseFromParent();
3561 return Legalized;
3562 }
3563
3564 if (MemTy.isVector()) {
3565 // TODO: Handle vector trunc stores
3566 if (MemTy != SrcTy)
3567 return UnableToLegalize;
3568
3569 // TODO: We can do better than scalarizing the vector and at least split it
3570 // in half.
3571 return reduceLoadStoreWidth(MI&: StoreMI, TypeIdx: 0, NarrowTy: SrcTy.getElementType());
3572 }
3573
3574 unsigned MemSizeInBits = MemTy.getSizeInBits();
3575 uint64_t LargeSplitSize, SmallSplitSize;
3576
3577 if (!isPowerOf2_32(Value: MemSizeInBits)) {
3578 LargeSplitSize = llvm::bit_floor<uint64_t>(Value: MemTy.getSizeInBits());
3579 SmallSplitSize = MemTy.getSizeInBits() - LargeSplitSize;
3580 } else {
3581 auto &Ctx = MF.getFunction().getContext();
3582 if (TLI.allowsMemoryAccess(Context&: Ctx, DL: MIRBuilder.getDataLayout(), Ty: MemTy, MMO))
3583 return UnableToLegalize; // Don't know what we're being asked to do.
3584
3585 SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
3586 }
3587
3588 // Extend to the next pow-2. If this store was itself the result of lowering,
3589 // e.g. an s56 store being broken into s32 + s24, we might have a stored type
3590 // that's wider than the stored size.
3591 unsigned AnyExtSize = PowerOf2Ceil(A: MemTy.getSizeInBits());
3592 const LLT NewSrcTy = LLT::scalar(SizeInBits: AnyExtSize);
3593
3594 if (SrcTy.isPointer()) {
3595 const LLT IntPtrTy = LLT::scalar(SizeInBits: SrcTy.getSizeInBits());
3596 SrcReg = MIRBuilder.buildPtrToInt(Dst: IntPtrTy, Src: SrcReg).getReg(Idx: 0);
3597 }
3598
3599 auto ExtVal = MIRBuilder.buildAnyExtOrTrunc(Res: NewSrcTy, Op: SrcReg);
3600
3601 // Obtain the smaller value by shifting away the larger value.
3602 auto ShiftAmt = MIRBuilder.buildConstant(Res: NewSrcTy, Val: LargeSplitSize);
3603 auto SmallVal = MIRBuilder.buildLShr(Dst: NewSrcTy, Src0: ExtVal, Src1: ShiftAmt);
3604
3605 // Generate the PtrAdd and truncating stores.
3606 LLT PtrTy = MRI.getType(Reg: PtrReg);
3607 auto OffsetCst = MIRBuilder.buildConstant(
3608 Res: LLT::scalar(SizeInBits: PtrTy.getSizeInBits()), Val: LargeSplitSize / 8);
3609 auto SmallPtr =
3610 MIRBuilder.buildPtrAdd(Res: PtrTy, Op0: PtrReg, Op1: OffsetCst);
3611
3612 MachineMemOperand *LargeMMO =
3613 MF.getMachineMemOperand(MMO: &MMO, Offset: 0, Size: LargeSplitSize / 8);
3614 MachineMemOperand *SmallMMO =
3615 MF.getMachineMemOperand(MMO: &MMO, Offset: LargeSplitSize / 8, Size: SmallSplitSize / 8);
3616 MIRBuilder.buildStore(Val: ExtVal, Addr: PtrReg, MMO&: *LargeMMO);
3617 MIRBuilder.buildStore(Val: SmallVal, Addr: SmallPtr, MMO&: *SmallMMO);
3618 StoreMI.eraseFromParent();
3619 return Legalized;
3620}
3621
3622LegalizerHelper::LegalizeResult
3623LegalizerHelper::bitcast(MachineInstr &MI, unsigned TypeIdx, LLT CastTy) {
3624 switch (MI.getOpcode()) {
3625 case TargetOpcode::G_LOAD: {
3626 if (TypeIdx != 0)
3627 return UnableToLegalize;
3628 MachineMemOperand &MMO = **MI.memoperands_begin();
3629
3630 // Not sure how to interpret a bitcast of an extending load.
3631 if (MMO.getMemoryType().getSizeInBits() != CastTy.getSizeInBits())
3632 return UnableToLegalize;
3633
3634 Observer.changingInstr(MI);
3635 bitcastDst(MI, CastTy, OpIdx: 0);
3636 MMO.setType(CastTy);
3637 Observer.changedInstr(MI);
3638 return Legalized;
3639 }
3640 case TargetOpcode::G_STORE: {
3641 if (TypeIdx != 0)
3642 return UnableToLegalize;
3643
3644 MachineMemOperand &MMO = **MI.memoperands_begin();
3645
3646 // Not sure how to interpret a bitcast of a truncating store.
3647 if (MMO.getMemoryType().getSizeInBits() != CastTy.getSizeInBits())
3648 return UnableToLegalize;
3649
3650 Observer.changingInstr(MI);
3651 bitcastSrc(MI, CastTy, OpIdx: 0);
3652 MMO.setType(CastTy);
3653 Observer.changedInstr(MI);
3654 return Legalized;
3655 }
3656 case TargetOpcode::G_SELECT: {
3657 if (TypeIdx != 0)
3658 return UnableToLegalize;
3659
3660 if (MRI.getType(Reg: MI.getOperand(i: 1).getReg()).isVector()) {
3661 LLVM_DEBUG(
3662 dbgs() << "bitcast action not implemented for vector select\n");
3663 return UnableToLegalize;
3664 }
3665
3666 Observer.changingInstr(MI);
3667 bitcastSrc(MI, CastTy, OpIdx: 2);
3668 bitcastSrc(MI, CastTy, OpIdx: 3);
3669 bitcastDst(MI, CastTy, OpIdx: 0);
3670 Observer.changedInstr(MI);
3671 return Legalized;
3672 }
3673 case TargetOpcode::G_AND:
3674 case TargetOpcode::G_OR:
3675 case TargetOpcode::G_XOR: {
3676 Observer.changingInstr(MI);
3677 bitcastSrc(MI, CastTy, OpIdx: 1);
3678 bitcastSrc(MI, CastTy, OpIdx: 2);
3679 bitcastDst(MI, CastTy, OpIdx: 0);
3680 Observer.changedInstr(MI);
3681 return Legalized;
3682 }
3683 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
3684 return bitcastExtractVectorElt(MI, TypeIdx, CastTy);
3685 case TargetOpcode::G_INSERT_VECTOR_ELT:
3686 return bitcastInsertVectorElt(MI, TypeIdx, CastTy);
3687 default:
3688 return UnableToLegalize;
3689 }
3690}
3691
3692// Legalize an instruction by changing the opcode in place.
3693void LegalizerHelper::changeOpcode(MachineInstr &MI, unsigned NewOpcode) {
3694 Observer.changingInstr(MI);
3695 MI.setDesc(MIRBuilder.getTII().get(Opcode: NewOpcode));
3696 Observer.changedInstr(MI);
3697}
3698
3699LegalizerHelper::LegalizeResult
3700LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
3701 using namespace TargetOpcode;
3702
3703 switch(MI.getOpcode()) {
3704 default:
3705 return UnableToLegalize;
3706 case TargetOpcode::G_FCONSTANT:
3707 return lowerFConstant(MI);
3708 case TargetOpcode::G_BITCAST:
3709 return lowerBitcast(MI);
3710 case TargetOpcode::G_SREM:
3711 case TargetOpcode::G_UREM: {
3712 LLT Ty = MRI.getType(Reg: MI.getOperand(i: 0).getReg());
3713 auto Quot =
3714 MIRBuilder.buildInstr(Opc: MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV, DstOps: {Ty},
3715 SrcOps: {MI.getOperand(i: 1), MI.getOperand(i: 2)});
3716
3717 auto Prod = MIRBuilder.buildMul(Dst: Ty, Src0: Quot, Src1: MI.getOperand(i: 2));
3718 MIRBuilder.buildSub(Dst: MI.getOperand(i: 0), Src0: MI.getOperand(i: 1), Src1: Prod);
3719 MI.eraseFromParent();
3720 return Legalized;
3721 }
3722 case TargetOpcode::G_SADDO:
3723 case TargetOpcode::G_SSUBO:
3724 return lowerSADDO_SSUBO(MI);
3725 case TargetOpcode::G_UMULH:
3726 case TargetOpcode::G_SMULH:
3727 return lowerSMULH_UMULH(MI);
3728 case TargetOpcode::G_SMULO:
3729 case TargetOpcode::G_UMULO: {
3730 // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the
3731 // result.
3732 auto [Res, Overflow, LHS, RHS] = MI.getFirst4Regs();
3733 LLT Ty = MRI.getType(Reg: Res);
3734
3735 unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO
3736 ? TargetOpcode::G_SMULH
3737 : TargetOpcode::G_UMULH;
3738
3739 Observer.changingInstr(MI);
3740 const auto &TII = MIRBuilder.getTII();
3741 MI.setDesc(TII.get(Opcode: TargetOpcode::G_MUL));
3742 MI.removeOperand(OpNo: 1);
3743 Observer.changedInstr(MI);
3744
3745 auto HiPart = MIRBuilder.buildInstr(Opc: Opcode, DstOps: {Ty}, SrcOps: {LHS, RHS});
3746 auto Zero = MIRBuilder.buildConstant(Res: Ty, Val: 0);
3747
3748 // Move insert point forward so we can use the Res register if needed.
3749 MIRBuilder.setInsertPt(MBB&: MIRBuilder.getMBB(), II: ++MIRBuilder.getInsertPt());
3750
3751 // For *signed* multiply, overflow is detected by checking:
3752 // (hi != (lo >> bitwidth-1))
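    // For example, multiplying the s32 values 0x10000 and 0x10000 produces
    // lo = 0 and hi = 1; sign-extending lo gives 0, which differs from hi, so
    // overflow is reported.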
3753 if (Opcode == TargetOpcode::G_SMULH) {
3754 auto ShiftAmt = MIRBuilder.buildConstant(Res: Ty, Val: Ty.getSizeInBits() - 1);
3755 auto Shifted = MIRBuilder.buildAShr(Dst: Ty, Src0: Res, Src1: ShiftAmt);
3756 MIRBuilder.buildICmp(Pred: CmpInst::ICMP_NE, Res: Overflow, Op0: HiPart, Op1: Shifted);
3757 } else {
3758 MIRBuilder.buildICmp(Pred: CmpInst::ICMP_NE, Res: Overflow, Op0: HiPart, Op1: Zero);
3759 }
3760 return Legalized;
3761 }
3762 case TargetOpcode::G_FNEG: {
3763 auto [Res, SubByReg] = MI.getFirst2Regs();
3764 LLT Ty = MRI.getType(Reg: Res);
3765
3766 // TODO: Handle vector types once we are able to
3767 // represent them.
3768 if (Ty.isVector())
3769 return UnableToLegalize;
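    // For a scalar this reduces to flipping the sign bit, e.g. for s32 the
    // result is x ^ 0x80000000.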
3770 auto SignMask =
3771 MIRBuilder.buildConstant(Res: Ty, Val: APInt::getSignMask(BitWidth: Ty.getSizeInBits()));
3772 MIRBuilder.buildXor(Dst: Res, Src0: SubByReg, Src1: SignMask);
3773 MI.eraseFromParent();
3774 return Legalized;
3775 }
3776 case TargetOpcode::G_FSUB:
3777 case TargetOpcode::G_STRICT_FSUB: {
3778 auto [Res, LHS, RHS] = MI.getFirst3Regs();
3779 LLT Ty = MRI.getType(Reg: Res);
3780
3781 // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)).
3782 auto Neg = MIRBuilder.buildFNeg(Dst: Ty, Src0: RHS);
3783
3784 if (MI.getOpcode() == TargetOpcode::G_STRICT_FSUB)
3785 MIRBuilder.buildStrictFAdd(Dst: Res, Src0: LHS, Src1: Neg, Flags: MI.getFlags());
3786 else
3787 MIRBuilder.buildFAdd(Dst: Res, Src0: LHS, Src1: Neg, Flags: MI.getFlags());
3788
3789 MI.eraseFromParent();
3790 return Legalized;
3791 }
3792 case TargetOpcode::G_FMAD:
3793 return lowerFMad(MI);
3794 case TargetOpcode::G_FFLOOR:
3795 return lowerFFloor(MI);
3796 case TargetOpcode::G_INTRINSIC_ROUND:
3797 return lowerIntrinsicRound(MI);
3798 case TargetOpcode::G_FRINT: {
3799 // Since round even is the assumed rounding mode for unconstrained FP
3800 // operations, rint and roundeven are the same operation.
3801 changeOpcode(MI, NewOpcode: TargetOpcode::G_INTRINSIC_ROUNDEVEN);
3802 return Legalized;
3803 }
3804 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
3805 auto [OldValRes, SuccessRes, Addr, CmpVal, NewVal] = MI.getFirst5Regs();
3806 Register NewOldValRes = MRI.cloneVirtualRegister(VReg: OldValRes);
3807 MIRBuilder.buildAtomicCmpXchg(OldValRes: NewOldValRes, Addr, CmpVal, NewVal,
3808 MMO&: **MI.memoperands_begin());
3809 MIRBuilder.buildICmp(Pred: CmpInst::ICMP_EQ, Res: SuccessRes, Op0: NewOldValRes, Op1: CmpVal);
3810 MIRBuilder.buildCopy(Res: OldValRes, Op: NewOldValRes);
3811 MI.eraseFromParent();
3812 return Legalized;
3813 }
3814 case TargetOpcode::G_LOAD:
3815 case TargetOpcode::G_SEXTLOAD:
3816 case TargetOpcode::G_ZEXTLOAD:
3817 return lowerLoad(LoadMI&: cast<GAnyLoad>(Val&: MI));
3818 case TargetOpcode::G_STORE:
3819 return lowerStore(StoreMI&: cast<GStore>(Val&: MI));
3820 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
3821 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
3822 case TargetOpcode::G_CTLZ:
3823 case TargetOpcode::G_CTTZ:
3824 case TargetOpcode::G_CTPOP:
3825 return lowerBitCount(MI);
3826 case G_UADDO: {
3827 auto [Res, CarryOut, LHS, RHS] = MI.getFirst4Regs();
3828
3829 Register NewRes = MRI.cloneVirtualRegister(VReg: Res);
3830
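    // The unsigned add wrapped iff the result is (unsigned) less than one of
    // the addends; e.g. (s8) 0xF0 + 0x20 wraps to 0x10, which is < 0x20.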
3831 MIRBuilder.buildAdd(Dst: NewRes, Src0: LHS, Src1: RHS);
3832 MIRBuilder.buildICmp(Pred: CmpInst::ICMP_ULT, Res: CarryOut, Op0: NewRes, Op1: RHS);
3833
3834 MIRBuilder.buildCopy(Res, Op: NewRes);
3835
3836 MI.eraseFromParent();
3837 return Legalized;
3838 }
3839 case G_UADDE: {
3840 auto [Res, CarryOut, LHS, RHS, CarryIn] = MI.getFirst5Regs();
3841 const LLT CondTy = MRI.getType(Reg: CarryOut);
3842 const LLT Ty = MRI.getType(Reg: Res);
3843
3844 Register NewRes = MRI.cloneVirtualRegister(VReg: Res);
3845
3846 // Initial add of the two operands.
3847 auto TmpRes = MIRBuilder.buildAdd(Dst: Ty, Src0: LHS, Src1: RHS);
3848
3849 // Initial check for carry.
3850 auto Carry = MIRBuilder.buildICmp(Pred: CmpInst::ICMP_ULT, Res: CondTy, Op0: TmpRes, Op1: LHS);
3851
3852 // Add the sum and the carry.
3853 auto ZExtCarryIn = MIRBuilder.buildZExt(Res: Ty, Op: CarryIn);
3854 MIRBuilder.buildAdd(Dst: NewRes, Src0: TmpRes, Src1: ZExtCarryIn);
3855
3856 // Second check for carry. We can only carry if the initial sum is all 1s
3857 // and the carry is set, resulting in a new sum of 0.
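    // E.g. (s8) LHS = 0xFF, RHS = 0x00, CarryIn = 1: the first add gives 0xFF
    // with no carry; adding the carry wraps the sum to 0x00, and the zero sum
    // together with the set carry-in reports the carry-out.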
3858 auto Zero = MIRBuilder.buildConstant(Res: Ty, Val: 0);
3859 auto ResEqZero =
3860 MIRBuilder.buildICmp(Pred: CmpInst::ICMP_EQ, Res: CondTy, Op0: NewRes, Op1: Zero);
3861 auto Carry2 = MIRBuilder.buildAnd(Dst: CondTy, Src0: ResEqZero, Src1: CarryIn);
3862 MIRBuilder.buildOr(Dst: CarryOut, Src0: Carry, Src1: Carry2);
3863
3864 MIRBuilder.buildCopy(Res, Op: NewRes);
3865
3866 MI.eraseFromParent();
3867 return Legalized;
3868 }
3869 case G_USUBO: {
3870 auto [Res, BorrowOut, LHS, RHS] = MI.getFirst4Regs();
3871
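    // The unsigned subtract borrowed iff LHS is (unsigned) less than RHS.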
3872 MIRBuilder.buildSub(Dst: Res, Src0: LHS, Src1: RHS);
3873 MIRBuilder.buildICmp(Pred: CmpInst::ICMP_ULT, Res: BorrowOut, Op0: LHS, Op1: RHS);
3874
3875 MI.eraseFromParent();
3876 return Legalized;
3877 }
3878 case G_USUBE: {
3879 auto [Res, BorrowOut, LHS, RHS, BorrowIn] = MI.getFirst5Regs();
3880 const LLT CondTy = MRI.getType(Reg: BorrowOut);
3881 const LLT Ty = MRI.getType(Reg: Res);
3882
3883 // Initial subtract of the two operands.
3884 auto TmpRes = MIRBuilder.buildSub(Dst: Ty, Src0: LHS, Src1: RHS);
3885
3886 // Initial check for borrow.
3887 auto Borrow = MIRBuilder.buildICmp(Pred: CmpInst::ICMP_UGT, Res: CondTy, Op0: TmpRes, Op1: LHS);
3888
3889 // Subtract the borrow from the first subtract.
3890 auto ZExtBorrowIn = MIRBuilder.buildZExt(Res: Ty, Op: BorrowIn);
3891 MIRBuilder.buildSub(Dst: Res, Src0: TmpRes, Src1: ZExtBorrowIn);
3892
3893 // Second check for borrow. We can only borrow if the initial difference is
3894 // 0 and the borrow is set, resulting in a new difference of all 1s.
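    // E.g. (s8) LHS = RHS = 0x00 with BorrowIn = 1: the first subtract gives 0
    // with no borrow; subtracting the borrow wraps to 0xFF, and the zero
    // initial difference together with the set borrow-in reports the borrow-out.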
3895 auto Zero = MIRBuilder.buildConstant(Res: Ty, Val: 0);
3896 auto TmpResEqZero =
3897 MIRBuilder.buildICmp(Pred: CmpInst::ICMP_EQ, Res: CondTy, Op0: TmpRes, Op1: Zero);
3898 auto Borrow2 = MIRBuilder.buildAnd(Dst: CondTy, Src0: TmpResEqZero, Src1: BorrowIn);
3899 MIRBuilder.buildOr(Dst: BorrowOut, Src0: Borrow, Src1: Borrow2);
3900
3901 MI.eraseFromParent();
3902 return Legalized;
3903 }
3904 case G_UITOFP:
3905 return lowerUITOFP(MI);
3906 case G_SITOFP:
3907 return lowerSITOFP(MI);
3908 case G_FPTOUI:
3909 return lowerFPTOUI(MI);
3910 case G_FPTOSI:
3911 return lowerFPTOSI(MI);
3912 case G_FPTRUNC:
3913 return lowerFPTRUNC(MI);
3914 case G_FPOWI:
3915 return lowerFPOWI(MI);
3916 case G_SMIN:
3917 case G_SMAX:
3918 case G_UMIN:
3919 case G_UMAX:
3920 return lowerMinMax(MI);
3921 case G_FCOPYSIGN:
3922 return lowerFCopySign(MI);
3923 case G_FMINNUM:
3924 case G_FMAXNUM:
3925 return lowerFMinNumMaxNum(MI);
3926 case G_MERGE_VALUES:
3927 return lowerMergeValues(MI);
3928 case G_UNMERGE_VALUES:
3929 return lowerUnmergeValues(MI);
3930 case TargetOpcode::G_SEXT_INREG: {
3931 assert(MI.getOperand(2).isImm() && "Expected immediate");
3932 int64_t SizeInBits = MI.getOperand(i: 2).getImm();
3933
3934 auto [DstReg, SrcReg] = MI.getFirst2Regs();
3935 LLT DstTy = MRI.getType(Reg: DstReg);
3936 Register TmpRes = MRI.createGenericVirtualRegister(Ty: DstTy);
3937
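    // E.g. a G_SEXT_INREG from 8 bits on an s32 value becomes a shift left by
    // 24 followed by an arithmetic shift right by 24.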
3938 auto MIBSz = MIRBuilder.buildConstant(Res: DstTy, Val: DstTy.getScalarSizeInBits() - SizeInBits);
3939 MIRBuilder.buildShl(Dst: TmpRes, Src0: SrcReg, Src1: MIBSz->getOperand(i: 0));
3940 MIRBuilder.buildAShr(Dst: DstReg, Src0: TmpRes, Src1: MIBSz->getOperand(i: 0));
3941 MI.eraseFromParent();
3942 return Legalized;
3943 }
3944 case G_EXTRACT_VECTOR_ELT:
3945 case G_INSERT_VECTOR_ELT:
3946 return lowerExtractInsertVectorElt(MI);
3947 case G_SHUFFLE_VECTOR:
3948 return lowerShuffleVector(MI);
3949 case G_DYN_STACKALLOC:
3950 return lowerDynStackAlloc(MI);
3951 case G_STACKSAVE:
3952 return lowerStackSave(MI);
3953 case G_STACKRESTORE:
3954 return lowerStackRestore(MI);
3955 case G_EXTRACT:
3956 return lowerExtract(MI);
3957 case G_INSERT:
3958 return lowerInsert(MI);
3959 case G_BSWAP:
3960 return lowerBswap(MI);
3961 case G_BITREVERSE:
3962 return lowerBitreverse(MI);
3963 case G_READ_REGISTER:
3964 case G_WRITE_REGISTER:
3965 return lowerReadWriteRegister(MI);
3966 case G_UADDSAT:
3967 case G_USUBSAT: {
3968 // Try to make a reasonable guess about which lowering strategy to use. The
3969 // target can override this by requesting custom lowering and calling the
3970 // implementation functions directly.
3971 LLT Ty = MRI.getType(Reg: MI.getOperand(i: 0).getReg());
3972 if (LI.isLegalOrCustom(Query: {G_UMIN, Ty}))
3973 return lowerAddSubSatToMinMax(MI);
3974 return lowerAddSubSatToAddoSubo(MI);
3975 }
3976 case G_SADDSAT:
3977 case G_SSUBSAT: {
3978 LLT Ty = MRI.getType(Reg: MI.getOperand(i: 0).getReg());
3979
3980 // FIXME: It would probably make more sense to see if G_SADDO is preferred,
3981 // since it's a shorter expansion. However, we would need to figure out the
3982 // preferred boolean type for the carry out for the query.
3983 if (LI.isLegalOrCustom(Query: {G_SMIN, Ty}) && LI.isLegalOrCustom(Query: {G_SMAX, Ty}))
3984 return lowerAddSubSatToMinMax(MI);
3985 return lowerAddSubSatToAddoSubo(MI);
3986 }
3987 case G_SSHLSAT:
3988 case G_USHLSAT:
3989 return lowerShlSat(MI);
3990 case G_ABS:
3991 return lowerAbsToAddXor(MI);
3992 case G_SELECT:
3993 return lowerSelect(MI);
3994 case G_IS_FPCLASS:
3995 return lowerISFPCLASS(MI);
3996 case G_SDIVREM:
3997 case G_UDIVREM:
3998 return lowerDIVREM(MI);
3999 case G_FSHL:
4000 case G_FSHR:
4001 return lowerFunnelShift(MI);
4002 case G_ROTL:
4003 case G_ROTR:
4004 return lowerRotate(MI);
4005 case G_MEMSET:
4006 case G_MEMCPY:
4007 case G_MEMMOVE:
4008 return lowerMemCpyFamily(MI);
4009 case G_MEMCPY_INLINE:
4010 return lowerMemcpyInline(MI);
4011 case G_ZEXT:
4012 case G_SEXT:
4013 case G_ANYEXT:
4014 return lowerEXT(MI);
4015 case G_TRUNC:
4016 return lowerTRUNC(MI);
4017 GISEL_VECREDUCE_CASES_NONSEQ
4018 return lowerVectorReduction(MI);
4019 case G_VAARG:
4020 return lowerVAArg(MI);
4021 }
4022}
4023
4024Align LegalizerHelper::getStackTemporaryAlignment(LLT Ty,
4025 Align MinAlign) const {
4026 // FIXME: We're missing a way to go back from LLT to llvm::Type to query the
4027 // datalayout for the preferred alignment. Also there should be a target hook
4028 // for this to allow targets to reduce the alignment and ignore the
4029 // datalayout. e.g. AMDGPU should always use a 4-byte alignment, regardless of
4030 // the type.
4031 return std::max(a: Align(PowerOf2Ceil(A: Ty.getSizeInBytes())), b: MinAlign);
4032}
4033
4034MachineInstrBuilder
4035LegalizerHelper::createStackTemporary(TypeSize Bytes, Align Alignment,
4036 MachinePointerInfo &PtrInfo) {
4037 MachineFunction &MF = MIRBuilder.getMF();
4038 const DataLayout &DL = MIRBuilder.getDataLayout();
4039 int FrameIdx = MF.getFrameInfo().CreateStackObject(Size: Bytes, Alignment, isSpillSlot: false);
4040
4041 unsigned AddrSpace = DL.getAllocaAddrSpace();
4042 LLT FramePtrTy = LLT::pointer(AddressSpace: AddrSpace, SizeInBits: DL.getPointerSizeInBits(AS: AddrSpace));
4043
4044 PtrInfo = MachinePointerInfo::getFixedStack(MF, FI: FrameIdx);
4045 return MIRBuilder.buildFrameIndex(Res: FramePtrTy, Idx: FrameIdx);
4046}
4047
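/// Clamp a (possibly dynamic) vector index so it is in bounds for \p VecTy.
/// In-bounds constant indices are returned unchanged. For a power-of-2 element
/// count the index is masked (e.g. idx & 3 for a 4-element vector); otherwise
/// it is clamped with umin(idx, NElts - 1).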
4048static Register clampVectorIndex(MachineIRBuilder &B, Register IdxReg,
4049 LLT VecTy) {
4050 LLT IdxTy = B.getMRI()->getType(Reg: IdxReg);
4051 unsigned NElts = VecTy.getNumElements();
4052
4053 int64_t IdxVal;
4054 if (mi_match(R: IdxReg, MRI: *B.getMRI(), P: m_ICst(Cst&: IdxVal))) {
4055 if (IdxVal < VecTy.getNumElements())
4056 return IdxReg;
4057 // If a constant index would be out of bounds, clamp it as well.
4058 }
4059
4060 if (isPowerOf2_32(Value: NElts)) {
4061 APInt Imm = APInt::getLowBitsSet(numBits: IdxTy.getSizeInBits(), loBitsSet: Log2_32(Value: NElts));
4062 return B.buildAnd(Dst: IdxTy, Src0: IdxReg, Src1: B.buildConstant(Res: IdxTy, Val: Imm)).getReg(Idx: 0);
4063 }
4064
4065 return B.buildUMin(Dst: IdxTy, Src0: IdxReg, Src1: B.buildConstant(Res: IdxTy, Val: NElts - 1))
4066 .getReg(Idx: 0);
4067}
4068
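/// Compute the address of element \p Index of an in-memory vector of type
/// \p VecTy starting at \p VecPtr: the index is clamped to be in bounds,
/// converted to the index width of the pointer's address space, scaled by the
/// element size in bytes, and added to the base pointer.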
4069Register LegalizerHelper::getVectorElementPointer(Register VecPtr, LLT VecTy,
4070 Register Index) {
4071 LLT EltTy = VecTy.getElementType();
4072
4073 // Calculate the element offset and add it to the pointer.
4074 unsigned EltSize = EltTy.getSizeInBits() / 8; // FIXME: should be ABI size.
4075 assert(EltSize * 8 == EltTy.getSizeInBits() &&
4076 "Converting bits to bytes lost precision");
4077
4078 Index = clampVectorIndex(B&: MIRBuilder, IdxReg: Index, VecTy);
4079
4080 // Convert index to the correct size for the address space.
4081 const DataLayout &DL = MIRBuilder.getDataLayout();
4082 unsigned AS = MRI.getType(Reg: VecPtr).getAddressSpace();
4083 unsigned IndexSizeInBits = DL.getIndexSize(AS) * 8;
4084 LLT IdxTy = MRI.getType(Reg: Index).changeElementSize(NewEltSize: IndexSizeInBits);
4085 if (IdxTy != MRI.getType(Reg: Index))
4086 Index = MIRBuilder.buildSExtOrTrunc(Res: IdxTy, Op: Index).getReg(Idx: 0);
4087
4088 auto Mul = MIRBuilder.buildMul(Dst: IdxTy, Src0: Index,
4089 Src1: MIRBuilder.buildConstant(Res: IdxTy, Val: EltSize));
4090
4091 LLT PtrTy = MRI.getType(Reg: VecPtr);
4092 return MIRBuilder.buildPtrAdd(Res: PtrTy, Op0: VecPtr, Op1: Mul).getReg(Idx: 0);
4093}
4094
4095#ifndef NDEBUG
4096/// Check that all vector operands have the same number of elements. Other
4097/// operands should be listed in \p NonVecOpIndices.
4098static bool hasSameNumEltsOnAllVectorOperands(
4099 GenericMachineInstr &MI, MachineRegisterInfo &MRI,
4100 std::initializer_list<unsigned> NonVecOpIndices) {
4101 if (MI.getNumMemOperands() != 0)
4102 return false;
4103
4104 LLT VecTy = MRI.getType(Reg: MI.getReg(Idx: 0));
4105 if (!VecTy.isVector())
4106 return false;
4107 unsigned NumElts = VecTy.getNumElements();
4108
4109 for (unsigned OpIdx = 1; OpIdx < MI.getNumOperands(); ++OpIdx) {
4110 MachineOperand &Op = MI.getOperand(i: OpIdx);
4111 if (!Op.isReg()) {
4112 if (!is_contained(Set: NonVecOpIndices, Element: OpIdx))
4113 return false;
4114 continue;
4115 }
4116
4117 LLT Ty = MRI.getType(Reg: Op.getReg());
4118 if (!Ty.isVector()) {
4119 if (!is_contained(Set: NonVecOpIndices, Element: OpIdx))
4120 return false;
4121 continue;
4122 }
4123
4124 if (Ty.getNumElements() != NumElts)
4125 return false;
4126 }
4127
4128 return true;
4129}
4130#endif
4131
4132/// Fill \p DstOps with DstOps that together have the same number of elements
4133/// as \p Ty. Each DstOp is either a scalar (when \p NumElts = 1) or a vector
4134/// with \p NumElts elements. When Ty.getNumElements() is not a multiple of
4135/// \p NumElts, the last DstOp (the leftover) has fewer than \p NumElts elements.
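///
/// For example, for \p Ty = <7 x s32> and \p NumElts = 4 this produces
/// {<4 x s32>, <3 x s32>}.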
4136static void makeDstOps(SmallVectorImpl<DstOp> &DstOps, LLT Ty,
4137 unsigned NumElts) {
4138 LLT LeftoverTy;
4139 assert(Ty.isVector() && "Expected vector type");
4140 LLT EltTy = Ty.getElementType();
4141 LLT NarrowTy = (NumElts == 1) ? EltTy : LLT::fixed_vector(NumElements: NumElts, ScalarTy: EltTy);
4142 int NumParts, NumLeftover;
4143 std::tie(args&: NumParts, args&: NumLeftover) =
4144 getNarrowTypeBreakDown(OrigTy: Ty, NarrowTy, LeftoverTy);
4145
4146 assert(NumParts > 0 && "Error in getNarrowTypeBreakDown");
4147 for (int i = 0; i < NumParts; ++i) {
4148 DstOps.push_back(Elt: NarrowTy);
4149 }
4150
4151 if (LeftoverTy.isValid()) {
4152 assert(NumLeftover == 1 && "expected exactly one leftover");
4153 DstOps.push_back(Elt: LeftoverTy);
4154 }
4155}
4156
4157/// Operand \p Op is used by \p N sub-instructions. Fill \p Ops with \p N SrcOps
4158/// made from \p Op depending on the operand's type.
4159static void broadcastSrcOp(SmallVectorImpl<SrcOp> &Ops, unsigned N,
4160 MachineOperand &Op) {
4161 for (unsigned i = 0; i < N; ++i) {
4162 if (Op.isReg())
4163 Ops.push_back(Elt: Op.getReg());
4164 else if (Op.isImm())
4165 Ops.push_back(Elt: Op.getImm());
4166 else if (Op.isPredicate())
4167 Ops.push_back(Elt: static_cast<CmpInst::Predicate>(Op.getPredicate()));
4168 else
4169 llvm_unreachable("Unsupported type");
4170 }
4171}
4172
4173// Handle splitting vector operations which need to have the same number of
4174// elements in each type index, but each type index may have a different element
4175// type.
4176//
4177// e.g. <4 x s64> = G_SHL <4 x s64>, <4 x s32> ->
4178// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
4179// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
4180//
4181// Also handles some irregular breakdown cases, e.g.
4182// e.g. <3 x s64> = G_SHL <3 x s64>, <3 x s32> ->
4183// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
4184// s64 = G_SHL s64, s32
4185LegalizerHelper::LegalizeResult
4186LegalizerHelper::fewerElementsVectorMultiEltType(
4187 GenericMachineInstr &MI, unsigned NumElts,
4188 std::initializer_list<unsigned> NonVecOpIndices) {
4189 assert(hasSameNumEltsOnAllVectorOperands(MI, MRI, NonVecOpIndices) &&
4190 "Non-compatible opcode or not specified non-vector operands");
4191 unsigned OrigNumElts = MRI.getType(Reg: MI.getReg(Idx: 0)).getNumElements();
4192
4193 unsigned NumInputs = MI.getNumOperands() - MI.getNumDefs();
4194 unsigned NumDefs = MI.getNumDefs();
4195
4196 // Create DstOps (sub-vectors with NumElts elts + Leftover) for each output.
4197 // Build instructions with DstOps so that an instruction found by CSE can be
4198 // used directly; when building with a vreg dest, CSE copies it into that vreg.
4199 SmallVector<SmallVector<DstOp, 8>, 2> OutputOpsPieces(NumDefs);
4200 // Output registers will be taken from created instructions.
4201 SmallVector<SmallVector<Register, 8>, 2> OutputRegs(NumDefs);
4202 for (unsigned i = 0; i < NumDefs; ++i) {
4203 makeDstOps(DstOps&: OutputOpsPieces[i], Ty: MRI.getType(Reg: MI.getReg(Idx: i)), NumElts);
4204 }
4205
4206 // Split vector input operands into sub-vectors with NumElts elts + Leftover.
4207 // Operands listed in NonVecOpIndices will be used as is without splitting;
4208 // examples: compare predicate in icmp and fcmp (op 1), vector select with i1
4209 // scalar condition (op 1), immediate in sext_inreg (op 2).
4210 SmallVector<SmallVector<SrcOp, 8>, 3> InputOpsPieces(NumInputs);
4211 for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands();
4212 ++UseIdx, ++UseNo) {
4213 if (is_contained(Set: NonVecOpIndices, Element: UseIdx)) {
4214 broadcastSrcOp(Ops&: InputOpsPieces[UseNo], N: OutputOpsPieces[0].size(),
4215 Op&: MI.getOperand(i: UseIdx));
4216 } else {
4217 SmallVector<Register, 8> SplitPieces;
4218 extractVectorParts(Reg: MI.getReg(Idx: UseIdx), NumElts, VRegs&: SplitPieces, MIRBuilder,
4219 MRI);
4220 for (auto Reg : SplitPieces)
4221 InputOpsPieces[UseNo].push_back(Elt: Reg);
4222 }
4223 }
4224
4225 unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
4226
4227 // Take i-th piece of each input operand split and build sub-vector/scalar
4228 // instruction. Set i-th DstOp(s) from OutputOpsPieces as destination(s).
4229 for (unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
4230 SmallVector<DstOp, 2> Defs;
4231 for (unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
4232 Defs.push_back(Elt: OutputOpsPieces[DstNo][i]);
4233
4234 SmallVector<SrcOp, 3> Uses;
4235 for (unsigned InputNo = 0; InputNo < NumInputs; ++InputNo)
4236 Uses.push_back(Elt: InputOpsPieces[InputNo][i]);
4237
4238 auto I = MIRBuilder.buildInstr(Opc: MI.getOpcode(), DstOps: Defs, SrcOps: Uses, Flags: MI.getFlags());
4239 for (unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
4240 OutputRegs[DstNo].push_back(Elt: I.getReg(Idx: DstNo));
4241 }
4242
4243 // Merge small outputs into MI's output for each def operand.
4244 if (NumLeftovers) {
4245 for (unsigned i = 0; i < NumDefs; ++i)
4246 mergeMixedSubvectors(DstReg: MI.getReg(Idx: i), PartRegs: OutputRegs[i]);
4247 } else {
4248 for (unsigned i = 0; i < NumDefs; ++i)
4249 MIRBuilder.buildMergeLikeInstr(Res: MI.getReg(Idx: i), Ops: OutputRegs[i]);
4250 }
4251
4252 MI.eraseFromParent();
4253 return Legalized;
4254}
4255
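// Split a vector G_PHI into PHIs over narrower vectors. For example, a
// <4 x s32> G_PHI split with NumElts = 2 becomes two <2 x s32> G_PHIs; each
// incoming value is unmerged in its predecessor block, and the small results
// are merged back together right after the new PHIs.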
4256LegalizerHelper::LegalizeResult
4257LegalizerHelper::fewerElementsVectorPhi(GenericMachineInstr &MI,
4258 unsigned NumElts) {
4259 unsigned OrigNumElts = MRI.getType(Reg: MI.getReg(Idx: 0)).getNumElements();
4260
4261 unsigned NumInputs = MI.getNumOperands() - MI.getNumDefs();
4262 unsigned NumDefs = MI.getNumDefs();
4263
4264 SmallVector<DstOp, 8> OutputOpsPieces;
4265 SmallVector<Register, 8> OutputRegs;
4266 makeDstOps(DstOps&: OutputOpsPieces, Ty: MRI.getType(Reg: MI.getReg(Idx: 0)), NumElts);
4267
4268 // Instructions that perform the register split will be inserted in the basic
4269 // block where the register is defined (the basic block is in the next operand).
4270 SmallVector<SmallVector<Register, 8>, 3> InputOpsPieces(NumInputs / 2);
4271 for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands();
4272 UseIdx += 2, ++UseNo) {
4273 MachineBasicBlock &OpMBB = *MI.getOperand(i: UseIdx + 1).getMBB();
4274 MIRBuilder.setInsertPt(MBB&: OpMBB, II: OpMBB.getFirstTerminatorForward());
4275 extractVectorParts(Reg: MI.getReg(Idx: UseIdx), NumElts, VRegs&: InputOpsPieces[UseNo],
4276 MIRBuilder, MRI);
4277 }
4278
4279 // Build PHIs with fewer elements.
4280 unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
4281 MIRBuilder.setInsertPt(MBB&: *MI.getParent(), II: MI);
4282 for (unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
4283 auto Phi = MIRBuilder.buildInstr(Opcode: TargetOpcode::G_PHI);
4284 Phi.addDef(
4285 RegNo: MRI.createGenericVirtualRegister(Ty: OutputOpsPieces[i].getLLTTy(MRI)));
4286 OutputRegs.push_back(Elt: Phi.getReg(Idx: 0));
4287
4288 for (unsigned j = 0; j < NumInputs / 2; ++j) {
4289 Phi.addUse(RegNo: InputOpsPieces[j][i]);
4290 Phi.add(MO: MI.getOperand(i: 1 + j * 2 + 1));
4291 }
4292 }
4293
4294 // Set the insert point after the existing PHIs
4295 MachineBasicBlock &MBB = *MI.getParent();
4296 MIRBuilder.setInsertPt(MBB, II: MBB.getFirstNonPHI());
4297
4298 // Merge small outputs into MI's def.
4299 if (NumLeftovers) {
4300 mergeMixedSubvectors(DstReg: MI.getReg(Idx: 0), PartRegs: OutputRegs);
4301 } else {
4302 MIRBuilder.buildMergeLikeInstr(Res: MI.getReg(Idx: 0), Ops: OutputRegs);
4303 }
4304
4305 MI.eraseFromParent();
4306 return Legalized;
4307}
4308
4309LegalizerHelper::LegalizeResult
4310LegalizerHelper::fewerElementsVectorUnmergeValues(MachineInstr &MI,
4311 unsigned TypeIdx,
4312 LLT NarrowTy) {
4313 const int NumDst = MI.getNumOperands() - 1;
4314 const Register SrcReg = MI.getOperand(i: NumDst).getReg();
4315 LLT DstTy = MRI.getType(Reg: MI.getOperand(i: 0).getReg());
4316 LLT SrcTy = MRI.getType(Reg: SrcReg);
4317
4318 if (TypeIdx != 1 || NarrowTy == DstTy)
4319 return UnableToLegalize;
4320
4321 // Requires compatible types. Otherwise SrcReg should have been defined by a
4322 // merge-like instruction that would get artifact-combined. Most likely the
4323 // instruction that defines SrcReg has to perform more/fewer-elements
4324 // legalization compatible with NarrowTy.
4325 assert(SrcTy.isVector() && NarrowTy.isVector() && "Expected vector types");
4326 assert((SrcTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
4327
4328 if ((SrcTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0) ||
4329 (NarrowTy.getSizeInBits() % DstTy.getSizeInBits() != 0))
4330 return UnableToLegalize;
4331
4332 // This is most likely DstTy (smaller than register size) packed in SrcTy
4333 // (larger than register size), and since the unmerge was not combined it will
4334 // be lowered to bit-sequence extracts from a register. Unpack SrcTy into
4335 // NarrowTy (register size) pieces first, then unpack each NarrowTy piece to DstTy.
4336
4337 // %1:_(DstTy), %2, %3, %4 = G_UNMERGE_VALUES %0:_(SrcTy)
4338 //
4339 // %5:_(NarrowTy), %6 = G_UNMERGE_VALUES %0:_(SrcTy) - reg sequence
4340 // %1:_(DstTy), %2 = G_UNMERGE_VALUES %5:_(NarrowTy) - sequence of bits in reg
4341 // %3:_(DstTy), %4 = G_UNMERGE_VALUES %6:_(NarrowTy)
4342 auto Unmerge = MIRBuilder.buildUnmerge(Res: NarrowTy, Op: SrcReg);
4343 const int NumUnmerge = Unmerge->getNumOperands() - 1;
4344 const int PartsPerUnmerge = NumDst / NumUnmerge;
4345
4346 for (int I = 0; I != NumUnmerge; ++I) {
4347 auto MIB = MIRBuilder.buildInstr(Opcode: TargetOpcode::G_UNMERGE_VALUES);
4348
4349 for (int J = 0; J != PartsPerUnmerge; ++J)
4350 MIB.addDef(RegNo: MI.getOperand(i: I * PartsPerUnmerge + J).getReg());
4351 MIB.addUse(RegNo: Unmerge.getReg(Idx: I));
4352 }
4353
4354 MI.eraseFromParent();
4355 return Legalized;
4356}
4357
4358LegalizerHelper::LegalizeResult
4359LegalizerHelper::fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx,
4360 LLT NarrowTy) {
4361 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
4362 // Requires compatible types. Otherwise the user of DstReg did not perform the
4363 // unmerge that should have been artifact-combined. Most likely the instruction
4364 // using DstReg has to do more/fewer-elements legalization compatible with NarrowTy.
4365 assert(DstTy.isVector() && NarrowTy.isVector() && "Expected vector types");
4366 assert((DstTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
4367 if (NarrowTy == SrcTy)
4368 return UnableToLegalize;
4369
4370 // This attempts to lower part of an LCMTy merge/unmerge sequence. The intended
4371 // use is for old MIR tests. Since the more/fewer-elements changes, it should no
4372 // longer be possible to generate MIR like this when starting from LLVM IR,
4373 // because the LCMTy approach was replaced with merge/unmerge to vector elements.
4374 if (TypeIdx == 1) {
4375 assert(SrcTy.isVector() && "Expected vector types");
4376 assert((SrcTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
4377 if ((DstTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0) ||
4378 (NarrowTy.getNumElements() >= SrcTy.getNumElements()))
4379 return UnableToLegalize;
4380 // %2:_(DstTy) = G_CONCAT_VECTORS %0:_(SrcTy), %1:_(SrcTy)
4381 //
4382 // %3:_(EltTy), %4, %5 = G_UNMERGE_VALUES %0:_(SrcTy)
4383 // %6:_(EltTy), %7, %8 = G_UNMERGE_VALUES %1:_(SrcTy)
4384 // %9:_(NarrowTy) = G_BUILD_VECTOR %3:_(EltTy), %4
4385 // %10:_(NarrowTy) = G_BUILD_VECTOR %5:_(EltTy), %6
4386 // %11:_(NarrowTy) = G_BUILD_VECTOR %7:_(EltTy), %8
4387 // %2:_(DstTy) = G_CONCAT_VECTORS %9:_(NarrowTy), %10, %11
4388
4389 SmallVector<Register, 8> Elts;
4390 LLT EltTy = MRI.getType(Reg: MI.getOperand(i: 1).getReg()).getScalarType();
4391 for (unsigned i = 1; i < MI.getNumOperands(); ++i) {
4392 auto Unmerge = MIRBuilder.buildUnmerge(Res: EltTy, Op: MI.getOperand(i).getReg());
4393 for (unsigned j = 0; j < Unmerge->getNumDefs(); ++j)
4394 Elts.push_back(Elt: Unmerge.getReg(Idx: j));
4395 }
4396
4397 SmallVector<Register, 8> NarrowTyElts;
4398 unsigned NumNarrowTyElts = NarrowTy.getNumElements();
4399 unsigned NumNarrowTyPieces = DstTy.getNumElements() / NumNarrowTyElts;
4400 for (unsigned i = 0, Offset = 0; i < NumNarrowTyPieces;
4401 ++i, Offset += NumNarrowTyElts) {
4402 ArrayRef<Register> Pieces(&Elts[Offset], NumNarrowTyElts);
4403 NarrowTyElts.push_back(
4404 Elt: MIRBuilder.buildMergeLikeInstr(Res: NarrowTy, Ops: Pieces).getReg(Idx: 0));
4405 }
4406
4407 MIRBuilder.buildMergeLikeInstr(Res: DstReg, Ops: NarrowTyElts);
4408 MI.eraseFromParent();
4409 return Legalized;
4410 }
4411
4412 assert(TypeIdx == 0 && "Bad type index");
4413 if ((NarrowTy.getSizeInBits() % SrcTy.getSizeInBits() != 0) ||
4414 (DstTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0))
4415 return UnableToLegalize;
4416
4417 // This is most likely SrcTy (smaller than register size) packed in DstTy
4418 // (larger than register size), and since the merge was not combined it will be
4419 // lowered to bit-sequence packing into a register. Merge SrcTy to NarrowTy
4420 // (register size) pieces first, then merge the NarrowTy pieces to DstTy.
4421
4422 // %0:_(DstTy) = G_MERGE_VALUES %1:_(SrcTy), %2, %3, %4
4423 //
4424 // %5:_(NarrowTy) = G_MERGE_VALUES %1:_(SrcTy), %2 - sequence of bits in reg
4425 // %6:_(NarrowTy) = G_MERGE_VALUES %3:_(SrcTy), %4
4426 // %0:_(DstTy) = G_MERGE_VALUES %5:_(NarrowTy), %6 - reg sequence
4427 SmallVector<Register, 8> NarrowTyElts;
4428 unsigned NumParts = DstTy.getNumElements() / NarrowTy.getNumElements();
4429 unsigned NumSrcElts = SrcTy.isVector() ? SrcTy.getNumElements() : 1;
4430 unsigned NumElts = NarrowTy.getNumElements() / NumSrcElts;
4431 for (unsigned i = 0; i < NumParts; ++i) {
4432 SmallVector<Register, 8> Sources;
4433 for (unsigned j = 0; j < NumElts; ++j)
4434 Sources.push_back(Elt: MI.getOperand(i: 1 + i * NumElts + j).getReg());
4435 NarrowTyElts.push_back(
4436 Elt: MIRBuilder.buildMergeLikeInstr(Res: NarrowTy, Ops: Sources).getReg(Idx: 0));
4437 }
4438
4439 MIRBuilder.buildMergeLikeInstr(Res: DstReg, Ops: NarrowTyElts);
4440 MI.eraseFromParent();
4441 return Legalized;
4442}
4443
4444LegalizerHelper::LegalizeResult
4445LegalizerHelper::fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI,
4446 unsigned TypeIdx,
4447 LLT NarrowVecTy) {
4448 auto [DstReg, SrcVec] = MI.getFirst2Regs();
4449 Register InsertVal;
4450 bool IsInsert = MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT;
4451
4452 assert((IsInsert ? TypeIdx == 0 : TypeIdx == 1) && "not a vector type index");
4453 if (IsInsert)
4454 InsertVal = MI.getOperand(i: 2).getReg();
4455
4456 Register Idx = MI.getOperand(i: MI.getNumOperands() - 1).getReg();
4457
4458 // TODO: Handle total scalarization case.
4459 if (!NarrowVecTy.isVector())
4460 return UnableToLegalize;
4461
4462 LLT VecTy = MRI.getType(Reg: SrcVec);
4463
4464 // If the index is a constant, we can really break this down as you would
4465 // expect, and index into the target-sized pieces.
4466 int64_t IdxVal;
4467 auto MaybeCst = getIConstantVRegValWithLookThrough(VReg: Idx, MRI);
4468 if (MaybeCst) {
4469 IdxVal = MaybeCst->Value.getSExtValue();
4470 // Avoid out of bounds indexing the pieces.
4471 if (IdxVal >= VecTy.getNumElements()) {
4472 MIRBuilder.buildUndef(Res: DstReg);
4473 MI.eraseFromParent();
4474 return Legalized;
4475 }
4476
4477 SmallVector<Register, 8> VecParts;
4478 LLT GCDTy = extractGCDType(Parts&: VecParts, DstTy: VecTy, NarrowTy: NarrowVecTy, SrcReg: SrcVec);
4479
4480 // Build a sequence of NarrowTy pieces in VecParts for this operand.
4481 LLT LCMTy = buildLCMMergePieces(DstTy: VecTy, NarrowTy: NarrowVecTy, GCDTy, VRegs&: VecParts,
4482 PadStrategy: TargetOpcode::G_ANYEXT);
4483
4484 unsigned NewNumElts = NarrowVecTy.getNumElements();
4485
4486 LLT IdxTy = MRI.getType(Reg: Idx);
4487 int64_t PartIdx = IdxVal / NewNumElts;
4488 auto NewIdx =
4489 MIRBuilder.buildConstant(Res: IdxTy, Val: IdxVal - NewNumElts * PartIdx);
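// For example, with VecTy = <8 x s32>, NarrowVecTy = <4 x s32> and IdxVal = 5:
// NewNumElts = 4, PartIdx = 1 and NewIdx = 1, i.e. the element lives at index 1
// of the second <4 x s32> piece.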
4490
4491 if (IsInsert) {
4492 LLT PartTy = MRI.getType(Reg: VecParts[PartIdx]);
4493
4494 // Use the adjusted index to insert into one of the subvectors.
4495 auto InsertPart = MIRBuilder.buildInsertVectorElement(
4496 Res: PartTy, Val: VecParts[PartIdx], Elt: InsertVal, Idx: NewIdx);
4497 VecParts[PartIdx] = InsertPart.getReg(Idx: 0);
4498
4499 // Recombine the inserted subvector with the others to reform the result
4500 // vector.
4501 buildWidenedRemergeToDst(DstReg, LCMTy, RemergeRegs: VecParts);
4502 } else {
4503 MIRBuilder.buildExtractVectorElement(Res: DstReg, Val: VecParts[PartIdx], Idx: NewIdx);
4504 }
4505
4506 MI.eraseFromParent();
4507 return Legalized;
4508 }
4509
4510 // With a variable index, we can't perform the operation in a smaller type, so
4511 // we're forced to expand this.
4512 //
4513 // TODO: We could emit a chain of compare/select to figure out which piece to
4514 // index.
4515 return lowerExtractInsertVectorElt(MI);
4516}
4517
4518LegalizerHelper::LegalizeResult
4519LegalizerHelper::reduceLoadStoreWidth(GLoadStore &LdStMI, unsigned TypeIdx,
4520 LLT NarrowTy) {
4521 // FIXME: Don't know how to handle secondary types yet.
4522 if (TypeIdx != 0)
4523 return UnableToLegalize;
4524
4525 // This implementation doesn't work for atomics. Give up instead of doing
4526 // something invalid.
4527 if (LdStMI.isAtomic())
4528 return UnableToLegalize;
4529
4530 bool IsLoad = isa<GLoad>(Val: LdStMI);
4531 Register ValReg = LdStMI.getReg(Idx: 0);
4532 Register AddrReg = LdStMI.getPointerReg();
4533 LLT ValTy = MRI.getType(Reg: ValReg);
4534
4535 // FIXME: Do we need a distinct NarrowMemory legalize action?
4536 if (ValTy.getSizeInBits() != 8 * LdStMI.getMemSize().getValue()) {
4537 LLVM_DEBUG(dbgs() << "Can't narrow extload/truncstore\n");
4538 return UnableToLegalize;
4539 }
4540
4541 int NumParts = -1;
4542 int NumLeftover = -1;
4543 LLT LeftoverTy;
4544 SmallVector<Register, 8> NarrowRegs, NarrowLeftoverRegs;
4545 if (IsLoad) {
4546 std::tie(args&: NumParts, args&: NumLeftover) = getNarrowTypeBreakDown(OrigTy: ValTy, NarrowTy, LeftoverTy);
4547 } else {
4548 if (extractParts(Reg: ValReg, RegTy: ValTy, MainTy: NarrowTy, LeftoverTy, VRegs&: NarrowRegs,
4549 LeftoverVRegs&: NarrowLeftoverRegs, MIRBuilder, MRI)) {
4550 NumParts = NarrowRegs.size();
4551 NumLeftover = NarrowLeftoverRegs.size();
4552 }
4553 }
4554
4555 if (NumParts == -1)
4556 return UnableToLegalize;
4557
4558 LLT PtrTy = MRI.getType(Reg: AddrReg);
4559 const LLT OffsetTy = LLT::scalar(SizeInBits: PtrTy.getSizeInBits());
4560
4561 unsigned TotalSize = ValTy.getSizeInBits();
4562
4563 // Split the load/store into PartTy-sized pieces starting at Offset. If this
4564 // is a load, return the new registers in ValRegs. For a store, each element
4565 // of ValRegs should be PartTy. Returns the next offset that needs to be
4566 // handled.
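// For example, narrowing an s96 value with NarrowTy = s64 gives one s64 piece
// and one s32 leftover piece, accessed at byte offsets 0 and 8 respectively on
// a little-endian target.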
4567 bool isBigEndian = MIRBuilder.getDataLayout().isBigEndian();
4568 auto MMO = LdStMI.getMMO();
4569 auto splitTypePieces = [=](LLT PartTy, SmallVectorImpl<Register> &ValRegs,
4570 unsigned NumParts, unsigned Offset) -> unsigned {
4571 MachineFunction &MF = MIRBuilder.getMF();
4572 unsigned PartSize = PartTy.getSizeInBits();
4573 for (unsigned Idx = 0, E = NumParts; Idx != E && Offset < TotalSize;
4574 ++Idx) {
4575 unsigned ByteOffset = Offset / 8;
4576 Register NewAddrReg;
4577
4578 MIRBuilder.materializePtrAdd(Res&: NewAddrReg, Op0: AddrReg, ValueTy: OffsetTy, Value: ByteOffset);
4579
4580 MachineMemOperand *NewMMO =
4581 MF.getMachineMemOperand(MMO: &MMO, Offset: ByteOffset, Ty: PartTy);
4582
4583 if (IsLoad) {
4584 Register Dst = MRI.createGenericVirtualRegister(Ty: PartTy);
4585 ValRegs.push_back(Elt: Dst);
4586 MIRBuilder.buildLoad(Res: Dst, Addr: NewAddrReg, MMO&: *NewMMO);
4587 } else {
4588 MIRBuilder.buildStore(Val: ValRegs[Idx], Addr: NewAddrReg, MMO&: *NewMMO);
4589 }
4590 Offset = isBigEndian ? Offset - PartSize : Offset + PartSize;
4591 }
4592
4593 return Offset;
4594 };
4595
4596 unsigned Offset = isBigEndian ? TotalSize - NarrowTy.getSizeInBits() : 0;
4597 unsigned HandledOffset =
4598 splitTypePieces(NarrowTy, NarrowRegs, NumParts, Offset);
4599
4600 // Handle the rest of the register if this isn't an even type breakdown.
4601 if (LeftoverTy.isValid())
4602 splitTypePieces(LeftoverTy, NarrowLeftoverRegs, NumLeftover, HandledOffset);
4603
4604 if (IsLoad) {
4605 insertParts(DstReg: ValReg, ResultTy: ValTy, PartTy: NarrowTy, PartRegs: NarrowRegs,
4606 LeftoverTy, LeftoverRegs: NarrowLeftoverRegs);
4607 }
4608
4609 LdStMI.eraseFromParent();
4610 return Legalized;
4611}
4612
4613LegalizerHelper::LegalizeResult
4614LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
4615 LLT NarrowTy) {
4616 using namespace TargetOpcode;
4617 GenericMachineInstr &GMI = cast<GenericMachineInstr>(Val&: MI);
4618 unsigned NumElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
4619
4620 switch (MI.getOpcode()) {
4621 case G_IMPLICIT_DEF:
4622 case G_TRUNC:
4623 case G_AND:
4624 case G_OR:
4625 case G_XOR:
4626 case G_ADD:
4627 case G_SUB:
4628 case G_MUL:
4629 case G_PTR_ADD:
4630 case G_SMULH:
4631 case G_UMULH:
4632 case G_FADD:
4633 case G_FMUL:
4634 case G_FSUB:
4635 case G_FNEG:
4636 case G_FABS:
4637 case G_FCANONICALIZE:
4638 case G_FDIV:
4639 case G_FREM:
4640 case G_FMA:
4641 case G_FMAD:
4642 case G_FPOW:
4643 case G_FEXP:
4644 case G_FEXP2:
4645 case G_FEXP10:
4646 case G_FLOG:
4647 case G_FLOG2:
4648 case G_FLOG10:
4649 case G_FLDEXP:
4650 case G_FNEARBYINT:
4651 case G_FCEIL:
4652 case G_FFLOOR:
4653 case G_FRINT:
4654 case G_INTRINSIC_ROUND:
4655 case G_INTRINSIC_ROUNDEVEN:
4656 case G_INTRINSIC_TRUNC:
4657 case G_FCOS:
4658 case G_FSIN:
4659 case G_FSQRT:
4660 case G_BSWAP:
4661 case G_BITREVERSE:
4662 case G_SDIV:
4663 case G_UDIV:
4664 case G_SREM:
4665 case G_UREM:
4666 case G_SDIVREM:
4667 case G_UDIVREM:
4668 case G_SMIN:
4669 case G_SMAX:
4670 case G_UMIN:
4671 case G_UMAX:
4672 case G_ABS:
4673 case G_FMINNUM:
4674 case G_FMAXNUM:
4675 case G_FMINNUM_IEEE:
4676 case G_FMAXNUM_IEEE:
4677 case G_FMINIMUM:
4678 case G_FMAXIMUM:
4679 case G_FSHL:
4680 case G_FSHR:
4681 case G_ROTL:
4682 case G_ROTR:
4683 case G_FREEZE:
4684 case G_SADDSAT:
4685 case G_SSUBSAT:
4686 case G_UADDSAT:
4687 case G_USUBSAT:
4688 case G_UMULO:
4689 case G_SMULO:
4690 case G_SHL:
4691 case G_LSHR:
4692 case G_ASHR:
4693 case G_SSHLSAT:
4694 case G_USHLSAT:
4695 case G_CTLZ:
4696 case G_CTLZ_ZERO_UNDEF:
4697 case G_CTTZ:
4698 case G_CTTZ_ZERO_UNDEF:
4699 case G_CTPOP:
4700 case G_FCOPYSIGN:
4701 case G_ZEXT:
4702 case G_SEXT:
4703 case G_ANYEXT:
4704 case G_FPEXT:
4705 case G_FPTRUNC:
4706 case G_SITOFP:
4707 case G_UITOFP:
4708 case G_FPTOSI:
4709 case G_FPTOUI:
4710 case G_INTTOPTR:
4711 case G_PTRTOINT:
4712 case G_ADDRSPACE_CAST:
4713 case G_UADDO:
4714 case G_USUBO:
4715 case G_UADDE:
4716 case G_USUBE:
4717 case G_SADDO:
4718 case G_SSUBO:
4719 case G_SADDE:
4720 case G_SSUBE:
4721 case G_STRICT_FADD:
4722 case G_STRICT_FSUB:
4723 case G_STRICT_FMUL:
4724 case G_STRICT_FMA:
4725 case G_STRICT_FLDEXP:
4726 case G_FFREXP:
4727 return fewerElementsVectorMultiEltType(MI&: GMI, NumElts);
4728 case G_ICMP:
4729 case G_FCMP:
4730 return fewerElementsVectorMultiEltType(MI&: GMI, NumElts, NonVecOpIndices: {1 /*cmp predicate*/});
4731 case G_IS_FPCLASS:
4732 return fewerElementsVectorMultiEltType(MI&: GMI, NumElts, NonVecOpIndices: {2, 3 /*mask,fpsem*/});
4733 case G_SELECT:
4734 if (MRI.getType(Reg: MI.getOperand(i: 1).getReg()).isVector())
4735 return fewerElementsVectorMultiEltType(MI&: GMI, NumElts);
4736 return fewerElementsVectorMultiEltType(MI&: GMI, NumElts, NonVecOpIndices: {1 /*scalar cond*/});
4737 case G_PHI:
4738 return fewerElementsVectorPhi(MI&: GMI, NumElts);
4739 case G_UNMERGE_VALUES:
4740 return fewerElementsVectorUnmergeValues(MI, TypeIdx, NarrowTy);
4741 case G_BUILD_VECTOR:
4742 assert(TypeIdx == 0 && "not a vector type index");
4743 return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
4744 case G_CONCAT_VECTORS:
4745 if (TypeIdx != 1) // TODO: This probably does work as expected already.
4746 return UnableToLegalize;
4747 return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
4748 case G_EXTRACT_VECTOR_ELT:
4749 case G_INSERT_VECTOR_ELT:
4750 return fewerElementsVectorExtractInsertVectorElt(MI, TypeIdx, NarrowVecTy: NarrowTy);
4751 case G_LOAD:
4752 case G_STORE:
4753 return reduceLoadStoreWidth(LdStMI&: cast<GLoadStore>(Val&: MI), TypeIdx, NarrowTy);
4754 case G_SEXT_INREG:
4755 return fewerElementsVectorMultiEltType(MI&: GMI, NumElts, NonVecOpIndices: {2 /*imm*/});
4756 GISEL_VECREDUCE_CASES_NONSEQ
4757 return fewerElementsVectorReductions(MI, TypeIdx, NarrowTy);
4758 case TargetOpcode::G_VECREDUCE_SEQ_FADD:
4759 case TargetOpcode::G_VECREDUCE_SEQ_FMUL:
4760 return fewerElementsVectorSeqReductions(MI, TypeIdx, NarrowTy);
4761 case G_SHUFFLE_VECTOR:
4762 return fewerElementsVectorShuffle(MI, TypeIdx, NarrowTy);
4763 case G_FPOWI:
4764 return fewerElementsVectorMultiEltType(MI&: GMI, NumElts, NonVecOpIndices: {2 /*pow*/});
4765 case G_BITCAST:
4766 return fewerElementsBitcast(MI, TypeIdx, NarrowTy);
4767 case G_INTRINSIC_FPTRUNC_ROUND:
4768 return fewerElementsVectorMultiEltType(MI&: GMI, NumElts, NonVecOpIndices: {2});
4769 default:
4770 return UnableToLegalize;
4771 }
4772}
4773
4774LegalizerHelper::LegalizeResult
4775LegalizerHelper::fewerElementsBitcast(MachineInstr &MI, unsigned int TypeIdx,
4776 LLT NarrowTy) {
4777 assert(MI.getOpcode() == TargetOpcode::G_BITCAST &&
4778 "Not a bitcast operation");
4779
4780 if (TypeIdx != 0)
4781 return UnableToLegalize;
4782
4783 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
4784
4785 unsigned SrcScalSize = SrcTy.getScalarSizeInBits();
4786 LLT SrcNarrowTy =
4787 LLT::fixed_vector(NumElements: NarrowTy.getSizeInBits() / SrcScalSize, ScalarSizeInBits: SrcScalSize);
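// For example, narrowing a G_BITCAST from <8 x s16> to <4 x s32> with
// NarrowTy = <2 x s32>: SrcNarrowTy is <4 x s16>, so (assuming the GCD split
// below yields <4 x s16> pieces) each piece is bitcast to <2 x s32> and the
// results are re-merged into the destination.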
4788
4789 // Split the Src and Dst Reg into smaller registers
4790 SmallVector<Register> SrcVRegs, BitcastVRegs;
4791 if (extractGCDType(Parts&: SrcVRegs, DstTy, NarrowTy: SrcNarrowTy, SrcReg) != SrcNarrowTy)
4792 return UnableToLegalize;
4793
4794 // Build new, smaller bitcast instructions.
4795 // Leftover types are not supported for now, but will eventually have to be.
4796 for (unsigned i = 0; i < SrcVRegs.size(); i++)
4797 BitcastVRegs.push_back(
4798 Elt: MIRBuilder.buildBitcast(Dst: NarrowTy, Src: SrcVRegs[i]).getReg(Idx: 0));
4799
4800 MIRBuilder.buildMergeLikeInstr(Res: DstReg, Ops: BitcastVRegs);
4801 MI.eraseFromParent();
4802 return Legalized;
4803}
4804
4805LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorShuffle(
4806 MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
4807 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
4808 if (TypeIdx != 0)
4809 return UnableToLegalize;
4810
4811 auto [DstReg, DstTy, Src1Reg, Src1Ty, Src2Reg, Src2Ty] =
4812 MI.getFirst3RegLLTs();
4813 ArrayRef<int> Mask = MI.getOperand(i: 3).getShuffleMask();
4814 // The shuffle should be canonicalized by now.
4815 if (DstTy != Src1Ty)
4816 return UnableToLegalize;
4817 if (DstTy != Src2Ty)
4818 return UnableToLegalize;
4819
4820 if (!isPowerOf2_32(Value: DstTy.getNumElements()))
4821 return UnableToLegalize;
4822
4823 // We only support splitting a shuffle into 2, so adjust NarrowTy accordingly.
4824 // Further legalization attempts will be needed to split it further.
4825 NarrowTy =
4826 DstTy.changeElementCount(EC: DstTy.getElementCount().divideCoefficientBy(RHS: 2));
4827 unsigned NewElts = NarrowTy.getNumElements();
4828
4829 SmallVector<Register> SplitSrc1Regs, SplitSrc2Regs;
4830 extractParts(Reg: Src1Reg, Ty: NarrowTy, NumParts: 2, VRegs&: SplitSrc1Regs, MIRBuilder, MRI);
4831 extractParts(Reg: Src2Reg, Ty: NarrowTy, NumParts: 2, VRegs&: SplitSrc2Regs, MIRBuilder, MRI);
4832 Register Inputs[4] = {SplitSrc1Regs[0], SplitSrc1Regs[1], SplitSrc2Regs[0],
4833 SplitSrc2Regs[1]};
4834
4835 Register Hi, Lo;
4836
4837 // If Lo or Hi uses elements from at most two of the four input vectors, then
4838 // express it as a vector shuffle of those two inputs. Otherwise extract the
4839 // input elements by hand and construct the Lo/Hi output using a BUILD_VECTOR.
4840 SmallVector<int, 16> Ops;
4841 for (unsigned High = 0; High < 2; ++High) {
4842 Register &Output = High ? Hi : Lo;
4843
4844 // Build a shuffle mask for the output, discovering on the fly which
4845 // input vectors to use as shuffle operands (recorded in InputUsed).
4846 // If building a suitable shuffle vector proves too hard, then bail
4847 // out with UseBuildVector set.
4848 unsigned InputUsed[2] = {-1U, -1U}; // Not yet discovered.
4849 unsigned FirstMaskIdx = High * NewElts;
4850 bool UseBuildVector = false;
4851 for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
4852 // The mask element. This indexes into the input.
4853 int Idx = Mask[FirstMaskIdx + MaskOffset];
4854
4855 // The input vector this mask element indexes into.
4856 unsigned Input = (unsigned)Idx / NewElts;
4857
4858 if (Input >= std::size(Inputs)) {
4859 // The mask element does not index into any input vector.
4860 Ops.push_back(Elt: -1);
4861 continue;
4862 }
4863
4864 // Turn the index into an offset from the start of the input vector.
4865 Idx -= Input * NewElts;
4866
4867 // Find or create a shuffle vector operand to hold this input.
4868 unsigned OpNo;
4869 for (OpNo = 0; OpNo < std::size(InputUsed); ++OpNo) {
4870 if (InputUsed[OpNo] == Input) {
4871 // This input vector is already an operand.
4872 break;
4873 } else if (InputUsed[OpNo] == -1U) {
4874 // Create a new operand for this input vector.
4875 InputUsed[OpNo] = Input;
4876 break;
4877 }
4878 }
4879
4880 if (OpNo >= std::size(InputUsed)) {
4881 // More than two input vectors used! Give up on trying to create a
4882 // shuffle vector. Insert all elements into a BUILD_VECTOR instead.
4883 UseBuildVector = true;
4884 break;
4885 }
4886
4887 // Add the mask index for the new shuffle vector.
4888 Ops.push_back(Elt: Idx + OpNo * NewElts);
4889 }
4890
4891 if (UseBuildVector) {
4892 LLT EltTy = NarrowTy.getElementType();
4893 SmallVector<Register, 16> SVOps;
4894
4895 // Extract the input elements by hand.
4896 for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
4897 // The mask element. This indexes into the input.
4898 int Idx = Mask[FirstMaskIdx + MaskOffset];
4899
4900 // The input vector this mask element indexes into.
4901 unsigned Input = (unsigned)Idx / NewElts;
4902
4903 if (Input >= std::size(Inputs)) {
4904 // The mask element is "undef" or indexes off the end of the input.
4905 SVOps.push_back(Elt: MIRBuilder.buildUndef(Res: EltTy).getReg(Idx: 0));
4906 continue;
4907 }
4908
4909 // Turn the index into an offset from the start of the input vector.
4910 Idx -= Input * NewElts;
4911
4912 // Extract the vector element by hand.
4913 SVOps.push_back(Elt: MIRBuilder
4914 .buildExtractVectorElement(
4915 Res: EltTy, Val: Inputs[Input],
4916 Idx: MIRBuilder.buildConstant(Res: LLT::scalar(SizeInBits: 32), Val: Idx))
4917 .getReg(Idx: 0));
4918 }
4919
4920 // Construct the Lo/Hi output using a G_BUILD_VECTOR.
4921 Output = MIRBuilder.buildBuildVector(Res: NarrowTy, Ops: SVOps).getReg(Idx: 0);
4922 } else if (InputUsed[0] == -1U) {
4923 // No input vectors were used! The result is undefined.
4924 Output = MIRBuilder.buildUndef(Res: NarrowTy).getReg(Idx: 0);
4925 } else {
4926 Register Op0 = Inputs[InputUsed[0]];
4927 // If only one input was used, use an undefined vector for the other.
4928 Register Op1 = InputUsed[1] == -1U
4929 ? MIRBuilder.buildUndef(Res: NarrowTy).getReg(Idx: 0)
4930 : Inputs[InputUsed[1]];
4931 // At least one input vector was used. Create a new shuffle vector.
4932 Output = MIRBuilder.buildShuffleVector(Res: NarrowTy, Src1: Op0, Src2: Op1, Mask: Ops).getReg(Idx: 0);
4933 }
4934
4935 Ops.clear();
4936 }
4937
4938 MIRBuilder.buildConcatVectors(Res: DstReg, Ops: {Lo, Hi});
4939 MI.eraseFromParent();
4940 return Legalized;
4941}
4942
4943LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorReductions(
4944 MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
4945 auto &RdxMI = cast<GVecReduce>(Val&: MI);
4946
4947 if (TypeIdx != 1)
4948 return UnableToLegalize;
4949
4950 // The semantics of the normal non-sequential reductions allow us to freely
4951 // re-associate the operation.
4952 auto [DstReg, DstTy, SrcReg, SrcTy] = RdxMI.getFirst2RegLLTs();
4953
4954 if (NarrowTy.isVector() &&
4955 (SrcTy.getNumElements() % NarrowTy.getNumElements() != 0))
4956 return UnableToLegalize;
4957
4958 unsigned ScalarOpc = RdxMI.getScalarOpcForReduction();
4959 SmallVector<Register> SplitSrcs;
4960 // If NarrowTy is a scalar then we're being asked to scalarize.
4961 const unsigned NumParts =
4962 NarrowTy.isVector() ? SrcTy.getNumElements() / NarrowTy.getNumElements()
4963 : SrcTy.getNumElements();
4964
4965 extractParts(Reg: SrcReg, Ty: NarrowTy, NumParts, VRegs&: SplitSrcs, MIRBuilder, MRI);
4966 if (NarrowTy.isScalar()) {
4967 if (DstTy != NarrowTy)
4968 return UnableToLegalize; // FIXME: handle implicit extensions.
4969
4970 if (isPowerOf2_32(Value: NumParts)) {
4971 // Generate a tree of scalar operations to reduce the critical path.
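// For example, with four scalar parts this computes (s0 op s1) op (s2 op s3)
// rather than the sequential ((s0 op s1) op s2) op s3.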
4972 SmallVector<Register> PartialResults;
4973 unsigned NumPartsLeft = NumParts;
4974 while (NumPartsLeft > 1) {
4975 for (unsigned Idx = 0; Idx < NumPartsLeft - 1; Idx += 2) {
4976 PartialResults.emplace_back(
4977 Args: MIRBuilder
4978 .buildInstr(Opc: ScalarOpc, DstOps: {NarrowTy},
4979 SrcOps: {SplitSrcs[Idx], SplitSrcs[Idx + 1]})
4980 .getReg(Idx: 0));
4981 }
4982 SplitSrcs = PartialResults;
4983 PartialResults.clear();
4984 NumPartsLeft = SplitSrcs.size();
4985 }
4986 assert(SplitSrcs.size() == 1);
4987 MIRBuilder.buildCopy(Res: DstReg, Op: SplitSrcs[0]);
4988 MI.eraseFromParent();
4989 return Legalized;
4990 }
4991 // If we can't generate a tree, then just do sequential operations.
4992 Register Acc = SplitSrcs[0];
4993 for (unsigned Idx = 1; Idx < NumParts; ++Idx)
4994 Acc = MIRBuilder.buildInstr(Opc: ScalarOpc, DstOps: {NarrowTy}, SrcOps: {Acc, SplitSrcs[Idx]})
4995 .getReg(Idx: 0);
4996 MIRBuilder.buildCopy(Res: DstReg, Op: Acc);
4997 MI.eraseFromParent();
4998 return Legalized;
4999 }
5000 SmallVector<Register> PartialReductions;
5001 for (unsigned Part = 0; Part < NumParts; ++Part) {
5002 PartialReductions.push_back(
5003 Elt: MIRBuilder.buildInstr(Opc: RdxMI.getOpcode(), DstOps: {DstTy}, SrcOps: {SplitSrcs[Part]})
5004 .getReg(Idx: 0));
5005 }
5006
5007 // If the types involved are powers of 2, we can generate intermediate vector
5008 // ops, before generating a final reduction operation.
5009 if (isPowerOf2_32(Value: SrcTy.getNumElements()) &&
5010 isPowerOf2_32(Value: NarrowTy.getNumElements())) {
5011 return tryNarrowPow2Reduction(MI, SrcReg, SrcTy, NarrowTy, ScalarOpc);
5012 }
5013
5014 Register Acc = PartialReductions[0];
5015 for (unsigned Part = 1; Part < NumParts; ++Part) {
5016 if (Part == NumParts - 1) {
5017 MIRBuilder.buildInstr(Opc: ScalarOpc, DstOps: {DstReg},
5018 SrcOps: {Acc, PartialReductions[Part]});
5019 } else {
5020 Acc = MIRBuilder
5021 .buildInstr(Opc: ScalarOpc, DstOps: {DstTy}, SrcOps: {Acc, PartialReductions[Part]})
5022 .getReg(Idx: 0);
5023 }
5024 }
5025 MI.eraseFromParent();
5026 return Legalized;
5027}
5028
5029LegalizerHelper::LegalizeResult
5030LegalizerHelper::fewerElementsVectorSeqReductions(MachineInstr &MI,
5031 unsigned int TypeIdx,
5032 LLT NarrowTy) {
5033 auto [DstReg, DstTy, ScalarReg, ScalarTy, SrcReg, SrcTy] =
5034 MI.getFirst3RegLLTs();
5035 if (!NarrowTy.isScalar() || TypeIdx != 2 || DstTy != ScalarTy ||
5036 DstTy != NarrowTy)
5037 return UnableToLegalize;
5038
5039 assert((MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FADD ||
5040 MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FMUL) &&
5041 "Unexpected vecreduce opcode");
5042 unsigned ScalarOpc = MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FADD
5043 ? TargetOpcode::G_FADD
5044 : TargetOpcode::G_FMUL;
5045
5046 SmallVector<Register> SplitSrcs;
5047 unsigned NumParts = SrcTy.getNumElements();
5048 extractParts(Reg: SrcReg, Ty: NarrowTy, NumParts, VRegs&: SplitSrcs, MIRBuilder, MRI);
5049 Register Acc = ScalarReg;
5050 for (unsigned i = 0; i < NumParts; i++)
5051 Acc = MIRBuilder.buildInstr(Opc: ScalarOpc, DstOps: {NarrowTy}, SrcOps: {Acc, SplitSrcs[i]})
5052 .getReg(Idx: 0);
5053
5054 MIRBuilder.buildCopy(Res: DstReg, Op: Acc);
5055 MI.eraseFromParent();
5056 return Legalized;
5057}
5058
5059LegalizerHelper::LegalizeResult
5060LegalizerHelper::tryNarrowPow2Reduction(MachineInstr &MI, Register SrcReg,
5061 LLT SrcTy, LLT NarrowTy,
5062 unsigned ScalarOpc) {
5063 SmallVector<Register> SplitSrcs;
5064 // Split the sources into NarrowTy size pieces.
5065 extractParts(Reg: SrcReg, Ty: NarrowTy,
5066 NumParts: SrcTy.getNumElements() / NarrowTy.getNumElements(), VRegs&: SplitSrcs,
5067 MIRBuilder, MRI);
5068 // We're going to do a tree reduction using vector operations until we have
5069 // one NarrowTy size value left.
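// For example, reducing <8 x s32> with NarrowTy = <2 x s32>: the four <2 x s32>
// pieces are combined pairwise (4 -> 2 -> 1), and the original G_VECREDUCE_*
// is then rewritten to operate on the single remaining <2 x s32> value.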
5070 while (SplitSrcs.size() > 1) {
5071 SmallVector<Register> PartialRdxs;
5072 for (unsigned Idx = 0; Idx < SplitSrcs.size()-1; Idx += 2) {
5073 Register LHS = SplitSrcs[Idx];
5074 Register RHS = SplitSrcs[Idx + 1];
5075 // Create the intermediate vector op.
5076 Register Res =
5077 MIRBuilder.buildInstr(Opc: ScalarOpc, DstOps: {NarrowTy}, SrcOps: {LHS, RHS}).getReg(Idx: 0);
5078 PartialRdxs.push_back(Elt: Res);
5079 }
5080 SplitSrcs = std::move(PartialRdxs);
5081 }
5082 // Finally generate the requested NarrowTy based reduction.
5083 Observer.changingInstr(MI);
5084 MI.getOperand(i: 1).setReg(SplitSrcs[0]);
5085 Observer.changedInstr(MI);
5086 return Legalized;
5087}
5088
5089LegalizerHelper::LegalizeResult
5090LegalizerHelper::narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt,
5091 const LLT HalfTy, const LLT AmtTy) {
5092
5093 Register InL = MRI.createGenericVirtualRegister(Ty: HalfTy);
5094 Register InH = MRI.createGenericVirtualRegister(Ty: HalfTy);
5095 MIRBuilder.buildUnmerge(Res: {InL, InH}, Op: MI.getOperand(i: 1));
5096
5097 if (Amt.isZero()) {
5098 MIRBuilder.buildMergeLikeInstr(Res: MI.getOperand(i: 0), Ops: {InL, InH});
5099 MI.eraseFromParent();
5100 return Legalized;
5101 }
5102
5103 LLT NVT = HalfTy;
5104 unsigned NVTBits = HalfTy.getSizeInBits();
5105 unsigned VTBits = 2 * NVTBits;
5106
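// The wide shift is rebuilt from shifts on the two halves. For example, a
// G_SHL with NVTBits = 32 and Amt = 8 produces:
//   Lo = InL << 8
//   Hi = (InH << 8) | (InL >> 24)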
5107 SrcOp Lo(Register(0)), Hi(Register(0));
5108 if (MI.getOpcode() == TargetOpcode::G_SHL) {
5109 if (Amt.ugt(RHS: VTBits)) {
5110 Lo = Hi = MIRBuilder.buildConstant(Res: NVT, Val: 0);
5111 } else if (Amt.ugt(RHS: NVTBits)) {
5112 Lo = MIRBuilder.buildConstant(Res: NVT, Val: 0);
5113 Hi = MIRBuilder.buildShl(Dst: NVT, Src0: InL,
5114 Src1: MIRBuilder.buildConstant(Res: AmtTy, Val: Amt - NVTBits));
5115 } else if (Amt == NVTBits) {
5116 Lo = MIRBuilder.buildConstant(Res: NVT, Val: 0);
5117 Hi = InL;
5118 } else {
5119 Lo = MIRBuilder.buildShl(Dst: NVT, Src0: InL, Src1: MIRBuilder.buildConstant(Res: AmtTy, Val: Amt));
5120 auto OrLHS =
5121 MIRBuilder.buildShl(Dst: NVT, Src0: InH, Src1: MIRBuilder.buildConstant(Res: AmtTy, Val: Amt));
5122 auto OrRHS = MIRBuilder.buildLShr(
5123 Dst: NVT, Src0: InL, Src1: MIRBuilder.buildConstant(Res: AmtTy, Val: -Amt + NVTBits));
5124 Hi = MIRBuilder.buildOr(Dst: NVT, Src0: OrLHS, Src1: OrRHS);
5125 }
5126 } else if (MI.getOpcode() == TargetOpcode::G_LSHR) {
5127 if (Amt.ugt(RHS: VTBits)) {
5128 Lo = Hi = MIRBuilder.buildConstant(Res: NVT, Val: 0);
5129 } else if (Amt.ugt(RHS: NVTBits)) {
5130 Lo = MIRBuilder.buildLShr(Dst: NVT, Src0: InH,
5131 Src1: MIRBuilder.buildConstant(Res: AmtTy, Val: Amt - NVTBits));
5132 Hi = MIRBuilder.buildConstant(Res: NVT, Val: 0);
5133 } else if (Amt == NVTBits) {
5134 Lo = InH;
5135 Hi = MIRBuilder.buildConstant(Res: NVT, Val: 0);
5136 } else {
5137 auto ShiftAmtConst = MIRBuilder.buildConstant(Res: AmtTy, Val: Amt);
5138
5139 auto OrLHS = MIRBuilder.buildLShr(Dst: NVT, Src0: InL, Src1: ShiftAmtConst);
5140 auto OrRHS = MIRBuilder.buildShl(
5141 Dst: NVT, Src0: InH, Src1: MIRBuilder.buildConstant(Res: AmtTy, Val: -Amt + NVTBits));
5142
5143 Lo = MIRBuilder.buildOr(Dst: NVT, Src0: OrLHS, Src1: OrRHS);
5144 Hi = MIRBuilder.buildLShr(Dst: NVT, Src0: InH, Src1: ShiftAmtConst);
5145 }
5146 } else {
5147 if (Amt.ugt(RHS: VTBits)) {
5148 Hi = Lo = MIRBuilder.buildAShr(
5149 Dst: NVT, Src0: InH, Src1: MIRBuilder.buildConstant(Res: AmtTy, Val: NVTBits - 1));
5150 } else if (Amt.ugt(RHS: NVTBits)) {
5151 Lo = MIRBuilder.buildAShr(Dst: NVT, Src0: InH,
5152 Src1: MIRBuilder.buildConstant(Res: AmtTy, Val: Amt - NVTBits));
5153 Hi = MIRBuilder.buildAShr(Dst: NVT, Src0: InH,
5154 Src1: MIRBuilder.buildConstant(Res: AmtTy, Val: NVTBits - 1));
5155 } else if (Amt == NVTBits) {
5156 Lo = InH;
5157 Hi = MIRBuilder.buildAShr(Dst: NVT, Src0: InH,
5158 Src1: MIRBuilder.buildConstant(Res: AmtTy, Val: NVTBits - 1));
5159 } else {
5160 auto ShiftAmtConst = MIRBuilder.buildConstant(Res: AmtTy, Val: Amt);
5161
5162 auto OrLHS = MIRBuilder.buildLShr(Dst: NVT, Src0: InL, Src1: ShiftAmtConst);
5163 auto OrRHS = MIRBuilder.buildShl(
5164 Dst: NVT, Src0: InH, Src1: MIRBuilder.buildConstant(Res: AmtTy, Val: -Amt + NVTBits));
5165
5166 Lo = MIRBuilder.buildOr(Dst: NVT, Src0: OrLHS, Src1: OrRHS);
5167 Hi = MIRBuilder.buildAShr(Dst: NVT, Src0: InH, Src1: ShiftAmtConst);
5168 }
5169 }
5170
5171 MIRBuilder.buildMergeLikeInstr(Res: MI.getOperand(i: 0), Ops: {Lo, Hi});
5172 MI.eraseFromParent();
5173
5174 return Legalized;
5175}
5176
5177// TODO: Optimize if constant shift amount.
5178LegalizerHelper::LegalizeResult
5179LegalizerHelper::narrowScalarShift(MachineInstr &MI, unsigned TypeIdx,
5180 LLT RequestedTy) {
5181 if (TypeIdx == 1) {
5182 Observer.changingInstr(MI);
5183 narrowScalarSrc(MI, NarrowTy: RequestedTy, OpIdx: 2);
5184 Observer.changedInstr(MI);
5185 return Legalized;
5186 }
5187
5188 Register DstReg = MI.getOperand(i: 0).getReg();
5189 LLT DstTy = MRI.getType(Reg: DstReg);
5190 if (DstTy.isVector())
5191 return UnableToLegalize;
5192
5193 Register Amt = MI.getOperand(i: 2).getReg();
5194 LLT ShiftAmtTy = MRI.getType(Reg: Amt);
5195 const unsigned DstEltSize = DstTy.getScalarSizeInBits();
5196 if (DstEltSize % 2 != 0)
5197 return UnableToLegalize;
5198
5199 // Ignore the input type. We can only go to exactly half the size of the
5200 // input. If that isn't small enough, the resulting pieces will be further
5201 // legalized.
5202 const unsigned NewBitSize = DstEltSize / 2;
5203 const LLT HalfTy = LLT::scalar(SizeInBits: NewBitSize);
5204 const LLT CondTy = LLT::scalar(SizeInBits: 1);
5205
5206 if (auto VRegAndVal = getIConstantVRegValWithLookThrough(VReg: Amt, MRI)) {
5207 return narrowScalarShiftByConstant(MI, Amt: VRegAndVal->Value, HalfTy,
5208 AmtTy: ShiftAmtTy);
5209 }
5210
5211 // TODO: Expand with known bits.
5212
5213 // Handle the fully general expansion by an unknown amount.
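// Both a "short" (Amt < NewBitSize) and a "long" (Amt >= NewBitSize) expansion
// are built, and the result is chosen with selects on IsShort and IsZero below.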
5214 auto NewBits = MIRBuilder.buildConstant(Res: ShiftAmtTy, Val: NewBitSize);
5215
5216 Register InL = MRI.createGenericVirtualRegister(Ty: HalfTy);
5217 Register InH = MRI.createGenericVirtualRegister(Ty: HalfTy);
5218 MIRBuilder.buildUnmerge(Res: {InL, InH}, Op: MI.getOperand(i: 1));
5219
5220 auto AmtExcess = MIRBuilder.buildSub(Dst: ShiftAmtTy, Src0: Amt, Src1: NewBits);
5221 auto AmtLack = MIRBuilder.buildSub(Dst: ShiftAmtTy, Src0: NewBits, Src1: Amt);
5222
5223 auto Zero = MIRBuilder.buildConstant(Res: ShiftAmtTy, Val: 0);
5224 auto IsShort = MIRBuilder.buildICmp(Pred: ICmpInst::ICMP_ULT, Res: CondTy, Op0: Amt, Op1: NewBits);
5225 auto IsZero = MIRBuilder.buildICmp(Pred: ICmpInst::ICMP_EQ, Res: CondTy, Op0: Amt, Op1: Zero);
5226
5227 Register ResultRegs[2];
5228 switch (MI.getOpcode()) {
5229 case TargetOpcode::G_SHL: {
5230 // Short: ShAmt < NewBitSize
5231 auto LoS = MIRBuilder.buildShl(Dst: HalfTy, Src0: InL, Src1: Amt);
5232
5233 auto LoOr = MIRBuilder.buildLShr(Dst: HalfTy, Src0: InL, Src1: AmtLack);
5234 auto HiOr = MIRBuilder.buildShl(Dst: HalfTy, Src0: InH, Src1: Amt);
5235 auto HiS = MIRBuilder.buildOr(Dst: HalfTy, Src0: LoOr, Src1: HiOr);
5236
5237 // Long: ShAmt >= NewBitSize
5238 auto LoL = MIRBuilder.buildConstant(Res: HalfTy, Val: 0); // Lo part is zero.
5239 auto HiL = MIRBuilder.buildShl(Dst: HalfTy, Src0: InL, Src1: AmtExcess); // Hi from Lo part.
5240
5241 auto Lo = MIRBuilder.buildSelect(Res: HalfTy, Tst: IsShort, Op0: LoS, Op1: LoL);
5242 auto Hi = MIRBuilder.buildSelect(
5243 Res: HalfTy, Tst: IsZero, Op0: InH, Op1: MIRBuilder.buildSelect(Res: HalfTy, Tst: IsShort, Op0: HiS, Op1: HiL));
5244
5245 ResultRegs[0] = Lo.getReg(Idx: 0);
5246 ResultRegs[1] = Hi.getReg(Idx: 0);
5247 break;
5248 }
5249 case TargetOpcode::G_LSHR:
5250 case TargetOpcode::G_ASHR: {
5251 // Short: ShAmt < NewBitSize
5252 auto HiS = MIRBuilder.buildInstr(Opc: MI.getOpcode(), DstOps: {HalfTy}, SrcOps: {InH, Amt});
5253
5254 auto LoOr = MIRBuilder.buildLShr(Dst: HalfTy, Src0: InL, Src1: Amt);
5255 auto HiOr = MIRBuilder.buildShl(Dst: HalfTy, Src0: InH, Src1: AmtLack);
5256 auto LoS = MIRBuilder.buildOr(Dst: HalfTy, Src0: LoOr, Src1: HiOr);
5257
5258 // Long: ShAmt >= NewBitSize
5259 MachineInstrBuilder HiL;
5260 if (MI.getOpcode() == TargetOpcode::G_LSHR) {
5261 HiL = MIRBuilder.buildConstant(Res: HalfTy, Val: 0); // Hi part is zero.
5262 } else {
5263 auto ShiftAmt = MIRBuilder.buildConstant(Res: ShiftAmtTy, Val: NewBitSize - 1);
5264 HiL = MIRBuilder.buildAShr(Dst: HalfTy, Src0: InH, Src1: ShiftAmt); // Sign of Hi part.
5265 }
5266 auto LoL = MIRBuilder.buildInstr(Opc: MI.getOpcode(), DstOps: {HalfTy},
5267 SrcOps: {InH, AmtExcess}); // Lo from Hi part.
5268
5269 auto Lo = MIRBuilder.buildSelect(
5270 Res: HalfTy, Tst: IsZero, Op0: InL, Op1: MIRBuilder.buildSelect(Res: HalfTy, Tst: IsShort, Op0: LoS, Op1: LoL));
5271
5272 auto Hi = MIRBuilder.buildSelect(Res: HalfTy, Tst: IsShort, Op0: HiS, Op1: HiL);
5273
5274 ResultRegs[0] = Lo.getReg(Idx: 0);
5275 ResultRegs[1] = Hi.getReg(Idx: 0);
5276 break;
5277 }
5278 default:
5279 llvm_unreachable("not a shift");
5280 }
5281
5282 MIRBuilder.buildMergeLikeInstr(Res: DstReg, Ops: ResultRegs);
5283 MI.eraseFromParent();
5284 return Legalized;
5285}
5286
5287LegalizerHelper::LegalizeResult
5288LegalizerHelper::moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
5289 LLT MoreTy) {
5290 assert(TypeIdx == 0 && "Expecting only Idx 0");
5291
5292 Observer.changingInstr(MI);
5293 for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
5294 MachineBasicBlock &OpMBB = *MI.getOperand(i: I + 1).getMBB();
5295 MIRBuilder.setInsertPt(MBB&: OpMBB, II: OpMBB.getFirstTerminator());
5296 moreElementsVectorSrc(MI, MoreTy, OpIdx: I);
5297 }
5298
5299 MachineBasicBlock &MBB = *MI.getParent();
5300 MIRBuilder.setInsertPt(MBB, II: --MBB.getFirstNonPHI());
5301 moreElementsVectorDst(MI, WideTy: MoreTy, OpIdx: 0);
5302 Observer.changedInstr(MI);
5303 return Legalized;
5304}
5305
5306MachineInstrBuilder LegalizerHelper::getNeutralElementForVecReduce(
5307 unsigned Opcode, MachineIRBuilder &MIRBuilder, LLT Ty) {
5308 assert(Ty.isScalar() && "Expected scalar type to make neutral element for");
5309
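// The neutral element is the value that leaves the reduction unchanged, so it
// can be used to pad extra vector lanes: e.g. 0 for add/or/xor/umax, 1 for mul,
// and all-ones for and/umin.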
5310 switch (Opcode) {
5311 default:
5312 llvm_unreachable(
5313 "getNeutralElementForVecReduce called with invalid opcode!");
5314 case TargetOpcode::G_VECREDUCE_ADD:
5315 case TargetOpcode::G_VECREDUCE_OR:
5316 case TargetOpcode::G_VECREDUCE_XOR:
5317 case TargetOpcode::G_VECREDUCE_UMAX:
5318 return MIRBuilder.buildConstant(Res: Ty, Val: 0);
5319 case TargetOpcode::G_VECREDUCE_MUL:
5320 return MIRBuilder.buildConstant(Res: Ty, Val: 1);
5321 case TargetOpcode::G_VECREDUCE_AND:
5322 case TargetOpcode::G_VECREDUCE_UMIN:
5323 return MIRBuilder.buildConstant(
5324 Res: Ty, Val: APInt::getAllOnes(numBits: Ty.getScalarSizeInBits()));
5325 case TargetOpcode::G_VECREDUCE_SMAX:
5326 return MIRBuilder.buildConstant(
5327 Res: Ty, Val: APInt::getSignedMinValue(numBits: Ty.getSizeInBits()));
5328 case TargetOpcode::G_VECREDUCE_SMIN:
5329 return MIRBuilder.buildConstant(
5330 Res: Ty, Val: APInt::getSignedMaxValue(numBits: Ty.getSizeInBits()));
5331 case TargetOpcode::G_VECREDUCE_FADD:
5332 return MIRBuilder.buildFConstant(Res: Ty, Val: -0.0);
5333 case TargetOpcode::G_VECREDUCE_FMUL:
5334 return MIRBuilder.buildFConstant(Res: Ty, Val: 1.0);
5335 case TargetOpcode::G_VECREDUCE_FMINIMUM:
5336 case TargetOpcode::G_VECREDUCE_FMAXIMUM:
5337 assert(false && "getNeutralElementForVecReduce unimplemented for "
5338 "G_VECREDUCE_FMINIMUM and G_VECREDUCE_FMAXIMUM!");
5339 }
5340 llvm_unreachable("switch expected to return!");
5341}
5342
5343LegalizerHelper::LegalizeResult
5344LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
5345 LLT MoreTy) {
5346 unsigned Opc = MI.getOpcode();
5347 switch (Opc) {
5348 case TargetOpcode::G_IMPLICIT_DEF:
5349 case TargetOpcode::G_LOAD: {
5350 if (TypeIdx != 0)
5351 return UnableToLegalize;
5352 Observer.changingInstr(MI);
5353 moreElementsVectorDst(MI, WideTy: MoreTy, OpIdx: 0);
5354 Observer.changedInstr(MI);
5355 return Legalized;
5356 }
5357 case TargetOpcode::G_STORE:
5358 if (TypeIdx != 0)
5359 return UnableToLegalize;
5360 Observer.changingInstr(MI);
5361 moreElementsVectorSrc(MI, MoreTy, OpIdx: 0);
5362 Observer.changedInstr(MI);
5363 return Legalized;
5364 case TargetOpcode::G_AND:
5365 case TargetOpcode::G_OR:
5366 case TargetOpcode::G_XOR:
5367 case TargetOpcode::G_ADD:
5368 case TargetOpcode::G_SUB:
5369 case TargetOpcode::G_MUL:
5370 case TargetOpcode::G_FADD:
5371 case TargetOpcode::G_FSUB:
5372 case TargetOpcode::G_FMUL:
5373 case TargetOpcode::G_FDIV:
5374 case TargetOpcode::G_FCOPYSIGN:
5375 case TargetOpcode::G_UADDSAT:
5376 case TargetOpcode::G_USUBSAT:
5377 case TargetOpcode::G_SADDSAT:
5378 case TargetOpcode::G_SSUBSAT:
5379 case TargetOpcode::G_SMIN:
5380 case TargetOpcode::G_SMAX:
5381 case TargetOpcode::G_UMIN:
5382 case TargetOpcode::G_UMAX:
5383 case TargetOpcode::G_FMINNUM:
5384 case TargetOpcode::G_FMAXNUM:
5385 case TargetOpcode::G_FMINNUM_IEEE:
5386 case TargetOpcode::G_FMAXNUM_IEEE:
5387 case TargetOpcode::G_FMINIMUM:
5388 case TargetOpcode::G_FMAXIMUM:
5389 case TargetOpcode::G_STRICT_FADD:
5390 case TargetOpcode::G_STRICT_FSUB:
5391 case TargetOpcode::G_STRICT_FMUL:
5392 case TargetOpcode::G_SHL:
5393 case TargetOpcode::G_ASHR:
5394 case TargetOpcode::G_LSHR: {
5395 Observer.changingInstr(MI);
5396 moreElementsVectorSrc(MI, MoreTy, OpIdx: 1);
5397 moreElementsVectorSrc(MI, MoreTy, OpIdx: 2);
5398 moreElementsVectorDst(MI, WideTy: MoreTy, OpIdx: 0);
5399 Observer.changedInstr(MI);
5400 return Legalized;
5401 }
5402 case TargetOpcode::G_FMA:
5403 case TargetOpcode::G_STRICT_FMA:
5404 case TargetOpcode::G_FSHR:
5405 case TargetOpcode::G_FSHL: {
5406 Observer.changingInstr(MI);
5407 moreElementsVectorSrc(MI, MoreTy, OpIdx: 1);
5408 moreElementsVectorSrc(MI, MoreTy, OpIdx: 2);
5409 moreElementsVectorSrc(MI, MoreTy, OpIdx: 3);
5410 moreElementsVectorDst(MI, WideTy: MoreTy, OpIdx: 0);
5411 Observer.changedInstr(MI);
5412 return Legalized;
5413 }
5414 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
5415 case TargetOpcode::G_EXTRACT:
5416 if (TypeIdx != 1)
5417 return UnableToLegalize;
5418 Observer.changingInstr(MI);
5419 moreElementsVectorSrc(MI, MoreTy, OpIdx: 1);
5420 Observer.changedInstr(MI);
5421 return Legalized;
5422 case TargetOpcode::G_INSERT:
5423 case TargetOpcode::G_INSERT_VECTOR_ELT:
5424 case TargetOpcode::G_FREEZE:
5425 case TargetOpcode::G_FNEG:
5426 case TargetOpcode::G_FABS:
5427 case TargetOpcode::G_FSQRT:
5428 case TargetOpcode::G_FCEIL:
5429 case TargetOpcode::G_FFLOOR:
5430 case TargetOpcode::G_FNEARBYINT:
5431 case TargetOpcode::G_FRINT:
5432 case TargetOpcode::G_INTRINSIC_ROUND:
5433 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
5434 case TargetOpcode::G_INTRINSIC_TRUNC:
5435 case TargetOpcode::G_BSWAP:
5436 case TargetOpcode::G_FCANONICALIZE:
5437 case TargetOpcode::G_SEXT_INREG:
5438 case TargetOpcode::G_ABS:
5439 if (TypeIdx != 0)
5440 return UnableToLegalize;
5441 Observer.changingInstr(MI);
5442 moreElementsVectorSrc(MI, MoreTy, OpIdx: 1);
5443 moreElementsVectorDst(MI, WideTy: MoreTy, OpIdx: 0);
5444 Observer.changedInstr(MI);
5445 return Legalized;
5446 case TargetOpcode::G_SELECT: {
5447 auto [DstReg, DstTy, CondReg, CondTy] = MI.getFirst2RegLLTs();
5448 if (TypeIdx == 1) {
5449 if (!CondTy.isScalar() ||
5450 DstTy.getElementCount() != MoreTy.getElementCount())
5451 return UnableToLegalize;
5452
5453 // This is turning a scalar select of vectors into a vector
5454 // select. Broadcast the select condition.
5455 auto ShufSplat = MIRBuilder.buildShuffleSplat(Res: MoreTy, Src: CondReg);
5456 Observer.changingInstr(MI);
5457 MI.getOperand(i: 1).setReg(ShufSplat.getReg(Idx: 0));
5458 Observer.changedInstr(MI);
5459 return Legalized;
5460 }
5461
5462 if (CondTy.isVector())
5463 return UnableToLegalize;
5464
5465 Observer.changingInstr(MI);
5466 moreElementsVectorSrc(MI, MoreTy, OpIdx: 2);
5467 moreElementsVectorSrc(MI, MoreTy, OpIdx: 3);
5468 moreElementsVectorDst(MI, WideTy: MoreTy, OpIdx: 0);
5469 Observer.changedInstr(MI);
5470 return Legalized;
5471 }
5472 case TargetOpcode::G_UNMERGE_VALUES:
5473 return UnableToLegalize;
5474 case TargetOpcode::G_PHI:
5475 return moreElementsVectorPhi(MI, TypeIdx, MoreTy);
5476 case TargetOpcode::G_SHUFFLE_VECTOR:
5477 return moreElementsVectorShuffle(MI, TypeIdx, MoreTy);
5478 case TargetOpcode::G_BUILD_VECTOR: {
5479 SmallVector<SrcOp, 8> Elts;
5480 for (auto Op : MI.uses()) {
5481 Elts.push_back(Elt: Op.getReg());
5482 }
5483
5484 for (unsigned i = Elts.size(); i < MoreTy.getNumElements(); ++i) {
5485 Elts.push_back(Elt: MIRBuilder.buildUndef(Res: MoreTy.getScalarType()));
5486 }
5487
5488 MIRBuilder.buildDeleteTrailingVectorElements(
5489 Res: MI.getOperand(i: 0).getReg(), Op0: MIRBuilder.buildInstr(Opc, DstOps: {MoreTy}, SrcOps: Elts));
5490 MI.eraseFromParent();
5491 return Legalized;
5492 }
5493 case TargetOpcode::G_SEXT:
5494 case TargetOpcode::G_ZEXT:
5495 case TargetOpcode::G_ANYEXT:
5496 case TargetOpcode::G_TRUNC:
5497 case TargetOpcode::G_FPTRUNC:
5498 case TargetOpcode::G_FPEXT:
5499 case TargetOpcode::G_FPTOSI:
5500 case TargetOpcode::G_FPTOUI:
5501 case TargetOpcode::G_SITOFP:
5502 case TargetOpcode::G_UITOFP: {
5503 Observer.changingInstr(MI);
5504 LLT SrcExtTy;
5505 LLT DstExtTy;
5506 if (TypeIdx == 0) {
5507 DstExtTy = MoreTy;
5508 SrcExtTy = LLT::fixed_vector(
5509 NumElements: MoreTy.getNumElements(),
5510 ScalarTy: MRI.getType(Reg: MI.getOperand(i: 1).getReg()).getElementType());
5511 } else {
5512 DstExtTy = LLT::fixed_vector(
5513 NumElements: MoreTy.getNumElements(),
5514 ScalarTy: MRI.getType(Reg: MI.getOperand(i: 0).getReg()).getElementType());
5515 SrcExtTy = MoreTy;
5516 }
5517 moreElementsVectorSrc(MI, MoreTy: SrcExtTy, OpIdx: 1);
5518 moreElementsVectorDst(MI, WideTy: DstExtTy, OpIdx: 0);
5519 Observer.changedInstr(MI);
5520 return Legalized;
5521 }
5522 case TargetOpcode::G_ICMP:
5523 case TargetOpcode::G_FCMP: {
5524 if (TypeIdx != 1)
5525 return UnableToLegalize;
5526
5527 Observer.changingInstr(MI);
5528 moreElementsVectorSrc(MI, MoreTy, OpIdx: 2);
5529 moreElementsVectorSrc(MI, MoreTy, OpIdx: 3);
5530 LLT CondTy = LLT::fixed_vector(
5531 NumElements: MoreTy.getNumElements(),
5532 ScalarTy: MRI.getType(Reg: MI.getOperand(i: 0).getReg()).getElementType());
5533 moreElementsVectorDst(MI, WideTy: CondTy, OpIdx: 0);
5534 Observer.changedInstr(MI);
5535 return Legalized;
5536 }
5537 case TargetOpcode::G_BITCAST: {
5538 if (TypeIdx != 0)
5539 return UnableToLegalize;
5540
5541 LLT SrcTy = MRI.getType(Reg: MI.getOperand(i: 1).getReg());
5542 LLT DstTy = MRI.getType(Reg: MI.getOperand(i: 0).getReg());
5543
5544 unsigned coefficient = SrcTy.getNumElements() * MoreTy.getNumElements();
5545 if (coefficient % DstTy.getNumElements() != 0)
5546 return UnableToLegalize;
5547
5548 coefficient = coefficient / DstTy.getNumElements();
5549
5550 LLT NewTy = SrcTy.changeElementCount(
5551 EC: ElementCount::get(MinVal: coefficient, Scalable: MoreTy.isScalable()));
5552 Observer.changingInstr(MI);
5553 moreElementsVectorSrc(MI, MoreTy: NewTy, OpIdx: 1);
5554 moreElementsVectorDst(MI, WideTy: MoreTy, OpIdx: 0);
5555 Observer.changedInstr(MI);
5556 return Legalized;
5557 }
5558 case TargetOpcode::G_VECREDUCE_FADD:
5559 case TargetOpcode::G_VECREDUCE_FMUL:
5560 case TargetOpcode::G_VECREDUCE_ADD:
5561 case TargetOpcode::G_VECREDUCE_MUL:
5562 case TargetOpcode::G_VECREDUCE_AND:
5563 case TargetOpcode::G_VECREDUCE_OR:
5564 case TargetOpcode::G_VECREDUCE_XOR:
5565 case TargetOpcode::G_VECREDUCE_SMAX:
5566 case TargetOpcode::G_VECREDUCE_SMIN:
5567 case TargetOpcode::G_VECREDUCE_UMAX:
5568 case TargetOpcode::G_VECREDUCE_UMIN: {
5569 LLT OrigTy = MRI.getType(Reg: MI.getOperand(i: 1).getReg());
5570 MachineOperand &MO = MI.getOperand(i: 1);
5571 auto NewVec = MIRBuilder.buildPadVectorWithUndefElements(Res: MoreTy, Op0: MO);
5572 auto NeutralElement = getNeutralElementForVecReduce(
5573 Opcode: MI.getOpcode(), MIRBuilder, Ty: MoreTy.getElementType());
5574
5575 LLT IdxTy(TLI.getVectorIdxTy(DL: MIRBuilder.getDataLayout()));
5576 for (size_t i = OrigTy.getNumElements(), e = MoreTy.getNumElements();
5577 i != e; i++) {
5578 auto Idx = MIRBuilder.buildConstant(Res: IdxTy, Val: i);
5579 NewVec = MIRBuilder.buildInsertVectorElement(Res: MoreTy, Val: NewVec,
5580 Elt: NeutralElement, Idx);
5581 }
5582
5583 Observer.changingInstr(MI);
5584 MO.setReg(NewVec.getReg(Idx: 0));
5585 Observer.changedInstr(MI);
5586 return Legalized;
5587 }
5588
5589 default:
5590 return UnableToLegalize;
5591 }
5592}
5593
5594LegalizerHelper::LegalizeResult
5595LegalizerHelper::equalizeVectorShuffleLengths(MachineInstr &MI) {
5596 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
5597 ArrayRef<int> Mask = MI.getOperand(i: 3).getShuffleMask();
5598 unsigned MaskNumElts = Mask.size();
5599 unsigned SrcNumElts = SrcTy.getNumElements();
5600 LLT DestEltTy = DstTy.getElementType();
5601
5602 if (MaskNumElts == SrcNumElts)
5603 return Legalized;
5604
5605 if (MaskNumElts < SrcNumElts) {
5606 // Extend the mask with undef values to match the new destination
5607 // vector size.
5608 SmallVector<int, 16> NewMask(Mask);
5609 for (unsigned I = MaskNumElts; I < SrcNumElts; ++I)
5610 NewMask.push_back(Elt: -1);
5611
5612 moreElementsVectorDst(MI, WideTy: SrcTy, OpIdx: 0);
5613 MIRBuilder.setInstrAndDebugLoc(MI);
5614 MIRBuilder.buildShuffleVector(Res: MI.getOperand(i: 0).getReg(),
5615 Src1: MI.getOperand(i: 1).getReg(),
5616 Src2: MI.getOperand(i: 2).getReg(), Mask: NewMask);
5617 MI.eraseFromParent();
5618
5619 return Legalized;
5620 }
5621
5622 unsigned PaddedMaskNumElts = alignTo(Value: MaskNumElts, Align: SrcNumElts);
5623 unsigned NumConcat = PaddedMaskNumElts / SrcNumElts;
5624 LLT PaddedTy = LLT::fixed_vector(NumElements: PaddedMaskNumElts, ScalarTy: DestEltTy);
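// For example, a shuffle producing <6 x s32> from <4 x s32> sources: the mask
// has 6 elements, so PaddedMaskNumElts = 8; each source is concatenated with an
// undef <4 x s32>, and the 6 required elements are extracted again at the end.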
5625
5626 // Create new source vectors by concatenating the initial
5627 // source vectors with undefined vectors of the same size.
5628 auto Undef = MIRBuilder.buildUndef(Res: SrcTy);
5629 SmallVector<Register, 8> MOps1(NumConcat, Undef.getReg(Idx: 0));
5630 SmallVector<Register, 8> MOps2(NumConcat, Undef.getReg(Idx: 0));
5631 MOps1[0] = MI.getOperand(i: 1).getReg();
5632 MOps2[0] = MI.getOperand(i: 2).getReg();
5633
5634 auto Src1 = MIRBuilder.buildConcatVectors(Res: PaddedTy, Ops: MOps1);
5635 auto Src2 = MIRBuilder.buildConcatVectors(Res: PaddedTy, Ops: MOps2);
5636
5637 // Readjust mask for new input vector length.
5638 SmallVector<int, 8> MappedOps(PaddedMaskNumElts, -1);
5639 for (unsigned I = 0; I != MaskNumElts; ++I) {
5640 int Idx = Mask[I];
5641 if (Idx >= static_cast<int>(SrcNumElts))
5642 Idx += PaddedMaskNumElts - SrcNumElts;
5643 MappedOps[I] = Idx;
5644 }
5645
5646 // If we got more elements than required, extract subvector.
5647 if (MaskNumElts != PaddedMaskNumElts) {
5648 auto Shuffle =
5649 MIRBuilder.buildShuffleVector(Res: PaddedTy, Src1, Src2, Mask: MappedOps);
5650
5651 SmallVector<Register, 16> Elts(MaskNumElts);
5652 for (unsigned I = 0; I < MaskNumElts; ++I) {
5653 Elts[I] =
5654 MIRBuilder.buildExtractVectorElementConstant(Res: DestEltTy, Val: Shuffle, Idx: I)
5655 .getReg(Idx: 0);
5656 }
5657 MIRBuilder.buildBuildVector(Res: DstReg, Ops: Elts);
5658 } else {
5659 MIRBuilder.buildShuffleVector(Res: DstReg, Src1, Src2, Mask: MappedOps);
5660 }
5661
5662 MI.eraseFromParent();
5663 return LegalizerHelper::LegalizeResult::Legalized;
5664}
5665
5666LegalizerHelper::LegalizeResult
5667LegalizerHelper::moreElementsVectorShuffle(MachineInstr &MI,
5668 unsigned int TypeIdx, LLT MoreTy) {
5669 auto [DstTy, Src1Ty, Src2Ty] = MI.getFirst3LLTs();
5670 ArrayRef<int> Mask = MI.getOperand(i: 3).getShuffleMask();
5671 unsigned NumElts = DstTy.getNumElements();
5672 unsigned WidenNumElts = MoreTy.getNumElements();
5673
5674 if (DstTy.isVector() && Src1Ty.isVector() &&
5675 DstTy.getNumElements() != Src1Ty.getNumElements()) {
5676 return equalizeVectorShuffleLengths(MI);
5677 }
5678
5679 if (TypeIdx != 0)
5680 return UnableToLegalize;
5681
5682 // Expect a canonicalized shuffle.
5683 if (DstTy != Src1Ty || DstTy != Src2Ty)
5684 return UnableToLegalize;
5685
5686 moreElementsVectorSrc(MI, MoreTy, OpIdx: 1);
5687 moreElementsVectorSrc(MI, MoreTy, OpIdx: 2);
5688
5689 // Adjust mask based on new input vector length.
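// Indices that referred to the second input are shifted up by the number of
// padding lanes added to the first input. For example, widening <2 x s32>
// inputs to <4 x s32> turns the mask {0, 3} into {0, 5, -1, -1}.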
5690 SmallVector<int, 16> NewMask;
5691 for (unsigned I = 0; I != NumElts; ++I) {
5692 int Idx = Mask[I];
5693 if (Idx < static_cast<int>(NumElts))
5694 NewMask.push_back(Elt: Idx);
5695 else
5696 NewMask.push_back(Elt: Idx - NumElts + WidenNumElts);
5697 }
5698 for (unsigned I = NumElts; I != WidenNumElts; ++I)
5699 NewMask.push_back(Elt: -1);
5700 moreElementsVectorDst(MI, WideTy: MoreTy, OpIdx: 0);
5701 MIRBuilder.setInstrAndDebugLoc(MI);
5702 MIRBuilder.buildShuffleVector(Res: MI.getOperand(i: 0).getReg(),
5703 Src1: MI.getOperand(i: 1).getReg(),
5704 Src2: MI.getOperand(i: 2).getReg(), Mask: NewMask);
5705 MI.eraseFromParent();
5706 return Legalized;
5707}
5708
5709void LegalizerHelper::multiplyRegisters(SmallVectorImpl<Register> &DstRegs,
5710 ArrayRef<Register> Src1Regs,
5711 ArrayRef<Register> Src2Regs,
5712 LLT NarrowTy) {
5713 MachineIRBuilder &B = MIRBuilder;
5714 unsigned SrcParts = Src1Regs.size();
5715 unsigned DstParts = DstRegs.size();
5716
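// Schoolbook multiplication on NarrowTy-sized parts. For two 2-part operands
// (A1:A0) * (B1:B0) this produces:
//   Dst0 = mul(A0, B0)
//   Dst1 = mul(A1, B0) + mul(A0, B1) + umulh(A0, B0)
// with carries from each column accumulated into the next one when more result
// parts follow.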
5717 unsigned DstIdx = 0; // Low bits of the result.
5718 Register FactorSum =
5719 B.buildMul(Dst: NarrowTy, Src0: Src1Regs[DstIdx], Src1: Src2Regs[DstIdx]).getReg(Idx: 0);
5720 DstRegs[DstIdx] = FactorSum;
5721
5722 unsigned CarrySumPrevDstIdx;
5723 SmallVector<Register, 4> Factors;
5724
5725 for (DstIdx = 1; DstIdx < DstParts; DstIdx++) {
5726 // Collect low parts of muls for DstIdx.
5727 for (unsigned i = DstIdx + 1 < SrcParts ? 0 : DstIdx - SrcParts + 1;
5728 i <= std::min(a: DstIdx, b: SrcParts - 1); ++i) {
5729 MachineInstrBuilder Mul =
5730 B.buildMul(Dst: NarrowTy, Src0: Src1Regs[DstIdx - i], Src1: Src2Regs[i]);
5731 Factors.push_back(Elt: Mul.getReg(Idx: 0));
5732 }
5733 // Collect high parts of muls from previous DstIdx.
5734 for (unsigned i = DstIdx < SrcParts ? 0 : DstIdx - SrcParts;
5735 i <= std::min(a: DstIdx - 1, b: SrcParts - 1); ++i) {
5736 MachineInstrBuilder Umulh =
5737 B.buildUMulH(Dst: NarrowTy, Src0: Src1Regs[DstIdx - 1 - i], Src1: Src2Regs[i]);
5738 Factors.push_back(Elt: Umulh.getReg(Idx: 0));
5739 }
5740 // Add CarrySum from additions calculated for previous DstIdx.
5741 if (DstIdx != 1) {
5742 Factors.push_back(Elt: CarrySumPrevDstIdx);
5743 }
5744
5745 Register CarrySum;
5746 // Add all factors and accumulate all carries into CarrySum.
5747 if (DstIdx != DstParts - 1) {
5748 MachineInstrBuilder Uaddo =
5749 B.buildUAddo(Res: NarrowTy, CarryOut: LLT::scalar(SizeInBits: 1), Op0: Factors[0], Op1: Factors[1]);
5750 FactorSum = Uaddo.getReg(Idx: 0);
5751 CarrySum = B.buildZExt(Res: NarrowTy, Op: Uaddo.getReg(Idx: 1)).getReg(Idx: 0);
5752 for (unsigned i = 2; i < Factors.size(); ++i) {
5753 MachineInstrBuilder Uaddo =
5754 B.buildUAddo(Res: NarrowTy, CarryOut: LLT::scalar(SizeInBits: 1), Op0: FactorSum, Op1: Factors[i]);
5755 FactorSum = Uaddo.getReg(Idx: 0);
5756 MachineInstrBuilder Carry = B.buildZExt(Res: NarrowTy, Op: Uaddo.getReg(Idx: 1));
5757 CarrySum = B.buildAdd(Dst: NarrowTy, Src0: CarrySum, Src1: Carry).getReg(Idx: 0);
5758 }
5759 } else {
5760 // Since the value for the next index is not calculated, neither is CarrySum.
5761 FactorSum = B.buildAdd(Dst: NarrowTy, Src0: Factors[0], Src1: Factors[1]).getReg(Idx: 0);
5762 for (unsigned i = 2; i < Factors.size(); ++i)
5763 FactorSum = B.buildAdd(Dst: NarrowTy, Src0: FactorSum, Src1: Factors[i]).getReg(Idx: 0);
5764 }
5765
5766 CarrySumPrevDstIdx = CarrySum;
5767 DstRegs[DstIdx] = FactorSum;
5768 Factors.clear();
5769 }
5770}
5771
5772LegalizerHelper::LegalizeResult
5773LegalizerHelper::narrowScalarAddSub(MachineInstr &MI, unsigned TypeIdx,
5774 LLT NarrowTy) {
5775 if (TypeIdx != 0)
5776 return UnableToLegalize;
5777
5778 Register DstReg = MI.getOperand(i: 0).getReg();
5779 LLT DstType = MRI.getType(Reg: DstReg);
5780 // FIXME: add support for vector types
5781 if (DstType.isVector())
5782 return UnableToLegalize;
5783
5784 unsigned Opcode = MI.getOpcode();
5785 unsigned OpO, OpE, OpF;
5786 switch (Opcode) {
5787 case TargetOpcode::G_SADDO:
5788 case TargetOpcode::G_SADDE:
5789 case TargetOpcode::G_UADDO:
5790 case TargetOpcode::G_UADDE:
5791 case TargetOpcode::G_ADD:
5792 OpO = TargetOpcode::G_UADDO;
5793 OpE = TargetOpcode::G_UADDE;
5794 OpF = TargetOpcode::G_UADDE;
5795 if (Opcode == TargetOpcode::G_SADDO || Opcode == TargetOpcode::G_SADDE)
5796 OpF = TargetOpcode::G_SADDE;
5797 break;
5798 case TargetOpcode::G_SSUBO:
5799 case TargetOpcode::G_SSUBE:
5800 case TargetOpcode::G_USUBO:
5801 case TargetOpcode::G_USUBE:
5802 case TargetOpcode::G_SUB:
5803 OpO = TargetOpcode::G_USUBO;
5804 OpE = TargetOpcode::G_USUBE;
5805 OpF = TargetOpcode::G_USUBE;
5806 if (Opcode == TargetOpcode::G_SSUBO || Opcode == TargetOpcode::G_SSUBE)
5807 OpF = TargetOpcode::G_SSUBE;
5808 break;
5809 default:
5810 llvm_unreachable("Unexpected add/sub opcode!");
5811 }
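// For example, an s64 G_ADD narrowed to s32 becomes a G_UADDO on the low
// halves followed by a G_UADDE on the high halves that consumes the carry.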
5812
5813 // 1 for a plain add/sub, 2 if this is an operation with a carry-out.
5814 unsigned NumDefs = MI.getNumExplicitDefs();
5815 Register Src1 = MI.getOperand(i: NumDefs).getReg();
5816 Register Src2 = MI.getOperand(i: NumDefs + 1).getReg();
5817 Register CarryDst, CarryIn;
5818 if (NumDefs == 2)
5819 CarryDst = MI.getOperand(i: 1).getReg();
5820 if (MI.getNumOperands() == NumDefs + 3)
5821 CarryIn = MI.getOperand(i: NumDefs + 2).getReg();
5822
5823 LLT RegTy = MRI.getType(Reg: MI.getOperand(i: 0).getReg());
5824 LLT LeftoverTy, DummyTy;
5825 SmallVector<Register, 2> Src1Regs, Src2Regs, Src1Left, Src2Left, DstRegs;
5826 extractParts(Reg: Src1, RegTy, MainTy: NarrowTy, LeftoverTy, VRegs&: Src1Regs, LeftoverVRegs&: Src1Left,
5827 MIRBuilder, MRI);
5828 extractParts(Reg: Src2, RegTy, MainTy: NarrowTy, LeftoverTy&: DummyTy, VRegs&: Src2Regs, LeftoverVRegs&: Src2Left, MIRBuilder,
5829 MRI);
5830
5831 int NarrowParts = Src1Regs.size();
5832 for (int I = 0, E = Src1Left.size(); I != E; ++I) {
5833 Src1Regs.push_back(Elt: Src1Left[I]);
5834 Src2Regs.push_back(Elt: Src2Left[I]);
5835 }
5836 DstRegs.reserve(N: Src1Regs.size());
5837
5838 for (int i = 0, e = Src1Regs.size(); i != e; ++i) {
5839 Register DstReg =
5840 MRI.createGenericVirtualRegister(Ty: MRI.getType(Reg: Src1Regs[i]));
5841 Register CarryOut = MRI.createGenericVirtualRegister(Ty: LLT::scalar(SizeInBits: 1));
5842 // Forward the final carry-out to the destination register
5843 if (i == e - 1 && CarryDst)
5844 CarryOut = CarryDst;
5845
5846 if (!CarryIn) {
5847 MIRBuilder.buildInstr(Opc: OpO, DstOps: {DstReg, CarryOut},
5848 SrcOps: {Src1Regs[i], Src2Regs[i]});
5849 } else if (i == e - 1) {
5850 MIRBuilder.buildInstr(Opc: OpF, DstOps: {DstReg, CarryOut},
5851 SrcOps: {Src1Regs[i], Src2Regs[i], CarryIn});
5852 } else {
5853 MIRBuilder.buildInstr(Opc: OpE, DstOps: {DstReg, CarryOut},
5854 SrcOps: {Src1Regs[i], Src2Regs[i], CarryIn});
5855 }
5856
5857 DstRegs.push_back(Elt: DstReg);
5858 CarryIn = CarryOut;
5859 }
5860 insertParts(DstReg: MI.getOperand(i: 0).getReg(), ResultTy: RegTy, PartTy: NarrowTy,
5861 PartRegs: ArrayRef(DstRegs).take_front(N: NarrowParts), LeftoverTy,
5862 LeftoverRegs: ArrayRef(DstRegs).drop_front(N: NarrowParts));
5863
5864 MI.eraseFromParent();
5865 return Legalized;
5866}
5867
5868LegalizerHelper::LegalizeResult
5869LegalizerHelper::narrowScalarMul(MachineInstr &MI, LLT NarrowTy) {
5870 auto [DstReg, Src1, Src2] = MI.getFirst3Regs();
5871
5872 LLT Ty = MRI.getType(Reg: DstReg);
5873 if (Ty.isVector())
5874 return UnableToLegalize;
5875
5876 unsigned Size = Ty.getSizeInBits();
5877 unsigned NarrowSize = NarrowTy.getSizeInBits();
5878 if (Size % NarrowSize != 0)
5879 return UnableToLegalize;
5880
5881 unsigned NumParts = Size / NarrowSize;
5882 bool IsMulHigh = MI.getOpcode() == TargetOpcode::G_UMULH;
5883 unsigned DstTmpParts = NumParts * (IsMulHigh ? 2 : 1);
5884
5885 SmallVector<Register, 2> Src1Parts, Src2Parts;
5886 SmallVector<Register, 2> DstTmpRegs(DstTmpParts);
5887 extractParts(Reg: Src1, Ty: NarrowTy, NumParts, VRegs&: Src1Parts, MIRBuilder, MRI);
5888 extractParts(Reg: Src2, Ty: NarrowTy, NumParts, VRegs&: Src2Parts, MIRBuilder, MRI);
5889 multiplyRegisters(DstRegs&: DstTmpRegs, Src1Regs: Src1Parts, Src2Regs: Src2Parts, NarrowTy);
5890
5891 // Take only the high half of the registers if this is a high multiply.
5892 ArrayRef<Register> DstRegs(&DstTmpRegs[DstTmpParts - NumParts], NumParts);
5893 MIRBuilder.buildMergeLikeInstr(Res: DstReg, Ops: DstRegs);
5894 MI.eraseFromParent();
5895 return Legalized;
5896}
5897
5898LegalizerHelper::LegalizeResult
5899LegalizerHelper::narrowScalarFPTOI(MachineInstr &MI, unsigned TypeIdx,
5900 LLT NarrowTy) {
5901 if (TypeIdx != 0)
5902 return UnableToLegalize;
5903
5904 bool IsSigned = MI.getOpcode() == TargetOpcode::G_FPTOSI;
5905
5906 Register Src = MI.getOperand(i: 1).getReg();
5907 LLT SrcTy = MRI.getType(Reg: Src);
5908
5909 // If all finite floats fit into the narrowed integer type, we can just swap
5910 // out the result type. This is practically only useful for conversions from
5911 // half to at least 16 bits, so just handle the one case.
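// For example, the largest finite half value is 65504, which fits in an
// unsigned 16-bit (or signed 17-bit) integer, so the conversion can simply
// produce the narrow result and extend it afterwards.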
5912 if (SrcTy.getScalarType() != LLT::scalar(SizeInBits: 16) ||
5913 NarrowTy.getScalarSizeInBits() < (IsSigned ? 17u : 16u))
5914 return UnableToLegalize;
5915
5916 Observer.changingInstr(MI);
5917 narrowScalarDst(MI, NarrowTy, OpIdx: 0,
5918 ExtOpcode: IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT);
5919 Observer.changedInstr(MI);
5920 return Legalized;
5921}
5922
5923LegalizerHelper::LegalizeResult
5924LegalizerHelper::narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx,
5925 LLT NarrowTy) {
5926 if (TypeIdx != 1)
5927 return UnableToLegalize;
5928
5929 uint64_t NarrowSize = NarrowTy.getSizeInBits();
5930
5931 int64_t SizeOp1 = MRI.getType(Reg: MI.getOperand(i: 1).getReg()).getSizeInBits();
5932 // FIXME: add support for when SizeOp1 isn't an exact multiple of
5933 // NarrowSize.
5934 if (SizeOp1 % NarrowSize != 0)
5935 return UnableToLegalize;
5936 int NumParts = SizeOp1 / NarrowSize;
5937
5938 SmallVector<Register, 2> SrcRegs, DstRegs;
5939 SmallVector<uint64_t, 2> Indexes;
5940 extractParts(Reg: MI.getOperand(i: 1).getReg(), Ty: NarrowTy, NumParts, VRegs&: SrcRegs,
5941 MIRBuilder, MRI);
5942
5943 Register OpReg = MI.getOperand(i: 0).getReg();
5944 uint64_t OpStart = MI.getOperand(i: 2).getImm();
5945 uint64_t OpSize = MRI.getType(Reg: OpReg).getSizeInBits();
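// Walk the NarrowTy-sized pieces of the source: pieces entirely outside the
// extracted range are dropped, fully covered pieces are forwarded unchanged,
// and partially covered pieces are narrowed with a sub-extract below.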
5946 for (int i = 0; i < NumParts; ++i) {
5947 unsigned SrcStart = i * NarrowSize;
5948
5949 if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) {
5950 // No part of the extract uses this subregister, ignore it.
5951 continue;
5952 } else if (SrcStart == OpStart && NarrowTy == MRI.getType(Reg: OpReg)) {
5953 // The entire subregister is extracted, forward the value.
5954 DstRegs.push_back(Elt: SrcRegs[i]);
5955 continue;
5956 }
5957
5958 // Compute the offset into this source piece at which the extracted range
5959 // begins (ExtractOffset) and how many of its bits overlap it (SegSize).
5960 int64_t ExtractOffset;
5961 uint64_t SegSize;
5962 if (OpStart < SrcStart) {
5963 ExtractOffset = 0;
5964 SegSize = std::min(a: NarrowSize, b: OpStart + OpSize - SrcStart);
5965 } else {
5966 ExtractOffset = OpStart - SrcStart;
5967 SegSize = std::min(a: SrcStart + NarrowSize - OpStart, b: OpSize);
5968 }
5969
5970 Register SegReg = SrcRegs[i];
5971 if (ExtractOffset != 0 || SegSize != NarrowSize) {
5972 // A genuine extract is needed.
5973 SegReg = MRI.createGenericVirtualRegister(Ty: LLT::scalar(SizeInBits: SegSize));
5974 MIRBuilder.buildExtract(Res: SegReg, Src: SrcRegs[i], Index: ExtractOffset);
5975 }
5976
5977 DstRegs.push_back(Elt: SegReg);
5978 }
5979
5980 Register DstReg = MI.getOperand(i: 0).getReg();
5981 if (MRI.getType(Reg: DstReg).isVector())
5982 MIRBuilder.buildBuildVector(Res: DstReg, Ops: DstRegs);
5983 else if (DstRegs.size() > 1)
5984 MIRBuilder.buildMergeLikeInstr(Res: DstReg, Ops: DstRegs);
5985 else
5986 MIRBuilder.buildCopy(Res: DstReg, Op: DstRegs[0]);
5987 MI.eraseFromParent();
5988 return Legalized;
5989}
5990
5991LegalizerHelper::LegalizeResult
5992LegalizerHelper::narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx,
5993 LLT NarrowTy) {
5994 // FIXME: Don't know how to handle secondary types yet.
5995 if (TypeIdx != 0)
5996 return UnableToLegalize;
5997
5998 SmallVector<Register, 2> SrcRegs, LeftoverRegs, DstRegs;
5999 SmallVector<uint64_t, 2> Indexes;
6000 LLT RegTy = MRI.getType(Reg: MI.getOperand(i: 0).getReg());
6001 LLT LeftoverTy;
6002 extractParts(Reg: MI.getOperand(i: 1).getReg(), RegTy, MainTy: NarrowTy, LeftoverTy, VRegs&: SrcRegs,
6003 LeftoverVRegs&: LeftoverRegs, MIRBuilder, MRI);
6004
6005 for (Register Reg : LeftoverRegs)
6006 SrcRegs.push_back(Elt: Reg);
6007
6008 uint64_t NarrowSize = NarrowTy.getSizeInBits();
6009 Register OpReg = MI.getOperand(i: 2).getReg();
6010 uint64_t OpStart = MI.getOperand(i: 3).getImm();
6011 uint64_t OpSize = MRI.getType(Reg: OpReg).getSizeInBits();
6012 for (int I = 0, E = SrcRegs.size(); I != E; ++I) {
6013 unsigned DstStart = I * NarrowSize;
6014
6015 if (DstStart == OpStart && NarrowTy == MRI.getType(Reg: OpReg)) {
6016 // The entire subregister is defined by this insert, forward the new
6017 // value.
6018 DstRegs.push_back(Elt: OpReg);
6019 continue;
6020 }
6021
6022 Register SrcReg = SrcRegs[I];
6023 if (MRI.getType(Reg: SrcRegs[I]) == LeftoverTy) {
6024 // The leftover reg is smaller than NarrowTy, so we need to extend it.
6025 SrcReg = MRI.createGenericVirtualRegister(Ty: NarrowTy);
6026 MIRBuilder.buildAnyExt(Res: SrcReg, Op: SrcRegs[I]);
6027 }
6028
6029 if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
6030 // No part of the insert affects this subregister, forward the original.
6031 DstRegs.push_back(Elt: SrcReg);
6032 continue;
6033 }
6034
6035 // Compute which bits of OpReg land in this piece: ExtractOffset is the offset
6036 // into OpReg, InsertOffset the offset into the piece, SegSize the bit count.
6037 int64_t ExtractOffset, InsertOffset;
6038 uint64_t SegSize;
6039 if (OpStart < DstStart) {
6040 InsertOffset = 0;
6041 ExtractOffset = DstStart - OpStart;
6042 SegSize = std::min(a: NarrowSize, b: OpStart + OpSize - DstStart);
6043 } else {
6044 InsertOffset = OpStart - DstStart;
6045 ExtractOffset = 0;
6046 SegSize =
6047 std::min(a: NarrowSize - InsertOffset, b: OpStart + OpSize - DstStart);
6048 }
6049
6050 Register SegReg = OpReg;
6051 if (ExtractOffset != 0 || SegSize != OpSize) {
6052 // A genuine extract is needed.
6053 SegReg = MRI.createGenericVirtualRegister(Ty: LLT::scalar(SizeInBits: SegSize));
6054 MIRBuilder.buildExtract(Res: SegReg, Src: OpReg, Index: ExtractOffset);
6055 }
6056
6057 Register DstReg = MRI.createGenericVirtualRegister(Ty: NarrowTy);
6058 MIRBuilder.buildInsert(Res: DstReg, Src: SrcReg, Op: SegReg, Index: InsertOffset);
6059 DstRegs.push_back(Elt: DstReg);
6060 }
6061
6062 uint64_t WideSize = DstRegs.size() * NarrowSize;
6063 Register DstReg = MI.getOperand(i: 0).getReg();
6064 if (WideSize > RegTy.getSizeInBits()) {
6065 Register MergeReg = MRI.createGenericVirtualRegister(Ty: LLT::scalar(SizeInBits: WideSize));
6066 MIRBuilder.buildMergeLikeInstr(Res: MergeReg, Ops: DstRegs);
6067 MIRBuilder.buildTrunc(Res: DstReg, Op: MergeReg);
6068 } else
6069 MIRBuilder.buildMergeLikeInstr(Res: DstReg, Ops: DstRegs);
6070
6071 MI.eraseFromParent();
6072 return Legalized;
6073}
6074
6075LegalizerHelper::LegalizeResult
6076LegalizerHelper::narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx,
6077 LLT NarrowTy) {
6078 Register DstReg = MI.getOperand(i: 0).getReg();
6079 LLT DstTy = MRI.getType(Reg: DstReg);
6080
6081 assert(MI.getNumOperands() == 3 && TypeIdx == 0);
6082
6083 SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
6084 SmallVector<Register, 4> Src0Regs, Src0LeftoverRegs;
6085 SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
6086 LLT LeftoverTy;
6087 if (!extractParts(Reg: MI.getOperand(i: 1).getReg(), RegTy: DstTy, MainTy: NarrowTy, LeftoverTy,
6088 VRegs&: Src0Regs, LeftoverVRegs&: Src0LeftoverRegs, MIRBuilder, MRI))
6089 return UnableToLegalize;
6090
6091 LLT Unused;
6092 if (!extractParts(Reg: MI.getOperand(i: 2).getReg(), RegTy: DstTy, MainTy: NarrowTy, LeftoverTy&: Unused,
6093 VRegs&: Src1Regs, LeftoverVRegs&: Src1LeftoverRegs, MIRBuilder, MRI))
6094 llvm_unreachable("inconsistent extractParts result");
6095
6096 for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
6097 auto Inst = MIRBuilder.buildInstr(Opc: MI.getOpcode(), DstOps: {NarrowTy},
6098 SrcOps: {Src0Regs[I], Src1Regs[I]});
6099 DstRegs.push_back(Elt: Inst.getReg(Idx: 0));
6100 }
6101
6102 for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
6103 auto Inst = MIRBuilder.buildInstr(
6104 Opc: MI.getOpcode(),
6105 DstOps: {LeftoverTy}, SrcOps: {Src0LeftoverRegs[I], Src1LeftoverRegs[I]});
6106 DstLeftoverRegs.push_back(Elt: Inst.getReg(Idx: 0));
6107 }
6108
6109 insertParts(DstReg, ResultTy: DstTy, PartTy: NarrowTy, PartRegs: DstRegs,
6110 LeftoverTy, LeftoverRegs: DstLeftoverRegs);
6111
6112 MI.eraseFromParent();
6113 return Legalized;
6114}
6115
6116LegalizerHelper::LegalizeResult
6117LegalizerHelper::narrowScalarExt(MachineInstr &MI, unsigned TypeIdx,
6118 LLT NarrowTy) {
6119 if (TypeIdx != 0)
6120 return UnableToLegalize;
6121
6122 auto [DstReg, SrcReg] = MI.getFirst2Regs();
6123
6124 LLT DstTy = MRI.getType(Reg: DstReg);
6125 if (DstTy.isVector())
6126 return UnableToLegalize;
6127
6128 SmallVector<Register, 8> Parts;
6129 LLT GCDTy = extractGCDType(Parts, DstTy, NarrowTy, SrcReg);
6130 LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, VRegs&: Parts, PadStrategy: MI.getOpcode());
6131 buildWidenedRemergeToDst(DstReg, LCMTy, RemergeRegs: Parts);
6132
6133 MI.eraseFromParent();
6134 return Legalized;
6135}
6136
6137LegalizerHelper::LegalizeResult
6138LegalizerHelper::narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx,
6139 LLT NarrowTy) {
6140 if (TypeIdx != 0)
6141 return UnableToLegalize;
6142
6143 Register CondReg = MI.getOperand(i: 1).getReg();
6144 LLT CondTy = MRI.getType(Reg: CondReg);
6145 if (CondTy.isVector()) // TODO: Handle vselect
6146 return UnableToLegalize;
6147
6148 Register DstReg = MI.getOperand(i: 0).getReg();
6149 LLT DstTy = MRI.getType(Reg: DstReg);
6150
6151 SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
6152 SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
6153 SmallVector<Register, 4> Src2Regs, Src2LeftoverRegs;
6154 LLT LeftoverTy;
6155 if (!extractParts(Reg: MI.getOperand(i: 2).getReg(), RegTy: DstTy, MainTy: NarrowTy, LeftoverTy,
6156 VRegs&: Src1Regs, LeftoverVRegs&: Src1LeftoverRegs, MIRBuilder, MRI))
6157 return UnableToLegalize;
6158
6159 LLT Unused;
6160 if (!extractParts(Reg: MI.getOperand(i: 3).getReg(), RegTy: DstTy, MainTy: NarrowTy, LeftoverTy&: Unused,
6161 VRegs&: Src2Regs, LeftoverVRegs&: Src2LeftoverRegs, MIRBuilder, MRI))
6162 llvm_unreachable("inconsistent extractParts result");
6163
6164 for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
6165 auto Select = MIRBuilder.buildSelect(Res: NarrowTy,
6166 Tst: CondReg, Op0: Src1Regs[I], Op1: Src2Regs[I]);
6167 DstRegs.push_back(Elt: Select.getReg(Idx: 0));
6168 }
6169
6170 for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
6171 auto Select = MIRBuilder.buildSelect(
6172 Res: LeftoverTy, Tst: CondReg, Op0: Src1LeftoverRegs[I], Op1: Src2LeftoverRegs[I]);
6173 DstLeftoverRegs.push_back(Elt: Select.getReg(Idx: 0));
6174 }
6175
6176 insertParts(DstReg, ResultTy: DstTy, PartTy: NarrowTy, PartRegs: DstRegs,
6177 LeftoverTy, LeftoverRegs: DstLeftoverRegs);
6178
6179 MI.eraseFromParent();
6180 return Legalized;
6181}
6182
6183LegalizerHelper::LegalizeResult
6184LegalizerHelper::narrowScalarCTLZ(MachineInstr &MI, unsigned TypeIdx,
6185 LLT NarrowTy) {
6186 if (TypeIdx != 1)
6187 return UnableToLegalize;
6188
6189 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
6190 unsigned NarrowSize = NarrowTy.getSizeInBits();
6191
6192 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
6193 const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF;
6194
6195 MachineIRBuilder &B = MIRBuilder;
6196 auto UnmergeSrc = B.buildUnmerge(Res: NarrowTy, Op: SrcReg);
6197 // ctlz(Hi:Lo) -> Hi == 0 ? (NarrowSize + ctlz(Lo)) : ctlz(Hi)
6198 auto C_0 = B.buildConstant(Res: NarrowTy, Val: 0);
6199 auto HiIsZero = B.buildICmp(Pred: CmpInst::ICMP_EQ, Res: LLT::scalar(SizeInBits: 1),
6200 Op0: UnmergeSrc.getReg(Idx: 1), Op1: C_0);
6201 auto LoCTLZ = IsUndef ?
6202 B.buildCTLZ_ZERO_UNDEF(Dst: DstTy, Src0: UnmergeSrc.getReg(Idx: 0)) :
6203 B.buildCTLZ(Dst: DstTy, Src0: UnmergeSrc.getReg(Idx: 0));
6204 auto C_NarrowSize = B.buildConstant(Res: DstTy, Val: NarrowSize);
6205 auto HiIsZeroCTLZ = B.buildAdd(Dst: DstTy, Src0: LoCTLZ, Src1: C_NarrowSize);
6206 auto HiCTLZ = B.buildCTLZ_ZERO_UNDEF(Dst: DstTy, Src0: UnmergeSrc.getReg(Idx: 1));
6207 B.buildSelect(Res: DstReg, Tst: HiIsZero, Op0: HiIsZeroCTLZ, Op1: HiCTLZ);
6208
6209 MI.eraseFromParent();
6210 return Legalized;
6211 }
6212
6213 return UnableToLegalize;
6214}
6215
6216LegalizerHelper::LegalizeResult
6217LegalizerHelper::narrowScalarCTTZ(MachineInstr &MI, unsigned TypeIdx,
6218 LLT NarrowTy) {
6219 if (TypeIdx != 1)
6220 return UnableToLegalize;
6221
6222 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
6223 unsigned NarrowSize = NarrowTy.getSizeInBits();
6224
6225 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
6226 const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTTZ_ZERO_UNDEF;
6227
6228 MachineIRBuilder &B = MIRBuilder;
6229 auto UnmergeSrc = B.buildUnmerge(Res: NarrowTy, Op: SrcReg);
6230 // cttz(Hi:Lo) -> Lo == 0 ? (cttz(Hi) + NarrowSize) : cttz(Lo)
6231 auto C_0 = B.buildConstant(Res: NarrowTy, Val: 0);
6232 auto LoIsZero = B.buildICmp(Pred: CmpInst::ICMP_EQ, Res: LLT::scalar(SizeInBits: 1),
6233 Op0: UnmergeSrc.getReg(Idx: 0), Op1: C_0);
6234 auto HiCTTZ = IsUndef ?
6235 B.buildCTTZ_ZERO_UNDEF(Dst: DstTy, Src0: UnmergeSrc.getReg(Idx: 1)) :
6236 B.buildCTTZ(Dst: DstTy, Src0: UnmergeSrc.getReg(Idx: 1));
6237 auto C_NarrowSize = B.buildConstant(Res: DstTy, Val: NarrowSize);
6238 auto LoIsZeroCTTZ = B.buildAdd(Dst: DstTy, Src0: HiCTTZ, Src1: C_NarrowSize);
6239 auto LoCTTZ = B.buildCTTZ_ZERO_UNDEF(Dst: DstTy, Src0: UnmergeSrc.getReg(Idx: 0));
6240 B.buildSelect(Res: DstReg, Tst: LoIsZero, Op0: LoIsZeroCTTZ, Op1: LoCTTZ);
6241
6242 MI.eraseFromParent();
6243 return Legalized;
6244 }
6245
6246 return UnableToLegalize;
6247}
6248
6249LegalizerHelper::LegalizeResult
6250LegalizerHelper::narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx,
6251 LLT NarrowTy) {
6252 if (TypeIdx != 1)
6253 return UnableToLegalize;
6254
6255 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
6256 unsigned NarrowSize = NarrowTy.getSizeInBits();
6257
6258 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
6259 auto UnmergeSrc = MIRBuilder.buildUnmerge(Res: NarrowTy, Op: MI.getOperand(i: 1));
6260
6261 auto LoCTPOP = MIRBuilder.buildCTPOP(Dst: DstTy, Src0: UnmergeSrc.getReg(Idx: 0));
6262 auto HiCTPOP = MIRBuilder.buildCTPOP(Dst: DstTy, Src0: UnmergeSrc.getReg(Idx: 1));
6263 MIRBuilder.buildAdd(Dst: DstReg, Src0: HiCTPOP, Src1: LoCTPOP);
6264
6265 MI.eraseFromParent();
6266 return Legalized;
6267 }
6268
6269 return UnableToLegalize;
6270}
6271
6272LegalizerHelper::LegalizeResult
6273LegalizerHelper::narrowScalarFLDEXP(MachineInstr &MI, unsigned TypeIdx,
6274 LLT NarrowTy) {
6275 if (TypeIdx != 1)
6276 return UnableToLegalize;
6277
6278 MachineIRBuilder &B = MIRBuilder;
6279 Register ExpReg = MI.getOperand(i: 2).getReg();
6280 LLT ExpTy = MRI.getType(Reg: ExpReg);
6281
6282 unsigned ClampSize = NarrowTy.getScalarSizeInBits();
6283
6284 // Clamp the exponent to the range of the target type.
6285 auto MinExp = B.buildConstant(Res: ExpTy, Val: minIntN(N: ClampSize));
6286 auto ClampMin = B.buildSMax(Dst: ExpTy, Src0: ExpReg, Src1: MinExp);
6287 auto MaxExp = B.buildConstant(Res: ExpTy, Val: maxIntN(N: ClampSize));
6288 auto Clamp = B.buildSMin(Dst: ExpTy, Src0: ClampMin, Src1: MaxExp);
6289
6290 auto Trunc = B.buildTrunc(Res: NarrowTy, Op: Clamp);
6291 Observer.changingInstr(MI);
6292 MI.getOperand(i: 2).setReg(Trunc.getReg(Idx: 0));
6293 Observer.changedInstr(MI);
6294 return Legalized;
6295}
6296
6297LegalizerHelper::LegalizeResult
6298LegalizerHelper::lowerBitCount(MachineInstr &MI) {
6299 unsigned Opc = MI.getOpcode();
6300 const auto &TII = MIRBuilder.getTII();
6301 auto isSupported = [this](const LegalityQuery &Q) {
6302 auto QAction = LI.getAction(Query: Q).Action;
6303 return QAction == Legal || QAction == Libcall || QAction == Custom;
6304 };
6305 switch (Opc) {
6306 default:
6307 return UnableToLegalize;
6308 case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
6309 // This trivially expands to CTLZ.
6310 Observer.changingInstr(MI);
6311 MI.setDesc(TII.get(Opcode: TargetOpcode::G_CTLZ));
6312 Observer.changedInstr(MI);
6313 return Legalized;
6314 }
6315 case TargetOpcode::G_CTLZ: {
6316 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
6317 unsigned Len = SrcTy.getSizeInBits();
6318
6319 if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
6320 // If CTLZ_ZERO_UNDEF is supported, emit that and a select for zero.
6321 auto CtlzZU = MIRBuilder.buildCTLZ_ZERO_UNDEF(Dst: DstTy, Src0: SrcReg);
6322 auto ZeroSrc = MIRBuilder.buildConstant(Res: SrcTy, Val: 0);
6323 auto ICmp = MIRBuilder.buildICmp(
6324 Pred: CmpInst::ICMP_EQ, Res: SrcTy.changeElementSize(NewEltSize: 1), Op0: SrcReg, Op1: ZeroSrc);
6325 auto LenConst = MIRBuilder.buildConstant(Res: DstTy, Val: Len);
6326 MIRBuilder.buildSelect(Res: DstReg, Tst: ICmp, Op0: LenConst, Op1: CtlzZU);
6327 MI.eraseFromParent();
6328 return Legalized;
6329 }
6330 // for now, we do this:
6331 // NewLen = NextPowerOf2(Len);
6332 // x = x | (x >> 1);
6333 // x = x | (x >> 2);
6334 // ...
6335 // x = x | (x >>16);
6336 // x = x | (x >>32); // for 64-bit input
6337 // repeated up to a shift of NewLen/2
6338 // return Len - popcount(x);
6339 //
6340 // Ref: "Hacker's Delight" by Henry Warren
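// For example, with Len = 8 and x = 0b00010000: the or-shift cascade gives
// 0b00011111, whose popcount is 5, so ctlz = 8 - 5 = 3.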
6341 Register Op = SrcReg;
6342 unsigned NewLen = PowerOf2Ceil(A: Len);
6343 for (unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) {
6344 auto MIBShiftAmt = MIRBuilder.buildConstant(Res: SrcTy, Val: 1ULL << i);
6345 auto MIBOp = MIRBuilder.buildOr(
6346 Dst: SrcTy, Src0: Op, Src1: MIRBuilder.buildLShr(Dst: SrcTy, Src0: Op, Src1: MIBShiftAmt));
6347 Op = MIBOp.getReg(Idx: 0);
6348 }
6349 auto MIBPop = MIRBuilder.buildCTPOP(Dst: DstTy, Src0: Op);
6350 MIRBuilder.buildSub(Dst: MI.getOperand(i: 0), Src0: MIRBuilder.buildConstant(Res: DstTy, Val: Len),
6351 Src1: MIBPop);
6352 MI.eraseFromParent();
6353 return Legalized;
6354 }
6355 case TargetOpcode::G_CTTZ_ZERO_UNDEF: {
6356 // This trivially expands to CTTZ.
6357 Observer.changingInstr(MI);
6358 MI.setDesc(TII.get(Opcode: TargetOpcode::G_CTTZ));
6359 Observer.changedInstr(MI);
6360 return Legalized;
6361 }
6362 case TargetOpcode::G_CTTZ: {
6363 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
6364
6365 unsigned Len = SrcTy.getSizeInBits();
6366 if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
6367 // If CTTZ_ZERO_UNDEF is legal or custom, emit that and a select with
6368 // zero.
6369 auto CttzZU = MIRBuilder.buildCTTZ_ZERO_UNDEF(Dst: DstTy, Src0: SrcReg);
6370 auto Zero = MIRBuilder.buildConstant(Res: SrcTy, Val: 0);
6371 auto ICmp = MIRBuilder.buildICmp(
6372 Pred: CmpInst::ICMP_EQ, Res: DstTy.changeElementSize(NewEltSize: 1), Op0: SrcReg, Op1: Zero);
6373 auto LenConst = MIRBuilder.buildConstant(Res: DstTy, Val: Len);
6374 MIRBuilder.buildSelect(Res: DstReg, Tst: ICmp, Op0: LenConst, Op1: CttzZU);
6375 MI.eraseFromParent();
6376 return Legalized;
6377 }
6378 // For now, we use: { return popcount(~x & (x - 1)); }
6379 // unless the target has ctlz but not ctpop, in which case we use:
6380 // { return Len - nlz(~x & (x-1)); }
6381 // Ref: "Hacker's Delight" by Henry Warren
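// ~x & (x - 1) isolates the trailing zero bits: e.g. for x = 0b01011000,
// x - 1 = 0b01010111 and ~x = 0b10100111, so ~x & (x - 1) = 0b00000111 and
// popcount gives 3 = cttz(x).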
6382 auto MIBCstNeg1 = MIRBuilder.buildConstant(Res: SrcTy, Val: -1);
6383 auto MIBNot = MIRBuilder.buildXor(Dst: SrcTy, Src0: SrcReg, Src1: MIBCstNeg1);
6384 auto MIBTmp = MIRBuilder.buildAnd(
6385 Dst: SrcTy, Src0: MIBNot, Src1: MIRBuilder.buildAdd(Dst: SrcTy, Src0: SrcReg, Src1: MIBCstNeg1));
6386 if (!isSupported({TargetOpcode::G_CTPOP, {SrcTy, SrcTy}}) &&
6387 isSupported({TargetOpcode::G_CTLZ, {SrcTy, SrcTy}})) {
6388 auto MIBCstLen = MIRBuilder.buildConstant(Res: SrcTy, Val: Len);
6389 MIRBuilder.buildSub(Dst: MI.getOperand(i: 0), Src0: MIBCstLen,
6390 Src1: MIRBuilder.buildCTLZ(Dst: SrcTy, Src0: MIBTmp));
6391 MI.eraseFromParent();
6392 return Legalized;
6393 }
6394 Observer.changingInstr(MI);
6395 MI.setDesc(TII.get(Opcode: TargetOpcode::G_CTPOP));
6396 MI.getOperand(i: 1).setReg(MIBTmp.getReg(Idx: 0));
6397 Observer.changedInstr(MI);
6398 return Legalized;
6399 }
6400 case TargetOpcode::G_CTPOP: {
6401 Register SrcReg = MI.getOperand(i: 1).getReg();
6402 LLT Ty = MRI.getType(Reg: SrcReg);
6403 unsigned Size = Ty.getSizeInBits();
6404 MachineIRBuilder &B = MIRBuilder;
6405
6406 // Count set bits in blocks of 2 bits. The default approach would be
6407 // B2Count = { val & 0x55555555 } + { (val >> 1) & 0x55555555 }
6408 // We use the following formula instead:
6409 // B2Count = val - { (val >> 1) & 0x55555555 }
6410 // since it gives the same result per 2-bit block with one instruction less.
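// For a single 2-bit block with bits b1:b0, val - ((val >> 1) & 1)
// = 2*b1 + b0 - b1 = b1 + b0, i.e. the block's popcount.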
6411 auto C_1 = B.buildConstant(Res: Ty, Val: 1);
6412 auto B2Set1LoTo1Hi = B.buildLShr(Dst: Ty, Src0: SrcReg, Src1: C_1);
6413 APInt B2Mask1HiTo0 = APInt::getSplat(NewLen: Size, V: APInt(8, 0x55));
6414 auto C_B2Mask1HiTo0 = B.buildConstant(Res: Ty, Val: B2Mask1HiTo0);
6415 auto B2Count1Hi = B.buildAnd(Dst: Ty, Src0: B2Set1LoTo1Hi, Src1: C_B2Mask1HiTo0);
6416 auto B2Count = B.buildSub(Dst: Ty, Src0: SrcReg, Src1: B2Count1Hi);
6417
6418 // To get the count in blocks of 4, add the values from adjacent blocks of 2.
6419 // B4Count = { B2Count & 0x33333333 } + { (B2Count >> 2) & 0x33333333 }
6420 auto C_2 = B.buildConstant(Res: Ty, Val: 2);
6421 auto B4Set2LoTo2Hi = B.buildLShr(Dst: Ty, Src0: B2Count, Src1: C_2);
6422 APInt B4Mask2HiTo0 = APInt::getSplat(NewLen: Size, V: APInt(8, 0x33));
6423 auto C_B4Mask2HiTo0 = B.buildConstant(Res: Ty, Val: B4Mask2HiTo0);
6424 auto B4HiB2Count = B.buildAnd(Dst: Ty, Src0: B4Set2LoTo2Hi, Src1: C_B4Mask2HiTo0);
6425 auto B4LoB2Count = B.buildAnd(Dst: Ty, Src0: B2Count, Src1: C_B4Mask2HiTo0);
6426 auto B4Count = B.buildAdd(Dst: Ty, Src0: B4HiB2Count, Src1: B4LoB2Count);
6427
6428 // For the count in blocks of 8 bits we don't have to mask the high 4 bits
6429 // before the addition since the count sits in the range {0,...,8} and 4 bits
6430 // are enough to hold it. After the addition the high 4 bits still hold the
6431 // count of the high 4-bit block; set them to zero to get the 8-bit result.
6432 // B8Count = { B4Count + (B4Count >> 4) } & 0x0F0F0F0F
6433 auto C_4 = B.buildConstant(Res: Ty, Val: 4);
6434 auto B8HiB4Count = B.buildLShr(Dst: Ty, Src0: B4Count, Src1: C_4);
6435 auto B8CountDirty4Hi = B.buildAdd(Dst: Ty, Src0: B8HiB4Count, Src1: B4Count);
6436 APInt B8Mask4HiTo0 = APInt::getSplat(NewLen: Size, V: APInt(8, 0x0F));
6437 auto C_B8Mask4HiTo0 = B.buildConstant(Res: Ty, Val: B8Mask4HiTo0);
6438 auto B8Count = B.buildAnd(Dst: Ty, Src0: B8CountDirty4Hi, Src1: C_B8Mask4HiTo0);
6439
6440 assert(Size<=128 && "Scalar size is too large for CTPOP lower algorithm");
6441 // 8 bits can hold the CTPOP result of a 128-bit int or smaller. A multiply
6442 // by this bitmask sets the 8 msbs of ResTmp to the sum of all 8-bit B8Count blocks.
6443 auto MulMask = B.buildConstant(Res: Ty, Val: APInt::getSplat(NewLen: Size, V: APInt(8, 0x01)));
6444
6445 // Shift count result from 8 high bits to low bits.
6446 auto C_SizeM8 = B.buildConstant(Res: Ty, Val: Size - 8);
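// For example, for a 32-bit value the multiply by 0x01010101 places the sum
// of the four per-byte counts (at most 32, so it cannot overflow a byte) in
// the top byte, and the shift by Size - 8 = 24 moves it down to bit 0.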
6447
6448 auto IsMulSupported = [this](const LLT Ty) {
6449 auto Action = LI.getAction(Query: {TargetOpcode::G_MUL, {Ty}}).Action;
6450 return Action == Legal || Action == WidenScalar || Action == Custom;
6451 };
6452 if (IsMulSupported(Ty)) {
6453 auto ResTmp = B.buildMul(Dst: Ty, Src0: B8Count, Src1: MulMask);
6454 B.buildLShr(Dst: MI.getOperand(i: 0).getReg(), Src0: ResTmp, Src1: C_SizeM8);
6455 } else {
6456 auto ResTmp = B8Count;
6457 for (unsigned Shift = 8; Shift < Size; Shift *= 2) {
6458 auto ShiftC = B.buildConstant(Res: Ty, Val: Shift);
6459 auto Shl = B.buildShl(Dst: Ty, Src0: ResTmp, Src1: ShiftC);
6460 ResTmp = B.buildAdd(Dst: Ty, Src0: ResTmp, Src1: Shl);
6461 }
6462 B.buildLShr(Dst: MI.getOperand(i: 0).getReg(), Src0: ResTmp, Src1: C_SizeM8);
6463 }
6464 MI.eraseFromParent();
6465 return Legalized;
6466 }
6467 }
6468}
6469
6470// Check that (every element of) Reg is undef or not an exact multiple of BW.
6471static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI,
6472 Register Reg, unsigned BW) {
6473 return matchUnaryPredicate(
6474 MRI, Reg,
6475 Match: [=](const Constant *C) {
6476 // Null constant here means an undef.
6477 const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(Val: C);
6478 return !CI || CI->getValue().urem(RHS: BW) != 0;
6479 },
6480 /*AllowUndefs*/ true);
6481}
6482
6483LegalizerHelper::LegalizeResult
6484LegalizerHelper::lowerFunnelShiftWithInverse(MachineInstr &MI) {
6485 auto [Dst, X, Y, Z] = MI.getFirst4Regs();
6486 LLT Ty = MRI.getType(Reg: Dst);
6487 LLT ShTy = MRI.getType(Reg: Z);
6488
6489 unsigned BW = Ty.getScalarSizeInBits();
6490
6491 if (!isPowerOf2_32(Value: BW))
6492 return UnableToLegalize;
6493
6494 const bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
6495 unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
6496
6497 if (isNonZeroModBitWidthOrUndef(MRI, Reg: Z, BW)) {
6498 // fshl X, Y, Z -> fshr X, Y, -Z
6499 // fshr X, Y, Z -> fshl X, Y, -Z
6500 auto Zero = MIRBuilder.buildConstant(Res: ShTy, Val: 0);
6501 Z = MIRBuilder.buildSub(Dst: ShTy, Src0: Zero, Src1: Z).getReg(Idx: 0);
6502 } else {
6503 // fshl X, Y, Z -> fshr (srl X, 1), (fshr X, Y, 1), ~Z
6504 // fshr X, Y, Z -> fshl (fshl X, Y, 1), (shl Y, 1), ~Z
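// Using ~Z (i.e. BW - 1 - (Z % BW)) keeps the shift amount strictly below
// BW even when Z is a multiple of BW; pre-shifting the concatenated value
// by one bit makes the total shift BW - (Z % BW), matching the original.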
6505 auto One = MIRBuilder.buildConstant(Res: ShTy, Val: 1);
6506 if (IsFSHL) {
6507 Y = MIRBuilder.buildInstr(Opc: RevOpcode, DstOps: {Ty}, SrcOps: {X, Y, One}).getReg(Idx: 0);
6508 X = MIRBuilder.buildLShr(Dst: Ty, Src0: X, Src1: One).getReg(Idx: 0);
6509 } else {
6510 X = MIRBuilder.buildInstr(Opc: RevOpcode, DstOps: {Ty}, SrcOps: {X, Y, One}).getReg(Idx: 0);
6511 Y = MIRBuilder.buildShl(Dst: Ty, Src0: Y, Src1: One).getReg(Idx: 0);
6512 }
6513
6514 Z = MIRBuilder.buildNot(Dst: ShTy, Src0: Z).getReg(Idx: 0);
6515 }
6516
6517 MIRBuilder.buildInstr(Opc: RevOpcode, DstOps: {Dst}, SrcOps: {X, Y, Z});
6518 MI.eraseFromParent();
6519 return Legalized;
6520}
6521
6522LegalizerHelper::LegalizeResult
6523LegalizerHelper::lowerFunnelShiftAsShifts(MachineInstr &MI) {
6524 auto [Dst, X, Y, Z] = MI.getFirst4Regs();
6525 LLT Ty = MRI.getType(Reg: Dst);
6526 LLT ShTy = MRI.getType(Reg: Z);
6527
6528 const unsigned BW = Ty.getScalarSizeInBits();
6529 const bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
6530
6531 Register ShX, ShY;
6532 Register ShAmt, InvShAmt;
6533
6534 // FIXME: Emit optimized urem by constant instead of letting it expand later.
6535 if (isNonZeroModBitWidthOrUndef(MRI, Reg: Z, BW)) {
6536 // fshl: X << C | Y >> (BW - C)
6537 // fshr: X << (BW - C) | Y >> C
6538 // where C = Z % BW is not zero
6539 auto BitWidthC = MIRBuilder.buildConstant(Res: ShTy, Val: BW);
6540 ShAmt = MIRBuilder.buildURem(Dst: ShTy, Src0: Z, Src1: BitWidthC).getReg(Idx: 0);
6541 InvShAmt = MIRBuilder.buildSub(Dst: ShTy, Src0: BitWidthC, Src1: ShAmt).getReg(Idx: 0);
6542 ShX = MIRBuilder.buildShl(Dst: Ty, Src0: X, Src1: IsFSHL ? ShAmt : InvShAmt).getReg(Idx: 0);
6543 ShY = MIRBuilder.buildLShr(Dst: Ty, Src0: Y, Src1: IsFSHL ? InvShAmt : ShAmt).getReg(Idx: 0);
6544 } else {
6545 // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
6546 // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
6547 auto Mask = MIRBuilder.buildConstant(Res: ShTy, Val: BW - 1);
6548 if (isPowerOf2_32(Value: BW)) {
6549 // Z % BW -> Z & (BW - 1)
6550 ShAmt = MIRBuilder.buildAnd(Dst: ShTy, Src0: Z, Src1: Mask).getReg(Idx: 0);
6551 // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
6552 auto NotZ = MIRBuilder.buildNot(Dst: ShTy, Src0: Z);
6553 InvShAmt = MIRBuilder.buildAnd(Dst: ShTy, Src0: NotZ, Src1: Mask).getReg(Idx: 0);
6554 } else {
6555 auto BitWidthC = MIRBuilder.buildConstant(Res: ShTy, Val: BW);
6556 ShAmt = MIRBuilder.buildURem(Dst: ShTy, Src0: Z, Src1: BitWidthC).getReg(Idx: 0);
6557 InvShAmt = MIRBuilder.buildSub(Dst: ShTy, Src0: Mask, Src1: ShAmt).getReg(Idx: 0);
6558 }
6559
6560 auto One = MIRBuilder.buildConstant(Res: ShTy, Val: 1);
6561 if (IsFSHL) {
6562 ShX = MIRBuilder.buildShl(Dst: Ty, Src0: X, Src1: ShAmt).getReg(Idx: 0);
6563 auto ShY1 = MIRBuilder.buildLShr(Dst: Ty, Src0: Y, Src1: One);
6564 ShY = MIRBuilder.buildLShr(Dst: Ty, Src0: ShY1, Src1: InvShAmt).getReg(Idx: 0);
6565 } else {
6566 auto ShX1 = MIRBuilder.buildShl(Dst: Ty, Src0: X, Src1: One);
6567 ShX = MIRBuilder.buildShl(Dst: Ty, Src0: ShX1, Src1: InvShAmt).getReg(Idx: 0);
6568 ShY = MIRBuilder.buildLShr(Dst: Ty, Src0: Y, Src1: ShAmt).getReg(Idx: 0);
6569 }
6570 }
6571
6572 MIRBuilder.buildOr(Dst, Src0: ShX, Src1: ShY);
6573 MI.eraseFromParent();
6574 return Legalized;
6575}
6576
6577LegalizerHelper::LegalizeResult
6578LegalizerHelper::lowerFunnelShift(MachineInstr &MI) {
6579 // These operations approximately do the following (while avoiding undefined
6580 // shifts by BW):
6581 // G_FSHL: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
6582 // G_FSHR: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
6583 Register Dst = MI.getOperand(i: 0).getReg();
6584 LLT Ty = MRI.getType(Reg: Dst);
6585 LLT ShTy = MRI.getType(Reg: MI.getOperand(i: 3).getReg());
6586
6587 bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
6588 unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
6589
6590 // TODO: Use smarter heuristic that accounts for vector legalization.
6591 if (LI.getAction(Query: {RevOpcode, {Ty, ShTy}}).Action == Lower)
6592 return lowerFunnelShiftAsShifts(MI);
6593
6594 // This only works for powers of 2; fall back to shifts if it fails.
6595 LegalizerHelper::LegalizeResult Result = lowerFunnelShiftWithInverse(MI);
6596 if (Result == UnableToLegalize)
6597 return lowerFunnelShiftAsShifts(MI);
6598 return Result;
6599}
6600
6601LegalizerHelper::LegalizeResult LegalizerHelper::lowerEXT(MachineInstr &MI) {
6602 auto [Dst, Src] = MI.getFirst2Regs();
6603 LLT DstTy = MRI.getType(Reg: Dst);
6604 LLT SrcTy = MRI.getType(Reg: Src);
6605
6606 uint32_t DstTySize = DstTy.getSizeInBits();
6607 uint32_t DstTyScalarSize = DstTy.getScalarSizeInBits();
6608 uint32_t SrcTyScalarSize = SrcTy.getScalarSizeInBits();
6609
6610 if (!isPowerOf2_32(Value: DstTySize) || !isPowerOf2_32(Value: DstTyScalarSize) ||
6611 !isPowerOf2_32(Value: SrcTyScalarSize))
6612 return UnableToLegalize;
6613
6614 // The step between the extends is too large; split it by creating an
6615 // intermediate extend instruction.
6616 if (SrcTyScalarSize * 2 < DstTyScalarSize) {
6617 LLT MidTy = SrcTy.changeElementSize(NewEltSize: SrcTyScalarSize * 2);
6618 // If the destination type is illegal, split the extend into multiple steps:
6619 // ext x -> merge(ext(unmerge(ext x to MidTy)))
6620 auto NewExt = MIRBuilder.buildInstr(Opc: MI.getOpcode(), DstOps: {MidTy}, SrcOps: {Src});
6621 // Unmerge the vector
6622 LLT EltTy = MidTy.changeElementCount(
6623 EC: MidTy.getElementCount().divideCoefficientBy(RHS: 2));
6624 auto UnmergeSrc = MIRBuilder.buildUnmerge(Res: EltTy, Op: NewExt);
6625
6626 // ZExt the vectors
6627 LLT ZExtResTy = DstTy.changeElementCount(
6628 EC: DstTy.getElementCount().divideCoefficientBy(RHS: 2));
6629 auto ZExtRes1 = MIRBuilder.buildInstr(Opc: MI.getOpcode(), DstOps: {ZExtResTy},
6630 SrcOps: {UnmergeSrc.getReg(Idx: 0)});
6631 auto ZExtRes2 = MIRBuilder.buildInstr(Opc: MI.getOpcode(), DstOps: {ZExtResTy},
6632 SrcOps: {UnmergeSrc.getReg(Idx: 1)});
6633
6634 // Merge the ending vectors
6635 MIRBuilder.buildMergeLikeInstr(Res: Dst, Ops: {ZExtRes1, ZExtRes2});
6636
6637 MI.eraseFromParent();
6638 return Legalized;
6639 }
6640 return UnableToLegalize;
6641}
6642
6643LegalizerHelper::LegalizeResult LegalizerHelper::lowerTRUNC(MachineInstr &MI) {
6645 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
6646 // Similar to how operand splitting is done in SelectionDAG, we can handle
6647 // %res(v8s8) = G_TRUNC %in(v8s32) by generating:
6648 // %inlo(<4x s32>), %inhi(<4 x s32>) = G_UNMERGE %in(<8 x s32>)
6649 // %lo16(<4 x s16>) = G_TRUNC %inlo
6650 // %hi16(<4 x s16>) = G_TRUNC %inhi
6651 // %in16(<8 x s16>) = G_CONCAT_VECTORS %lo16, %hi16
6652 // %res(<8 x s8>) = G_TRUNC %in16
6653
6654 assert(MI.getOpcode() == TargetOpcode::G_TRUNC);
6655
6656 Register DstReg = MI.getOperand(i: 0).getReg();
6657 Register SrcReg = MI.getOperand(i: 1).getReg();
6658 LLT DstTy = MRI.getType(Reg: DstReg);
6659 LLT SrcTy = MRI.getType(Reg: SrcReg);
6660
6661 if (DstTy.isVector() && isPowerOf2_32(Value: DstTy.getNumElements()) &&
6662 isPowerOf2_32(Value: DstTy.getScalarSizeInBits()) &&
6663 isPowerOf2_32(Value: SrcTy.getNumElements()) &&
6664 isPowerOf2_32(Value: SrcTy.getScalarSizeInBits())) {
6665 // Split input type.
6666 LLT SplitSrcTy = SrcTy.changeElementCount(
6667 EC: SrcTy.getElementCount().divideCoefficientBy(RHS: 2));
6668
6669 // First, split the source into two smaller vectors.
6670 SmallVector<Register, 2> SplitSrcs;
6671 extractParts(Reg: SrcReg, Ty: SplitSrcTy, NumParts: 2, VRegs&: SplitSrcs, MIRBuilder, MRI);
6672
6673 // Truncate the splits into intermediate narrower elements.
6674 LLT InterTy;
6675 if (DstTy.getScalarSizeInBits() * 2 < SrcTy.getScalarSizeInBits())
6676 InterTy = SplitSrcTy.changeElementSize(NewEltSize: DstTy.getScalarSizeInBits() * 2);
6677 else
6678 InterTy = SplitSrcTy.changeElementSize(NewEltSize: DstTy.getScalarSizeInBits());
6679 for (unsigned I = 0; I < SplitSrcs.size(); ++I) {
6680 SplitSrcs[I] = MIRBuilder.buildTrunc(Res: InterTy, Op: SplitSrcs[I]).getReg(Idx: 0);
6681 }
6682
6683 // Combine the new truncates into one vector
6684 auto Merge = MIRBuilder.buildMergeLikeInstr(
6685 Res: DstTy.changeElementSize(NewEltSize: InterTy.getScalarSizeInBits()), Ops: SplitSrcs);
6686
6687 // Truncate the new vector to the final result type
6688 if (DstTy.getScalarSizeInBits() * 2 < SrcTy.getScalarSizeInBits())
6689 MIRBuilder.buildTrunc(Res: MI.getOperand(i: 0).getReg(), Op: Merge.getReg(Idx: 0));
6690 else
6691 MIRBuilder.buildCopy(Res: MI.getOperand(i: 0).getReg(), Op: Merge.getReg(Idx: 0));
6692
6693 MI.eraseFromParent();
6694
6695 return Legalized;
6696 }
6697 return UnableToLegalize;
6698}
6699
6700LegalizerHelper::LegalizeResult
6701LegalizerHelper::lowerRotateWithReverseRotate(MachineInstr &MI) {
6702 auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] = MI.getFirst3RegLLTs();
6703 auto Zero = MIRBuilder.buildConstant(Res: AmtTy, Val: 0);
6704 bool IsLeft = MI.getOpcode() == TargetOpcode::G_ROTL;
6705 unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
6706 auto Neg = MIRBuilder.buildSub(Dst: AmtTy, Src0: Zero, Src1: Amt);
6707 MIRBuilder.buildInstr(Opc: RevRot, DstOps: {Dst}, SrcOps: {Src, Neg});
6708 MI.eraseFromParent();
6709 return Legalized;
6710}
6711
6712LegalizerHelper::LegalizeResult LegalizerHelper::lowerRotate(MachineInstr &MI) {
6713 auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] = MI.getFirst3RegLLTs();
6714
6715 unsigned EltSizeInBits = DstTy.getScalarSizeInBits();
6716 bool IsLeft = MI.getOpcode() == TargetOpcode::G_ROTL;
6717
6718 MIRBuilder.setInstrAndDebugLoc(MI);
6719
6720 // If a rotate in the other direction is supported, use it.
6721 unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
6722 if (LI.isLegalOrCustom(Query: {RevRot, {DstTy, SrcTy}}) &&
6723 isPowerOf2_32(Value: EltSizeInBits))
6724 return lowerRotateWithReverseRotate(MI);
6725
6726 // If a funnel shift is supported, use it.
6727 unsigned FShOpc = IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
6728 unsigned RevFsh = !IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
6729 bool IsFShLegal = false;
6730 if ((IsFShLegal = LI.isLegalOrCustom(Query: {FShOpc, {DstTy, AmtTy}})) ||
6731 LI.isLegalOrCustom(Query: {RevFsh, {DstTy, AmtTy}})) {
6732 auto buildFunnelShift = [&](unsigned Opc, Register R1, Register R2,
6733 Register R3) {
6734 MIRBuilder.buildInstr(Opc, DstOps: {R1}, SrcOps: {R2, R2, R3});
6735 MI.eraseFromParent();
6736 return Legalized;
6737 };
6738 // If a funnel shift in the other direction is supported, use it.
6739 if (IsFShLegal) {
6740 return buildFunnelShift(FShOpc, Dst, Src, Amt);
6741 } else if (isPowerOf2_32(Value: EltSizeInBits)) {
6742 Amt = MIRBuilder.buildNeg(Dst: DstTy, Src0: Amt).getReg(Idx: 0);
6743 return buildFunnelShift(RevFsh, Dst, Src, Amt);
6744 }
6745 }
6746
6747 auto Zero = MIRBuilder.buildConstant(Res: AmtTy, Val: 0);
6748 unsigned ShOpc = IsLeft ? TargetOpcode::G_SHL : TargetOpcode::G_LSHR;
6749 unsigned RevShiftOpc = IsLeft ? TargetOpcode::G_LSHR : TargetOpcode::G_SHL;
6750 auto BitWidthMinusOneC = MIRBuilder.buildConstant(Res: AmtTy, Val: EltSizeInBits - 1);
6751 Register ShVal;
6752 Register RevShiftVal;
6753 if (isPowerOf2_32(Value: EltSizeInBits)) {
6754 // (rotl x, c) -> x << (c & (w - 1)) | x >> (-c & (w - 1))
6755 // (rotr x, c) -> x >> (c & (w - 1)) | x << (-c & (w - 1))
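// For example, rotl of an 8-bit value by 11 uses shift amounts 11 & 7 = 3
// and -11 & 7 = 5, which sum to the bit width; an amount that is a multiple
// of 8 makes both shift amounts 0, leaving the value unchanged.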
6756 auto NegAmt = MIRBuilder.buildSub(Dst: AmtTy, Src0: Zero, Src1: Amt);
6757 auto ShAmt = MIRBuilder.buildAnd(Dst: AmtTy, Src0: Amt, Src1: BitWidthMinusOneC);
6758 ShVal = MIRBuilder.buildInstr(Opc: ShOpc, DstOps: {DstTy}, SrcOps: {Src, ShAmt}).getReg(Idx: 0);
6759 auto RevAmt = MIRBuilder.buildAnd(Dst: AmtTy, Src0: NegAmt, Src1: BitWidthMinusOneC);
6760 RevShiftVal =
6761 MIRBuilder.buildInstr(Opc: RevShiftOpc, DstOps: {DstTy}, SrcOps: {Src, RevAmt}).getReg(Idx: 0);
6762 } else {
6763 // (rotl x, c) -> x << (c % w) | x >> 1 >> (w - 1 - (c % w))
6764 // (rotr x, c) -> x >> (c % w) | x << 1 << (w - 1 - (c % w))
6765 auto BitWidthC = MIRBuilder.buildConstant(Res: AmtTy, Val: EltSizeInBits);
6766 auto ShAmt = MIRBuilder.buildURem(Dst: AmtTy, Src0: Amt, Src1: BitWidthC);
6767 ShVal = MIRBuilder.buildInstr(Opc: ShOpc, DstOps: {DstTy}, SrcOps: {Src, ShAmt}).getReg(Idx: 0);
6768 auto RevAmt = MIRBuilder.buildSub(Dst: AmtTy, Src0: BitWidthMinusOneC, Src1: ShAmt);
6769 auto One = MIRBuilder.buildConstant(Res: AmtTy, Val: 1);
6770 auto Inner = MIRBuilder.buildInstr(Opc: RevShiftOpc, DstOps: {DstTy}, SrcOps: {Src, One});
6771 RevShiftVal =
6772 MIRBuilder.buildInstr(Opc: RevShiftOpc, DstOps: {DstTy}, SrcOps: {Inner, RevAmt}).getReg(Idx: 0);
6773 }
6774 MIRBuilder.buildOr(Dst, Src0: ShVal, Src1: RevShiftVal);
6775 MI.eraseFromParent();
6776 return Legalized;
6777}
6778
6779// Expand s32 = G_UITOFP s64 using bit operations to an IEEE float
6780// representation.
6781LegalizerHelper::LegalizeResult
6782LegalizerHelper::lowerU64ToF32BitOps(MachineInstr &MI) {
6783 auto [Dst, Src] = MI.getFirst2Regs();
6784 const LLT S64 = LLT::scalar(SizeInBits: 64);
6785 const LLT S32 = LLT::scalar(SizeInBits: 32);
6786 const LLT S1 = LLT::scalar(SizeInBits: 1);
6787
6788 assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S32);
6789
6790 // unsigned cul2f(ulong u) {
6791 // uint lz = clz(u);
6792 // uint e = (u != 0) ? 127U + 63U - lz : 0;
6793 // u = (u << lz) & 0x7fffffffffffffffUL;
6794 // ulong t = u & 0xffffffffffUL;
6795 // uint v = (e << 23) | (uint)(u >> 40);
6796 // uint r = t > 0x8000000000UL ? 1U : (t == 0x8000000000UL ? v & 1U : 0U);
6797 // return as_float(v + r);
6798 // }
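// The rounding term r implements round-to-nearest-even on the 40 mantissa
// bits discarded by the shift: round up when t is above the halfway point
// 0x8000000000 (2^39), and toward an even mantissa when exactly halfway.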
6799
6800 auto Zero32 = MIRBuilder.buildConstant(Res: S32, Val: 0);
6801 auto Zero64 = MIRBuilder.buildConstant(Res: S64, Val: 0);
6802
6803 auto LZ = MIRBuilder.buildCTLZ_ZERO_UNDEF(Dst: S32, Src0: Src);
6804
6805 auto K = MIRBuilder.buildConstant(Res: S32, Val: 127U + 63U);
6806 auto Sub = MIRBuilder.buildSub(Dst: S32, Src0: K, Src1: LZ);
6807
6808 auto NotZero = MIRBuilder.buildICmp(Pred: CmpInst::ICMP_NE, Res: S1, Op0: Src, Op1: Zero64);
6809 auto E = MIRBuilder.buildSelect(Res: S32, Tst: NotZero, Op0: Sub, Op1: Zero32);
6810
6811 auto Mask0 = MIRBuilder.buildConstant(Res: S64, Val: (-1ULL) >> 1);
6812 auto ShlLZ = MIRBuilder.buildShl(Dst: S64, Src0: Src, Src1: LZ);
6813
6814 auto U = MIRBuilder.buildAnd(Dst: S64, Src0: ShlLZ, Src1: Mask0);
6815
6816 auto Mask1 = MIRBuilder.buildConstant(Res: S64, Val: 0xffffffffffULL);
6817 auto T = MIRBuilder.buildAnd(Dst: S64, Src0: U, Src1: Mask1);
6818
6819 auto UShl = MIRBuilder.buildLShr(Dst: S64, Src0: U, Src1: MIRBuilder.buildConstant(Res: S64, Val: 40));
6820 auto ShlE = MIRBuilder.buildShl(Dst: S32, Src0: E, Src1: MIRBuilder.buildConstant(Res: S32, Val: 23));
6821 auto V = MIRBuilder.buildOr(Dst: S32, Src0: ShlE, Src1: MIRBuilder.buildTrunc(Res: S32, Op: UShl));
6822
6823 auto C = MIRBuilder.buildConstant(Res: S64, Val: 0x8000000000ULL);
6824 auto RCmp = MIRBuilder.buildICmp(Pred: CmpInst::ICMP_UGT, Res: S1, Op0: T, Op1: C);
6825 auto TCmp = MIRBuilder.buildICmp(Pred: CmpInst::ICMP_EQ, Res: S1, Op0: T, Op1: C);
6826 auto One = MIRBuilder.buildConstant(Res: S32, Val: 1);
6827
6828 auto VTrunc1 = MIRBuilder.buildAnd(Dst: S32, Src0: V, Src1: One);
6829 auto Select0 = MIRBuilder.buildSelect(Res: S32, Tst: TCmp, Op0: VTrunc1, Op1: Zero32);
6830 auto R = MIRBuilder.buildSelect(Res: S32, Tst: RCmp, Op0: One, Op1: Select0);
6831 MIRBuilder.buildAdd(Dst, Src0: V, Src1: R);
6832
6833 MI.eraseFromParent();
6834 return Legalized;
6835}
6836
6837LegalizerHelper::LegalizeResult LegalizerHelper::lowerUITOFP(MachineInstr &MI) {
6838 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
6839
6840 if (SrcTy == LLT::scalar(SizeInBits: 1)) {
6841 auto True = MIRBuilder.buildFConstant(Res: DstTy, Val: 1.0);
6842 auto False = MIRBuilder.buildFConstant(Res: DstTy, Val: 0.0);
6843 MIRBuilder.buildSelect(Res: Dst, Tst: Src, Op0: True, Op1: False);
6844 MI.eraseFromParent();
6845 return Legalized;
6846 }
6847
6848 if (SrcTy != LLT::scalar(SizeInBits: 64))
6849 return UnableToLegalize;
6850
6851 if (DstTy == LLT::scalar(SizeInBits: 32)) {
6852 // TODO: SelectionDAG has several alternative expansions to port which may
6853 // be more reasonable depending on the available instructions. If a target
6854 // has sitofp, does not have CTLZ, or can efficiently use f64 as an
6855 // intermediate type, this is probably worse.
6856 return lowerU64ToF32BitOps(MI);
6857 }
6858
6859 return UnableToLegalize;
6860}
6861
6862LegalizerHelper::LegalizeResult LegalizerHelper::lowerSITOFP(MachineInstr &MI) {
6863 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
6864
6865 const LLT S64 = LLT::scalar(SizeInBits: 64);
6866 const LLT S32 = LLT::scalar(SizeInBits: 32);
6867 const LLT S1 = LLT::scalar(SizeInBits: 1);
6868
6869 if (SrcTy == S1) {
6870 auto True = MIRBuilder.buildFConstant(Res: DstTy, Val: -1.0);
6871 auto False = MIRBuilder.buildFConstant(Res: DstTy, Val: 0.0);
6872 MIRBuilder.buildSelect(Res: Dst, Tst: Src, Op0: True, Op1: False);
6873 MI.eraseFromParent();
6874 return Legalized;
6875 }
6876
6877 if (SrcTy != S64)
6878 return UnableToLegalize;
6879
6880 if (DstTy == S32) {
6881 // signed cl2f(long l) {
6882 // long s = l >> 63;
6883 // float r = cul2f((l + s) ^ s);
6884 // return s ? -r : r;
6885 // }
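// (l + s) ^ s computes |l|: for negative l, s is all ones, so
// (l + s) ^ s = ~(l - 1) = -l; for non-negative l, s is zero and the value
// is unchanged.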
6886 Register L = Src;
6887 auto SignBit = MIRBuilder.buildConstant(Res: S64, Val: 63);
6888 auto S = MIRBuilder.buildAShr(Dst: S64, Src0: L, Src1: SignBit);
6889
6890 auto LPlusS = MIRBuilder.buildAdd(Dst: S64, Src0: L, Src1: S);
6891 auto Xor = MIRBuilder.buildXor(Dst: S64, Src0: LPlusS, Src1: S);
6892 auto R = MIRBuilder.buildUITOFP(Dst: S32, Src0: Xor);
6893
6894 auto RNeg = MIRBuilder.buildFNeg(Dst: S32, Src0: R);
6895 auto SignNotZero = MIRBuilder.buildICmp(Pred: CmpInst::ICMP_NE, Res: S1, Op0: S,
6896 Op1: MIRBuilder.buildConstant(Res: S64, Val: 0));
6897 MIRBuilder.buildSelect(Res: Dst, Tst: SignNotZero, Op0: RNeg, Op1: R);
6898 MI.eraseFromParent();
6899 return Legalized;
6900 }
6901
6902 return UnableToLegalize;
6903}
6904
6905LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOUI(MachineInstr &MI) {
6906 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
6907 const LLT S64 = LLT::scalar(SizeInBits: 64);
6908 const LLT S32 = LLT::scalar(SizeInBits: 32);
6909
6910 if (SrcTy != S64 && SrcTy != S32)
6911 return UnableToLegalize;
6912 if (DstTy != S32 && DstTy != S64)
6913 return UnableToLegalize;
6914
6915 // FPTOSI gives the same result as FPTOUI for positive signed integers.
6916 // FPTOUI additionally has to handle fp values that convert to unsigned integers
6917 // greater than or equal to 2^31 for a 32-bit result or 2^63 for a 64-bit one; call this 2^Exp.
6918
6919 APInt TwoPExpInt = APInt::getSignMask(BitWidth: DstTy.getSizeInBits());
6920 APFloat TwoPExpFP(SrcTy.getSizeInBits() == 32 ? APFloat::IEEEsingle()
6921 : APFloat::IEEEdouble(),
6922 APInt::getZero(numBits: SrcTy.getSizeInBits()));
6923 TwoPExpFP.convertFromAPInt(Input: TwoPExpInt, IsSigned: false, RM: APFloat::rmNearestTiesToEven);
6924
6925 MachineInstrBuilder FPTOSI = MIRBuilder.buildFPTOSI(Dst: DstTy, Src0: Src);
6926
6927 MachineInstrBuilder Threshold = MIRBuilder.buildFConstant(Res: SrcTy, Val: TwoPExpFP);
6928 // For an fp value greater than or equal to Threshold (2^Exp), we use FPTOSI on
6929 // (Value - 2^Exp) and add 2^Exp back by setting the highest bit of the result.
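// For example, converting the float 3.5e9 to a u32: 3.5e9 >= 2^31, so
// FPTOSI(3.5e9 - 2^31) yields 1352516352 and XOR-ing in the sign mask gives
// 3500000000; the select below picks this path because Src is not below the
// threshold.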
6930 MachineInstrBuilder FSub = MIRBuilder.buildFSub(Dst: SrcTy, Src0: Src, Src1: Threshold);
6931 MachineInstrBuilder ResLowBits = MIRBuilder.buildFPTOSI(Dst: DstTy, Src0: FSub);
6932 MachineInstrBuilder ResHighBit = MIRBuilder.buildConstant(Res: DstTy, Val: TwoPExpInt);
6933 MachineInstrBuilder Res = MIRBuilder.buildXor(Dst: DstTy, Src0: ResLowBits, Src1: ResHighBit);
6934
6935 const LLT S1 = LLT::scalar(SizeInBits: 1);
6936
6937 MachineInstrBuilder FCMP =
6938 MIRBuilder.buildFCmp(Pred: CmpInst::FCMP_ULT, Res: S1, Op0: Src, Op1: Threshold);
6939 MIRBuilder.buildSelect(Res: Dst, Tst: FCMP, Op0: FPTOSI, Op1: Res);
6940
6941 MI.eraseFromParent();
6942 return Legalized;
6943}
6944
6945LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOSI(MachineInstr &MI) {
6946 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
6947 const LLT S64 = LLT::scalar(SizeInBits: 64);
6948 const LLT S32 = LLT::scalar(SizeInBits: 32);
6949
6950 // FIXME: Only f32 to i64 conversions are supported.
6951 if (SrcTy.getScalarType() != S32 || DstTy.getScalarType() != S64)
6952 return UnableToLegalize;
6953
6954 // Expand f32 -> i64 conversion
6955 // This algorithm comes from compiler-rt's implementation of fixsfdi:
6956 // https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/fixsfdi.c
6957
6958 unsigned SrcEltBits = SrcTy.getScalarSizeInBits();
6959
6960 auto ExponentMask = MIRBuilder.buildConstant(Res: SrcTy, Val: 0x7F800000);
6961 auto ExponentLoBit = MIRBuilder.buildConstant(Res: SrcTy, Val: 23);
6962
6963 auto AndExpMask = MIRBuilder.buildAnd(Dst: SrcTy, Src0: Src, Src1: ExponentMask);
6964 auto ExponentBits = MIRBuilder.buildLShr(Dst: SrcTy, Src0: AndExpMask, Src1: ExponentLoBit);
6965
6966 auto SignMask = MIRBuilder.buildConstant(Res: SrcTy,
6967 Val: APInt::getSignMask(BitWidth: SrcEltBits));
6968 auto AndSignMask = MIRBuilder.buildAnd(Dst: SrcTy, Src0: Src, Src1: SignMask);
6969 auto SignLowBit = MIRBuilder.buildConstant(Res: SrcTy, Val: SrcEltBits - 1);
6970 auto Sign = MIRBuilder.buildAShr(Dst: SrcTy, Src0: AndSignMask, Src1: SignLowBit);
6971 Sign = MIRBuilder.buildSExt(Res: DstTy, Op: Sign);
6972
6973 auto MantissaMask = MIRBuilder.buildConstant(Res: SrcTy, Val: 0x007FFFFF);
6974 auto AndMantissaMask = MIRBuilder.buildAnd(Dst: SrcTy, Src0: Src, Src1: MantissaMask);
6975 auto K = MIRBuilder.buildConstant(Res: SrcTy, Val: 0x00800000);
6976
6977 auto R = MIRBuilder.buildOr(Dst: SrcTy, Src0: AndMantissaMask, Src1: K);
6978 R = MIRBuilder.buildZExt(Res: DstTy, Op: R);
6979
6980 auto Bias = MIRBuilder.buildConstant(Res: SrcTy, Val: 127);
6981 auto Exponent = MIRBuilder.buildSub(Dst: SrcTy, Src0: ExponentBits, Src1: Bias);
6982 auto SubExponent = MIRBuilder.buildSub(Dst: SrcTy, Src0: Exponent, Src1: ExponentLoBit);
6983 auto ExponentSub = MIRBuilder.buildSub(Dst: SrcTy, Src0: ExponentLoBit, Src1: Exponent);
6984
6985 auto Shl = MIRBuilder.buildShl(Dst: DstTy, Src0: R, Src1: SubExponent);
6986 auto Srl = MIRBuilder.buildLShr(Dst: DstTy, Src0: R, Src1: ExponentSub);
6987
6988 const LLT S1 = LLT::scalar(SizeInBits: 1);
6989 auto CmpGt = MIRBuilder.buildICmp(Pred: CmpInst::ICMP_SGT,
6990 Res: S1, Op0: Exponent, Op1: ExponentLoBit);
6991
6992 R = MIRBuilder.buildSelect(Res: DstTy, Tst: CmpGt, Op0: Shl, Op1: Srl);
6993
6994 auto XorSign = MIRBuilder.buildXor(Dst: DstTy, Src0: R, Src1: Sign);
6995 auto Ret = MIRBuilder.buildSub(Dst: DstTy, Src0: XorSign, Src1: Sign);
6996
6997 auto ZeroSrcTy = MIRBuilder.buildConstant(Res: SrcTy, Val: 0);
6998
6999 auto ExponentLt0 = MIRBuilder.buildICmp(Pred: CmpInst::ICMP_SLT,
7000 Res: S1, Op0: Exponent, Op1: ZeroSrcTy);
7001
7002 auto ZeroDstTy = MIRBuilder.buildConstant(Res: DstTy, Val: 0);
7003 MIRBuilder.buildSelect(Res: Dst, Tst: ExponentLt0, Op0: ZeroDstTy, Op1: Ret);
7004
7005 MI.eraseFromParent();
7006 return Legalized;
7007}
7008
7009// f64 -> f16 conversion using round-to-nearest-even rounding mode.
7010LegalizerHelper::LegalizeResult
7011LegalizerHelper::lowerFPTRUNC_F64_TO_F16(MachineInstr &MI) {
7012 const LLT S1 = LLT::scalar(SizeInBits: 1);
7013 const LLT S32 = LLT::scalar(SizeInBits: 32);
7014
7015 auto [Dst, Src] = MI.getFirst2Regs();
7016 assert(MRI.getType(Dst).getScalarType() == LLT::scalar(16) &&
7017 MRI.getType(Src).getScalarType() == LLT::scalar(64));
7018
7019 if (MRI.getType(Reg: Src).isVector()) // TODO: Handle vectors directly.
7020 return UnableToLegalize;
7021
7022 if (MIRBuilder.getMF().getTarget().Options.UnsafeFPMath) {
7023 unsigned Flags = MI.getFlags();
7024 auto Src32 = MIRBuilder.buildFPTrunc(Res: S32, Op: Src, Flags);
7025 MIRBuilder.buildFPTrunc(Res: Dst, Op: Src32, Flags);
7026 MI.eraseFromParent();
7027 return Legalized;
7028 }
7029
7030 const unsigned ExpMask = 0x7ff;
7031 const unsigned ExpBiasf64 = 1023;
7032 const unsigned ExpBiasf16 = 15;
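// The expansion below works on the two 32-bit halves of the f64: the sign,
// exponent and top mantissa bits come from the high word UH, while the
// remaining low mantissa bits (including the whole low word U) are folded
// into a sticky bit for rounding.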
7033
7034 auto Unmerge = MIRBuilder.buildUnmerge(Res: S32, Op: Src);
7035 Register U = Unmerge.getReg(Idx: 0);
7036 Register UH = Unmerge.getReg(Idx: 1);
7037
7038 auto E = MIRBuilder.buildLShr(Dst: S32, Src0: UH, Src1: MIRBuilder.buildConstant(Res: S32, Val: 20));
7039 E = MIRBuilder.buildAnd(Dst: S32, Src0: E, Src1: MIRBuilder.buildConstant(Res: S32, Val: ExpMask));
7040
7041 // Subtract the fp64 exponent bias (1023) to get the real exponent and
7042 // add the f16 bias (15) to get the biased exponent for the f16 format.
7043 E = MIRBuilder.buildAdd(
7044 Dst: S32, Src0: E, Src1: MIRBuilder.buildConstant(Res: S32, Val: -ExpBiasf64 + ExpBiasf16));
7045
7046 auto M = MIRBuilder.buildLShr(Dst: S32, Src0: UH, Src1: MIRBuilder.buildConstant(Res: S32, Val: 8));
7047 M = MIRBuilder.buildAnd(Dst: S32, Src0: M, Src1: MIRBuilder.buildConstant(Res: S32, Val: 0xffe));
7048
7049 auto MaskedSig = MIRBuilder.buildAnd(Dst: S32, Src0: UH,
7050 Src1: MIRBuilder.buildConstant(Res: S32, Val: 0x1ff));
7051 MaskedSig = MIRBuilder.buildOr(Dst: S32, Src0: MaskedSig, Src1: U);
7052
7053 auto Zero = MIRBuilder.buildConstant(Res: S32, Val: 0);
7054 auto SigCmpNE0 = MIRBuilder.buildICmp(Pred: CmpInst::ICMP_NE, Res: S1, Op0: MaskedSig, Op1: Zero);
7055 auto Lo40Set = MIRBuilder.buildZExt(Res: S32, Op: SigCmpNE0);
7056 M = MIRBuilder.buildOr(Dst: S32, Src0: M, Src1: Lo40Set);
7057
7058 // (M != 0 ? 0x0200 : 0) | 0x7c00;
7059 auto Bits0x200 = MIRBuilder.buildConstant(Res: S32, Val: 0x0200);
7060 auto CmpM_NE0 = MIRBuilder.buildICmp(Pred: CmpInst::ICMP_NE, Res: S1, Op0: M, Op1: Zero);
7061 auto SelectCC = MIRBuilder.buildSelect(Res: S32, Tst: CmpM_NE0, Op0: Bits0x200, Op1: Zero);
7062
7063 auto Bits0x7c00 = MIRBuilder.buildConstant(Res: S32, Val: 0x7c00);
7064 auto I = MIRBuilder.buildOr(Dst: S32, Src0: SelectCC, Src1: Bits0x7c00);
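// I is the f16 bit pattern used when the input is Inf or NaN (selected below
// when E == 1039, the rebased all-ones f64 exponent): 0x7c00 alone encodes
// infinity, and OR-ing in 0x0200 when the mantissa is non-zero yields a
// quiet NaN.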
7065
7066 // N = M | (E << 12);
7067 auto EShl12 = MIRBuilder.buildShl(Dst: S32, Src0: E, Src1: MIRBuilder.buildConstant(Res: S32, Val: 12));
7068 auto N = MIRBuilder.buildOr(Dst: S32, Src0: M, Src1: EShl12);
7069
7070 // B = clamp(1-E, 0, 13);
7071 auto One = MIRBuilder.buildConstant(Res: S32, Val: 1);
7072 auto OneSubExp = MIRBuilder.buildSub(Dst: S32, Src0: One, Src1: E);
7073 auto B = MIRBuilder.buildSMax(Dst: S32, Src0: OneSubExp, Src1: Zero);
7074 B = MIRBuilder.buildSMin(Dst: S32, Src0: B, Src1: MIRBuilder.buildConstant(Res: S32, Val: 13));
7075
7076 auto SigSetHigh = MIRBuilder.buildOr(Dst: S32, Src0: M,
7077 Src1: MIRBuilder.buildConstant(Res: S32, Val: 0x1000));
7078
7079 auto D = MIRBuilder.buildLShr(Dst: S32, Src0: SigSetHigh, Src1: B);
7080 auto D0 = MIRBuilder.buildShl(Dst: S32, Src0: D, Src1: B);
7081
7082 auto D0_NE_SigSetHigh = MIRBuilder.buildICmp(Pred: CmpInst::ICMP_NE, Res: S1,
7083 Op0: D0, Op1: SigSetHigh);
7084 auto D1 = MIRBuilder.buildZExt(Res: S32, Op: D0_NE_SigSetHigh);
7085 D = MIRBuilder.buildOr(Dst: S32, Src0: D, Src1: D1);
7086
7087 auto CmpELtOne = MIRBuilder.buildICmp(Pred: CmpInst::ICMP_SLT, Res: S1, Op0: E, Op1: One);
7088 auto V = MIRBuilder.buildSelect(Res: S32, Tst: CmpELtOne, Op0: D, Op1: N);
7089
7090 auto VLow3 = MIRBuilder.buildAnd(Dst: S32, Src0: V, Src1: MIRBuilder.buildConstant(Res: S32, Val: 7));
7091 V = MIRBuilder.buildLShr(Dst: S32, Src0: V, Src1: MIRBuilder.buildConstant(Res: S32, Val: 2));
7092
7093 auto VLow3Eq3 = MIRBuilder.buildICmp(Pred: CmpInst::ICMP_EQ, Res: S1, Op0: VLow3,
7094 Op1: MIRBuilder.buildConstant(Res: S32, Val: 3));
7095 auto V0 = MIRBuilder.buildZExt(Res: S32, Op: VLow3Eq3);
7096
7097 auto VLow3Gt5 = MIRBuilder.buildICmp(Pred: CmpInst::ICMP_SGT, Res: S1, Op0: VLow3,
7098 Op1: MIRBuilder.buildConstant(Res: S32, Val: 5));
7099 auto V1 = MIRBuilder.buildZExt(Res: S32, Op: VLow3Gt5);
7100
7101 V1 = MIRBuilder.buildOr(Dst: S32, Src0: V0, Src1: V1);
7102 V = MIRBuilder.buildAdd(Dst: S32, Src0: V, Src1: V1);
7103
7104 auto CmpEGt30 = MIRBuilder.buildICmp(Pred: CmpInst::ICMP_SGT, Res: S1,
7105 Op0: E, Op1: MIRBuilder.buildConstant(Res: S32, Val: 30));
7106 V = MIRBuilder.buildSelect(Res: S32, Tst: CmpEGt30,
7107 Op0: MIRBuilder.buildConstant(Res: S32, Val: 0x7c00), Op1: V);
7108
7109 auto CmpEGt1039 = MIRBuilder.buildICmp(Pred: CmpInst::ICMP_EQ, Res: S1,
7110 Op0: E, Op1: MIRBuilder.buildConstant(Res: S32, Val: 1039));
7111 V = MIRBuilder.buildSelect(Res: S32, Tst: CmpEGt1039, Op0: I, Op1: V);
7112
7113 // Extract the sign bit.
7114 auto Sign = MIRBuilder.buildLShr(Dst: S32, Src0: UH, Src1: MIRBuilder.buildConstant(Res: S32, Val: 16));
7115 Sign = MIRBuilder.buildAnd(Dst: S32, Src0: Sign, Src1: MIRBuilder.buildConstant(Res: S32, Val: 0x8000));
7116
7117 // Insert the sign bit
7118 V = MIRBuilder.buildOr(Dst: S32, Src0: Sign, Src1: V);
7119
7120 MIRBuilder.buildTrunc(Res: Dst, Op: V);
7121 MI.eraseFromParent();
7122 return Legalized;
7123}
7124
7125LegalizerHelper::LegalizeResult
7126LegalizerHelper::lowerFPTRUNC(MachineInstr &MI) {
7127 auto [DstTy, SrcTy] = MI.getFirst2LLTs();
7128 const LLT S64 = LLT::scalar(SizeInBits: 64);
7129 const LLT S16 = LLT::scalar(SizeInBits: 16);
7130
7131 if (DstTy.getScalarType() == S16 && SrcTy.getScalarType() == S64)
7132 return lowerFPTRUNC_F64_TO_F16(MI);
7133
7134 return UnableToLegalize;
7135}
7136
7137// TODO: If RHS is a constant SelectionDAGBuilder expands this into a
7138// multiplication tree.
7139LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPOWI(MachineInstr &MI) {
7140 auto [Dst, Src0, Src1] = MI.getFirst3Regs();
7141 LLT Ty = MRI.getType(Reg: Dst);
7142
7143 auto CvtSrc1 = MIRBuilder.buildSITOFP(Dst: Ty, Src0: Src1);
7144 MIRBuilder.buildFPow(Dst, Src0, Src1: CvtSrc1, Flags: MI.getFlags());
7145 MI.eraseFromParent();
7146 return Legalized;
7147}
7148
7149static CmpInst::Predicate minMaxToCompare(unsigned Opc) {
7150 switch (Opc) {
7151 case TargetOpcode::G_SMIN:
7152 return CmpInst::ICMP_SLT;
7153 case TargetOpcode::G_SMAX:
7154 return CmpInst::ICMP_SGT;
7155 case TargetOpcode::G_UMIN:
7156 return CmpInst::ICMP_ULT;
7157 case TargetOpcode::G_UMAX:
7158 return CmpInst::ICMP_UGT;
7159 default:
7160 llvm_unreachable("not in integer min/max");
7161 }
7162}
7163
7164LegalizerHelper::LegalizeResult LegalizerHelper::lowerMinMax(MachineInstr &MI) {
7165 auto [Dst, Src0, Src1] = MI.getFirst3Regs();
7166
7167 const CmpInst::Predicate Pred = minMaxToCompare(Opc: MI.getOpcode());
7168 LLT CmpType = MRI.getType(Reg: Dst).changeElementSize(NewEltSize: 1);
7169
7170 auto Cmp = MIRBuilder.buildICmp(Pred, Res: CmpType, Op0: Src0, Op1: Src1);
7171 MIRBuilder.buildSelect(Res: Dst, Tst: Cmp, Op0: Src0, Op1: Src1);
7172
7173 MI.eraseFromParent();
7174 return Legalized;
7175}
7176
7177LegalizerHelper::LegalizeResult
7178LegalizerHelper::lowerFCopySign(MachineInstr &MI) {
7179 auto [Dst, DstTy, Src0, Src0Ty, Src1, Src1Ty] = MI.getFirst3RegLLTs();
7180 const int Src0Size = Src0Ty.getScalarSizeInBits();
7181 const int Src1Size = Src1Ty.getScalarSizeInBits();
7182
7183 auto SignBitMask = MIRBuilder.buildConstant(
7184 Res: Src0Ty, Val: APInt::getSignMask(BitWidth: Src0Size));
7185
7186 auto NotSignBitMask = MIRBuilder.buildConstant(
7187 Res: Src0Ty, Val: APInt::getLowBitsSet(numBits: Src0Size, loBitsSet: Src0Size - 1));
7188
7189 Register And0 = MIRBuilder.buildAnd(Dst: Src0Ty, Src0, Src1: NotSignBitMask).getReg(Idx: 0);
7190 Register And1;
7191 if (Src0Ty == Src1Ty) {
7192 And1 = MIRBuilder.buildAnd(Dst: Src1Ty, Src0: Src1, Src1: SignBitMask).getReg(Idx: 0);
7193 } else if (Src0Size > Src1Size) {
7194 auto ShiftAmt = MIRBuilder.buildConstant(Res: Src0Ty, Val: Src0Size - Src1Size);
7195 auto Zext = MIRBuilder.buildZExt(Res: Src0Ty, Op: Src1);
7196 auto Shift = MIRBuilder.buildShl(Dst: Src0Ty, Src0: Zext, Src1: ShiftAmt);
7197 And1 = MIRBuilder.buildAnd(Dst: Src0Ty, Src0: Shift, Src1: SignBitMask).getReg(Idx: 0);
7198 } else {
7199 auto ShiftAmt = MIRBuilder.buildConstant(Res: Src1Ty, Val: Src1Size - Src0Size);
7200 auto Shift = MIRBuilder.buildLShr(Dst: Src1Ty, Src0: Src1, Src1: ShiftAmt);
7201 auto Trunc = MIRBuilder.buildTrunc(Res: Src0Ty, Op: Shift);
7202 And1 = MIRBuilder.buildAnd(Dst: Src0Ty, Src0: Trunc, Src1: SignBitMask).getReg(Idx: 0);
7203 }
7204
7205 // Be careful about setting nsz/nnan/ninf on every instruction, since the
7206 // constants are a nan and -0.0, but the final result should preserve
7207 // everything.
7208 unsigned Flags = MI.getFlags();
7209 MIRBuilder.buildOr(Dst, Src0: And0, Src1: And1, Flags);
7210
7211 MI.eraseFromParent();
7212 return Legalized;
7213}
7214
7215LegalizerHelper::LegalizeResult
7216LegalizerHelper::lowerFMinNumMaxNum(MachineInstr &MI) {
7217 unsigned NewOp = MI.getOpcode() == TargetOpcode::G_FMINNUM ?
7218 TargetOpcode::G_FMINNUM_IEEE : TargetOpcode::G_FMAXNUM_IEEE;
7219
7220 auto [Dst, Src0, Src1] = MI.getFirst3Regs();
7221 LLT Ty = MRI.getType(Reg: Dst);
7222
7223 if (!MI.getFlag(Flag: MachineInstr::FmNoNans)) {
7224 // Insert canonicalizes if it's possible we need to quiet to get correct
7225 // sNaN behavior.
7226
7227 // Note this must be done here, and not as an optimization combine in the
7228 // absence of a dedicated quiet-snan instruction as we're using an
7229 // omni-purpose G_FCANONICALIZE.
7230 if (!isKnownNeverSNaN(Val: Src0, MRI))
7231 Src0 = MIRBuilder.buildFCanonicalize(Dst: Ty, Src0, Flags: MI.getFlags()).getReg(Idx: 0);
7232
7233 if (!isKnownNeverSNaN(Val: Src1, MRI))
7234 Src1 = MIRBuilder.buildFCanonicalize(Dst: Ty, Src0: Src1, Flags: MI.getFlags()).getReg(Idx: 0);
7235 }
7236
7237 // If there are no nans, it's safe to simply replace this with the non-IEEE
7238 // version.
7239 MIRBuilder.buildInstr(Opc: NewOp, DstOps: {Dst}, SrcOps: {Src0, Src1}, Flags: MI.getFlags());
7240 MI.eraseFromParent();
7241 return Legalized;
7242}
7243
7244LegalizerHelper::LegalizeResult LegalizerHelper::lowerFMad(MachineInstr &MI) {
7245 // Expand G_FMAD a, b, c -> G_FADD (G_FMUL a, b), c
7246 Register DstReg = MI.getOperand(i: 0).getReg();
7247 LLT Ty = MRI.getType(Reg: DstReg);
7248 unsigned Flags = MI.getFlags();
7249
7250 auto Mul = MIRBuilder.buildFMul(Dst: Ty, Src0: MI.getOperand(i: 1), Src1: MI.getOperand(i: 2),
7251 Flags);
7252 MIRBuilder.buildFAdd(Dst: DstReg, Src0: Mul, Src1: MI.getOperand(i: 3), Flags);
7253 MI.eraseFromParent();
7254 return Legalized;
7255}
7256
7257LegalizerHelper::LegalizeResult
7258LegalizerHelper::lowerIntrinsicRound(MachineInstr &MI) {
7259 auto [DstReg, X] = MI.getFirst2Regs();
7260 const unsigned Flags = MI.getFlags();
7261 const LLT Ty = MRI.getType(Reg: DstReg);
7262 const LLT CondTy = Ty.changeElementSize(NewEltSize: 1);
7263
7264 // round(x) =>
7265 // t = trunc(x);
7266 // d = fabs(x - t);
7267 // o = copysign(d >= 0.5 ? 1.0 : 0.0, x);
7268 // return t + o;
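 // For example, round(2.5): t = 2.0, d = 0.5, o = copysign(1.0, 2.5) = 1.0,
 // giving 3.0; round(-2.5): t = -2.0, d = 0.5, o = -1.0, giving -3.0. Ties
 // are rounded away from zero.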
7269
7270 auto T = MIRBuilder.buildIntrinsicTrunc(Dst: Ty, Src0: X, Flags);
7271
7272 auto Diff = MIRBuilder.buildFSub(Dst: Ty, Src0: X, Src1: T, Flags);
7273 auto AbsDiff = MIRBuilder.buildFAbs(Dst: Ty, Src0: Diff, Flags);
7274
7275 auto Half = MIRBuilder.buildFConstant(Res: Ty, Val: 0.5);
7276 auto Cmp =
7277 MIRBuilder.buildFCmp(Pred: CmpInst::FCMP_OGE, Res: CondTy, Op0: AbsDiff, Op1: Half, Flags);
7278
7279 // Could emit G_UITOFP instead
7280 auto One = MIRBuilder.buildFConstant(Res: Ty, Val: 1.0);
7281 auto Zero = MIRBuilder.buildFConstant(Res: Ty, Val: 0.0);
7282 auto BoolFP = MIRBuilder.buildSelect(Res: Ty, Tst: Cmp, Op0: One, Op1: Zero);
7283 auto SignedOffset = MIRBuilder.buildFCopysign(Dst: Ty, Src0: BoolFP, Src1: X);
7284
7285 MIRBuilder.buildFAdd(Dst: DstReg, Src0: T, Src1: SignedOffset, Flags);
7286
7287 MI.eraseFromParent();
7288 return Legalized;
7289}
7290
7291LegalizerHelper::LegalizeResult LegalizerHelper::lowerFFloor(MachineInstr &MI) {
7292 auto [DstReg, SrcReg] = MI.getFirst2Regs();
7293 unsigned Flags = MI.getFlags();
7294 LLT Ty = MRI.getType(Reg: DstReg);
7295 const LLT CondTy = Ty.changeElementSize(NewEltSize: 1);
7296
7297 // result = trunc(src);
7298 // if (src < 0.0 && src != result)
7299 // result += -1.0.
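 // For example, floor(-2.3): trunc gives -2.0; since -2.3 < 0.0 and differs
 // from -2.0, the AND of the two compares is true and converts via G_SITOFP
 // to -1.0, so the result is -3.0. For non-negative inputs the adjustment is
 // 0.0 and the truncated value is returned unchanged.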
7300
7301 auto Trunc = MIRBuilder.buildIntrinsicTrunc(Dst: Ty, Src0: SrcReg, Flags);
7302 auto Zero = MIRBuilder.buildFConstant(Res: Ty, Val: 0.0);
7303
7304 auto Lt0 = MIRBuilder.buildFCmp(Pred: CmpInst::FCMP_OLT, Res: CondTy,
7305 Op0: SrcReg, Op1: Zero, Flags);
7306 auto NeTrunc = MIRBuilder.buildFCmp(Pred: CmpInst::FCMP_ONE, Res: CondTy,
7307 Op0: SrcReg, Op1: Trunc, Flags);
7308 auto And = MIRBuilder.buildAnd(Dst: CondTy, Src0: Lt0, Src1: NeTrunc);
7309 auto AddVal = MIRBuilder.buildSITOFP(Dst: Ty, Src0: And);
7310
7311 MIRBuilder.buildFAdd(Dst: DstReg, Src0: Trunc, Src1: AddVal, Flags);
7312 MI.eraseFromParent();
7313 return Legalized;
7314}
7315
7316LegalizerHelper::LegalizeResult
7317LegalizerHelper::lowerMergeValues(MachineInstr &MI) {
7318 const unsigned NumOps = MI.getNumOperands();
7319 auto [DstReg, DstTy, Src0Reg, Src0Ty] = MI.getFirst2RegLLTs();
7320 unsigned PartSize = Src0Ty.getSizeInBits();
7321
7322 LLT WideTy = LLT::scalar(SizeInBits: DstTy.getSizeInBits());
7323 Register ResultReg = MIRBuilder.buildZExt(Res: WideTy, Op: Src0Reg).getReg(Idx: 0);
7324
7325 for (unsigned I = 2; I != NumOps; ++I) {
7326 const unsigned Offset = (I - 1) * PartSize;
7327
7328 Register SrcReg = MI.getOperand(i: I).getReg();
7329 auto ZextInput = MIRBuilder.buildZExt(Res: WideTy, Op: SrcReg);
7330
7331 Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
7332 MRI.createGenericVirtualRegister(Ty: WideTy);
7333
7334 auto ShiftAmt = MIRBuilder.buildConstant(Res: WideTy, Val: Offset);
7335 auto Shl = MIRBuilder.buildShl(Dst: WideTy, Src0: ZextInput, Src1: ShiftAmt);
7336 MIRBuilder.buildOr(Dst: NextResult, Src0: ResultReg, Src1: Shl);
7337 ResultReg = NextResult;
7338 }
7339
7340 if (DstTy.isPointer()) {
7341 if (MIRBuilder.getDataLayout().isNonIntegralAddressSpace(
7342 AddrSpace: DstTy.getAddressSpace())) {
7343 LLVM_DEBUG(dbgs() << "Not casting nonintegral address space\n");
7344 return UnableToLegalize;
7345 }
7346
7347 MIRBuilder.buildIntToPtr(Dst: DstReg, Src: ResultReg);
7348 }
7349
7350 MI.eraseFromParent();
7351 return Legalized;
7352}
7353
7354LegalizerHelper::LegalizeResult
7355LegalizerHelper::lowerUnmergeValues(MachineInstr &MI) {
7356 const unsigned NumDst = MI.getNumOperands() - 1;
7357 Register SrcReg = MI.getOperand(i: NumDst).getReg();
7358 Register Dst0Reg = MI.getOperand(i: 0).getReg();
7359 LLT DstTy = MRI.getType(Reg: Dst0Reg);
7360 if (DstTy.isPointer())
7361 return UnableToLegalize; // TODO
7362
7363 SrcReg = coerceToScalar(Val: SrcReg);
7364 if (!SrcReg)
7365 return UnableToLegalize;
7366
7367 // Expand scalarizing unmerge as bitcast to integer and shift.
7368 LLT IntTy = MRI.getType(Reg: SrcReg);
7369
7370 MIRBuilder.buildTrunc(Res: Dst0Reg, Op: SrcReg);
7371
7372 const unsigned DstSize = DstTy.getSizeInBits();
7373 unsigned Offset = DstSize;
7374 for (unsigned I = 1; I != NumDst; ++I, Offset += DstSize) {
7375 auto ShiftAmt = MIRBuilder.buildConstant(Res: IntTy, Val: Offset);
7376 auto Shift = MIRBuilder.buildLShr(Dst: IntTy, Src0: SrcReg, Src1: ShiftAmt);
7377 MIRBuilder.buildTrunc(Res: MI.getOperand(i: I), Op: Shift);
7378 }
7379
7380 MI.eraseFromParent();
7381 return Legalized;
7382}
7383
7384/// Lower a vector extract or insert by writing the vector to a stack temporary
7385/// and reloading the element or vector.
7386///
7387/// %dst = G_EXTRACT_VECTOR_ELT %vec, %idx
7388/// =>
7389/// %stack_temp = G_FRAME_INDEX
7390/// G_STORE %vec, %stack_temp
7391/// %idx = clamp(%idx, %vec.getNumElements())
7392/// %element_ptr = G_PTR_ADD %stack_temp, %idx
7393/// %dst = G_LOAD %element_ptr
7394LegalizerHelper::LegalizeResult
7395LegalizerHelper::lowerExtractInsertVectorElt(MachineInstr &MI) {
7396 Register DstReg = MI.getOperand(i: 0).getReg();
7397 Register SrcVec = MI.getOperand(i: 1).getReg();
7398 Register InsertVal;
7399 if (MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT)
7400 InsertVal = MI.getOperand(i: 2).getReg();
7401
7402 Register Idx = MI.getOperand(i: MI.getNumOperands() - 1).getReg();
7403
7404 LLT VecTy = MRI.getType(Reg: SrcVec);
7405 LLT EltTy = VecTy.getElementType();
7406 unsigned NumElts = VecTy.getNumElements();
7407
7408 int64_t IdxVal;
7409 if (mi_match(R: Idx, MRI, P: m_ICst(Cst&: IdxVal)) && IdxVal < NumElts) {
7410 SmallVector<Register, 8> SrcRegs;
7411 extractParts(Reg: SrcVec, Ty: EltTy, NumParts: NumElts, VRegs&: SrcRegs, MIRBuilder, MRI);
7412
7413 if (InsertVal) {
7414 SrcRegs[IdxVal] = MI.getOperand(i: 2).getReg();
7415 MIRBuilder.buildMergeLikeInstr(Res: DstReg, Ops: SrcRegs);
7416 } else {
7417 MIRBuilder.buildCopy(Res: DstReg, Op: SrcRegs[IdxVal]);
7418 }
7419
7420 MI.eraseFromParent();
7421 return Legalized;
7422 }
7423
7424 if (!EltTy.isByteSized()) { // Not implemented.
7425 LLVM_DEBUG(dbgs() << "Can't handle non-byte element vectors yet\n");
7426 return UnableToLegalize;
7427 }
7428
7429 unsigned EltBytes = EltTy.getSizeInBytes();
7430 Align VecAlign = getStackTemporaryAlignment(Ty: VecTy);
7431 Align EltAlign;
7432
7433 MachinePointerInfo PtrInfo;
7434 auto StackTemp = createStackTemporary(
7435 Bytes: TypeSize::getFixed(ExactSize: VecTy.getSizeInBytes()), Alignment: VecAlign, PtrInfo);
7436 MIRBuilder.buildStore(Val: SrcVec, Addr: StackTemp, PtrInfo, Alignment: VecAlign);
7437
7438 // Get the pointer to the element, and be sure not to hit undefined behavior
7439 // if the index is out of bounds.
7440 Register EltPtr = getVectorElementPointer(VecPtr: StackTemp.getReg(Idx: 0), VecTy, Index: Idx);
7441
7442 if (mi_match(R: Idx, MRI, P: m_ICst(Cst&: IdxVal))) {
7443 int64_t Offset = IdxVal * EltBytes;
7444 PtrInfo = PtrInfo.getWithOffset(O: Offset);
7445 EltAlign = commonAlignment(A: VecAlign, Offset);
7446 } else {
7447 // We lose information with a variable offset.
7448 EltAlign = getStackTemporaryAlignment(Ty: EltTy);
7449 PtrInfo = MachinePointerInfo(MRI.getType(Reg: EltPtr).getAddressSpace());
7450 }
7451
7452 if (InsertVal) {
7453 // Write the inserted element
7454 MIRBuilder.buildStore(Val: InsertVal, Addr: EltPtr, PtrInfo, Alignment: EltAlign);
7455
7456 // Reload the whole vector.
7457 MIRBuilder.buildLoad(Res: DstReg, Addr: StackTemp, PtrInfo, Alignment: VecAlign);
7458 } else {
7459 MIRBuilder.buildLoad(Res: DstReg, Addr: EltPtr, PtrInfo, Alignment: EltAlign);
7460 }
7461
7462 MI.eraseFromParent();
7463 return Legalized;
7464}
7465
7466LegalizerHelper::LegalizeResult
7467LegalizerHelper::lowerShuffleVector(MachineInstr &MI) {
7468 auto [DstReg, DstTy, Src0Reg, Src0Ty, Src1Reg, Src1Ty] =
7469 MI.getFirst3RegLLTs();
7470 LLT IdxTy = LLT::scalar(SizeInBits: 32);
7471
7472 ArrayRef<int> Mask = MI.getOperand(i: 3).getShuffleMask();
7473 Register Undef;
7474 SmallVector<Register, 32> BuildVec;
7475 LLT EltTy = DstTy.getScalarType();
7476
7477 for (int Idx : Mask) {
7478 if (Idx < 0) {
7479 if (!Undef.isValid())
7480 Undef = MIRBuilder.buildUndef(Res: EltTy).getReg(Idx: 0);
7481 BuildVec.push_back(Elt: Undef);
7482 continue;
7483 }
7484
7485 if (Src0Ty.isScalar()) {
7486 BuildVec.push_back(Elt: Idx == 0 ? Src0Reg : Src1Reg);
7487 } else {
7488 int NumElts = Src0Ty.getNumElements();
7489 Register SrcVec = Idx < NumElts ? Src0Reg : Src1Reg;
7490 int ExtractIdx = Idx < NumElts ? Idx : Idx - NumElts;
7491 auto IdxK = MIRBuilder.buildConstant(Res: IdxTy, Val: ExtractIdx);
7492 auto Extract = MIRBuilder.buildExtractVectorElement(Res: EltTy, Val: SrcVec, Idx: IdxK);
7493 BuildVec.push_back(Elt: Extract.getReg(Idx: 0));
7494 }
7495 }
7496
7497 if (DstTy.isScalar())
7498 MIRBuilder.buildCopy(Res: DstReg, Op: BuildVec[0]);
7499 else
7500 MIRBuilder.buildBuildVector(Res: DstReg, Ops: BuildVec);
7501 MI.eraseFromParent();
7502 return Legalized;
7503}
7504
7505Register LegalizerHelper::getDynStackAllocTargetPtr(Register SPReg,
7506 Register AllocSize,
7507 Align Alignment,
7508 LLT PtrTy) {
7509 LLT IntPtrTy = LLT::scalar(SizeInBits: PtrTy.getSizeInBits());
7510
7511 auto SPTmp = MIRBuilder.buildCopy(Res: PtrTy, Op: SPReg);
7512 SPTmp = MIRBuilder.buildCast(Dst: IntPtrTy, Src: SPTmp);
7513
7514 // Subtract the final alloc from the SP. We use G_PTRTOINT here so we don't
7515 // have to generate an extra instruction to negate the alloc and then use
7516 // G_PTR_ADD to add the negative offset.
7517 auto Alloc = MIRBuilder.buildSub(Dst: IntPtrTy, Src0: SPTmp, Src1: AllocSize);
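 // If an alignment was requested, round the new SP down to that alignment by
 // masking with the negated alignment value; e.g. for a 16-byte alignment the
 // mask is -16 (~15), which clears the low four bits.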
7518 if (Alignment > Align(1)) {
7519 APInt AlignMask(IntPtrTy.getSizeInBits(), Alignment.value(), true);
7520 AlignMask.negate();
7521 auto AlignCst = MIRBuilder.buildConstant(Res: IntPtrTy, Val: AlignMask);
7522 Alloc = MIRBuilder.buildAnd(Dst: IntPtrTy, Src0: Alloc, Src1: AlignCst);
7523 }
7524
7525 return MIRBuilder.buildCast(Dst: PtrTy, Src: Alloc).getReg(Idx: 0);
7526}
7527
7528LegalizerHelper::LegalizeResult
7529LegalizerHelper::lowerDynStackAlloc(MachineInstr &MI) {
7530 const auto &MF = *MI.getMF();
7531 const auto &TFI = *MF.getSubtarget().getFrameLowering();
7532 if (TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsUp)
7533 return UnableToLegalize;
7534
7535 Register Dst = MI.getOperand(i: 0).getReg();
7536 Register AllocSize = MI.getOperand(i: 1).getReg();
7537 Align Alignment = assumeAligned(Value: MI.getOperand(i: 2).getImm());
7538
7539 LLT PtrTy = MRI.getType(Reg: Dst);
7540 Register SPReg = TLI.getStackPointerRegisterToSaveRestore();
7541 Register SPTmp =
7542 getDynStackAllocTargetPtr(SPReg, AllocSize, Alignment, PtrTy);
7543
7544 MIRBuilder.buildCopy(Res: SPReg, Op: SPTmp);
7545 MIRBuilder.buildCopy(Res: Dst, Op: SPTmp);
7546
7547 MI.eraseFromParent();
7548 return Legalized;
7549}
7550
7551LegalizerHelper::LegalizeResult
7552LegalizerHelper::lowerStackSave(MachineInstr &MI) {
7553 Register StackPtr = TLI.getStackPointerRegisterToSaveRestore();
7554 if (!StackPtr)
7555 return UnableToLegalize;
7556
7557 MIRBuilder.buildCopy(Res: MI.getOperand(i: 0), Op: StackPtr);
7558 MI.eraseFromParent();
7559 return Legalized;
7560}
7561
7562LegalizerHelper::LegalizeResult
7563LegalizerHelper::lowerStackRestore(MachineInstr &MI) {
7564 Register StackPtr = TLI.getStackPointerRegisterToSaveRestore();
7565 if (!StackPtr)
7566 return UnableToLegalize;
7567
7568 MIRBuilder.buildCopy(Res: StackPtr, Op: MI.getOperand(i: 0));
7569 MI.eraseFromParent();
7570 return Legalized;
7571}
7572
7573LegalizerHelper::LegalizeResult
7574LegalizerHelper::lowerExtract(MachineInstr &MI) {
7575 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7576 unsigned Offset = MI.getOperand(i: 2).getImm();
7577
7578 // Extract sub-vector or one element
7579 if (SrcTy.isVector()) {
7580 unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
7581 unsigned DstSize = DstTy.getSizeInBits();
7582
7583 if ((Offset % SrcEltSize == 0) && (DstSize % SrcEltSize == 0) &&
7584 (Offset + DstSize <= SrcTy.getSizeInBits())) {
7585 // Unmerge and allow access to each Src element for the artifact combiner.
7586 auto Unmerge = MIRBuilder.buildUnmerge(Res: SrcTy.getElementType(), Op: SrcReg);
7587
7588 // Take the element(s) we need to extract and copy them (merging if there is more than one).
7589 SmallVector<Register, 8> SubVectorElts;
7590 for (unsigned Idx = Offset / SrcEltSize;
7591 Idx < (Offset + DstSize) / SrcEltSize; ++Idx) {
7592 SubVectorElts.push_back(Elt: Unmerge.getReg(Idx));
7593 }
7594 if (SubVectorElts.size() == 1)
7595 MIRBuilder.buildCopy(Res: DstReg, Op: SubVectorElts[0]);
7596 else
7597 MIRBuilder.buildMergeLikeInstr(Res: DstReg, Ops: SubVectorElts);
7598
7599 MI.eraseFromParent();
7600 return Legalized;
7601 }
7602 }
7603
7604 if (DstTy.isScalar() &&
7605 (SrcTy.isScalar() ||
7606 (SrcTy.isVector() && DstTy == SrcTy.getElementType()))) {
7607 LLT SrcIntTy = SrcTy;
7608 if (!SrcTy.isScalar()) {
7609 SrcIntTy = LLT::scalar(SizeInBits: SrcTy.getSizeInBits());
7610 SrcReg = MIRBuilder.buildBitcast(Dst: SrcIntTy, Src: SrcReg).getReg(Idx: 0);
7611 }
7612
7613 if (Offset == 0)
7614 MIRBuilder.buildTrunc(Res: DstReg, Op: SrcReg);
7615 else {
7616 auto ShiftAmt = MIRBuilder.buildConstant(Res: SrcIntTy, Val: Offset);
7617 auto Shr = MIRBuilder.buildLShr(Dst: SrcIntTy, Src0: SrcReg, Src1: ShiftAmt);
7618 MIRBuilder.buildTrunc(Res: DstReg, Op: Shr);
7619 }
7620
7621 MI.eraseFromParent();
7622 return Legalized;
7623 }
7624
7625 return UnableToLegalize;
7626}
7627
7628LegalizerHelper::LegalizeResult LegalizerHelper::lowerInsert(MachineInstr &MI) {
7629 auto [Dst, Src, InsertSrc] = MI.getFirst3Regs();
7630 uint64_t Offset = MI.getOperand(i: 3).getImm();
7631
7632 LLT DstTy = MRI.getType(Reg: Src);
7633 LLT InsertTy = MRI.getType(Reg: InsertSrc);
7634
7635 // Insert sub-vector or one element
7636 if (DstTy.isVector() && !InsertTy.isPointer()) {
7637 LLT EltTy = DstTy.getElementType();
7638 unsigned EltSize = EltTy.getSizeInBits();
7639 unsigned InsertSize = InsertTy.getSizeInBits();
7640
7641 if ((Offset % EltSize == 0) && (InsertSize % EltSize == 0) &&
7642 (Offset + InsertSize <= DstTy.getSizeInBits())) {
7643 auto UnmergeSrc = MIRBuilder.buildUnmerge(Res: EltTy, Op: Src);
7644 SmallVector<Register, 8> DstElts;
7645 unsigned Idx = 0;
7646 // Elements from Src that come before the insert's start Offset
7647 for (; Idx < Offset / EltSize; ++Idx) {
7648 DstElts.push_back(Elt: UnmergeSrc.getReg(Idx));
7649 }
7650
7651 // Replace elements in Src with elements from InsertSrc
7652 if (InsertTy.getSizeInBits() > EltSize) {
7653 auto UnmergeInsertSrc = MIRBuilder.buildUnmerge(Res: EltTy, Op: InsertSrc);
7654 for (unsigned i = 0; Idx < (Offset + InsertSize) / EltSize;
7655 ++Idx, ++i) {
7656 DstElts.push_back(Elt: UnmergeInsertSrc.getReg(Idx: i));
7657 }
7658 } else {
7659 DstElts.push_back(Elt: InsertSrc);
7660 ++Idx;
7661 }
7662
7663 // Remaining elements from Src after insert
7664 for (; Idx < DstTy.getNumElements(); ++Idx) {
7665 DstElts.push_back(Elt: UnmergeSrc.getReg(Idx));
7666 }
7667
7668 MIRBuilder.buildMergeLikeInstr(Res: Dst, Ops: DstElts);
7669 MI.eraseFromParent();
7670 return Legalized;
7671 }
7672 }
7673
7674 if (InsertTy.isVector() ||
7675 (DstTy.isVector() && DstTy.getElementType() != InsertTy))
7676 return UnableToLegalize;
7677
7678 const DataLayout &DL = MIRBuilder.getDataLayout();
7679 if ((DstTy.isPointer() &&
7680 DL.isNonIntegralAddressSpace(AddrSpace: DstTy.getAddressSpace())) ||
7681 (InsertTy.isPointer() &&
7682 DL.isNonIntegralAddressSpace(AddrSpace: InsertTy.getAddressSpace()))) {
7683 LLVM_DEBUG(dbgs() << "Not casting non-integral address space integer\n");
7684 return UnableToLegalize;
7685 }
7686
7687 LLT IntDstTy = DstTy;
7688
7689 if (!DstTy.isScalar()) {
7690 IntDstTy = LLT::scalar(SizeInBits: DstTy.getSizeInBits());
7691 Src = MIRBuilder.buildCast(Dst: IntDstTy, Src).getReg(Idx: 0);
7692 }
7693
7694 if (!InsertTy.isScalar()) {
7695 const LLT IntInsertTy = LLT::scalar(SizeInBits: InsertTy.getSizeInBits());
7696 InsertSrc = MIRBuilder.buildPtrToInt(Dst: IntInsertTy, Src: InsertSrc).getReg(Idx: 0);
7697 }
7698
7699 Register ExtInsSrc = MIRBuilder.buildZExt(Res: IntDstTy, Op: InsertSrc).getReg(Idx: 0);
7700 if (Offset != 0) {
7701 auto ShiftAmt = MIRBuilder.buildConstant(Res: IntDstTy, Val: Offset);
7702 ExtInsSrc = MIRBuilder.buildShl(Dst: IntDstTy, Src0: ExtInsSrc, Src1: ShiftAmt).getReg(Idx: 0);
7703 }
7704
7705 APInt MaskVal = APInt::getBitsSetWithWrap(
7706 numBits: DstTy.getSizeInBits(), loBit: Offset + InsertTy.getSizeInBits(), hiBit: Offset);
7707
7708 auto Mask = MIRBuilder.buildConstant(Res: IntDstTy, Val: MaskVal);
7709 auto MaskedSrc = MIRBuilder.buildAnd(Dst: IntDstTy, Src0: Src, Src1: Mask);
7710 auto Or = MIRBuilder.buildOr(Dst: IntDstTy, Src0: MaskedSrc, Src1: ExtInsSrc);
7711
7712 MIRBuilder.buildCast(Dst, Src: Or);
7713 MI.eraseFromParent();
7714 return Legalized;
7715}
7716
7717LegalizerHelper::LegalizeResult
7718LegalizerHelper::lowerSADDO_SSUBO(MachineInstr &MI) {
7719 auto [Dst0, Dst0Ty, Dst1, Dst1Ty, LHS, LHSTy, RHS, RHSTy] =
7720 MI.getFirst4RegLLTs();
7721 const bool IsAdd = MI.getOpcode() == TargetOpcode::G_SADDO;
7722
7723 LLT Ty = Dst0Ty;
7724 LLT BoolTy = Dst1Ty;
7725
7726 Register NewDst0 = MRI.cloneVirtualRegister(VReg: Dst0);
7727
7728 if (IsAdd)
7729 MIRBuilder.buildAdd(Dst: NewDst0, Src0: LHS, Src1: RHS);
7730 else
7731 MIRBuilder.buildSub(Dst: NewDst0, Src0: LHS, Src1: RHS);
7732
7733 // TODO: If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
7734
7735 auto Zero = MIRBuilder.buildConstant(Res: Ty, Val: 0);
7736
7737 // For an addition, the result should be less than one of the operands (LHS)
7738 // if and only if the other operand (RHS) is negative, otherwise there will
7739 // be overflow.
7740 // For a subtraction, the result should be less than one of the operands
7741 // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
7742 // otherwise there will be overflow.
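 // For example, with 8-bit signed values 100 + 50 wraps to -106: the result
 // is less than the LHS (100) while the RHS (50) is non-negative, so the XOR
 // of the two compares below is true and overflow is reported.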
7743 auto ResultLowerThanLHS =
7744 MIRBuilder.buildICmp(Pred: CmpInst::ICMP_SLT, Res: BoolTy, Op0: NewDst0, Op1: LHS);
7745 auto ConditionRHS = MIRBuilder.buildICmp(
7746 Pred: IsAdd ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGT, Res: BoolTy, Op0: RHS, Op1: Zero);
7747
7748 MIRBuilder.buildXor(Dst: Dst1, Src0: ConditionRHS, Src1: ResultLowerThanLHS);
7749
7750 MIRBuilder.buildCopy(Res: Dst0, Op: NewDst0);
7751 MI.eraseFromParent();
7752
7753 return Legalized;
7754}
7755
7756LegalizerHelper::LegalizeResult
7757LegalizerHelper::lowerAddSubSatToMinMax(MachineInstr &MI) {
7758 auto [Res, LHS, RHS] = MI.getFirst3Regs();
7759 LLT Ty = MRI.getType(Reg: Res);
7760 bool IsSigned;
7761 bool IsAdd;
7762 unsigned BaseOp;
7763 switch (MI.getOpcode()) {
7764 default:
7765 llvm_unreachable("unexpected addsat/subsat opcode");
7766 case TargetOpcode::G_UADDSAT:
7767 IsSigned = false;
7768 IsAdd = true;
7769 BaseOp = TargetOpcode::G_ADD;
7770 break;
7771 case TargetOpcode::G_SADDSAT:
7772 IsSigned = true;
7773 IsAdd = true;
7774 BaseOp = TargetOpcode::G_ADD;
7775 break;
7776 case TargetOpcode::G_USUBSAT:
7777 IsSigned = false;
7778 IsAdd = false;
7779 BaseOp = TargetOpcode::G_SUB;
7780 break;
7781 case TargetOpcode::G_SSUBSAT:
7782 IsSigned = true;
7783 IsAdd = false;
7784 BaseOp = TargetOpcode::G_SUB;
7785 break;
7786 }
7787
7788 if (IsSigned) {
7789 // sadd.sat(a, b) ->
7790 // hi = 0x7fffffff - smax(a, 0)
7791 // lo = 0x80000000 - smin(a, 0)
7792 // a + smin(smax(lo, b), hi)
7793 // ssub.sat(a, b) ->
7794 // lo = smax(a, -1) - 0x7fffffff
7795 // hi = smin(a, -1) - 0x80000000
7796 // a - smin(smax(lo, b), hi)
7797 // TODO: AMDGPU can use a "median of 3" instruction here:
7798 // a +/- med3(lo, b, hi)
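 // For example, 8-bit sadd.sat(100, 100): hi = 127 - smax(100, 0) = 27,
 // lo = -128 - smin(100, 0) = -128, the clamped RHS is
 // smin(smax(-128, 100), 27) = 27, and 100 + 27 = 127, the saturated result.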
7799 uint64_t NumBits = Ty.getScalarSizeInBits();
7800 auto MaxVal =
7801 MIRBuilder.buildConstant(Res: Ty, Val: APInt::getSignedMaxValue(numBits: NumBits));
7802 auto MinVal =
7803 MIRBuilder.buildConstant(Res: Ty, Val: APInt::getSignedMinValue(numBits: NumBits));
7804 MachineInstrBuilder Hi, Lo;
7805 if (IsAdd) {
7806 auto Zero = MIRBuilder.buildConstant(Res: Ty, Val: 0);
7807 Hi = MIRBuilder.buildSub(Dst: Ty, Src0: MaxVal, Src1: MIRBuilder.buildSMax(Dst: Ty, Src0: LHS, Src1: Zero));
7808 Lo = MIRBuilder.buildSub(Dst: Ty, Src0: MinVal, Src1: MIRBuilder.buildSMin(Dst: Ty, Src0: LHS, Src1: Zero));
7809 } else {
7810 auto NegOne = MIRBuilder.buildConstant(Res: Ty, Val: -1);
7811 Lo = MIRBuilder.buildSub(Dst: Ty, Src0: MIRBuilder.buildSMax(Dst: Ty, Src0: LHS, Src1: NegOne),
7812 Src1: MaxVal);
7813 Hi = MIRBuilder.buildSub(Dst: Ty, Src0: MIRBuilder.buildSMin(Dst: Ty, Src0: LHS, Src1: NegOne),
7814 Src1: MinVal);
7815 }
7816 auto RHSClamped =
7817 MIRBuilder.buildSMin(Dst: Ty, Src0: MIRBuilder.buildSMax(Dst: Ty, Src0: Lo, Src1: RHS), Src1: Hi);
7818 MIRBuilder.buildInstr(Opc: BaseOp, DstOps: {Res}, SrcOps: {LHS, RHSClamped});
7819 } else {
7820 // uadd.sat(a, b) -> a + umin(~a, b)
7821 // usub.sat(a, b) -> a - umin(a, b)
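 // For example, 8-bit uadd.sat(200, 100): ~200 = 55, umin(55, 100) = 55, and
 // 200 + 55 = 255, the unsigned maximum.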
7822 Register Not = IsAdd ? MIRBuilder.buildNot(Dst: Ty, Src0: LHS).getReg(Idx: 0) : LHS;
7823 auto Min = MIRBuilder.buildUMin(Dst: Ty, Src0: Not, Src1: RHS);
7824 MIRBuilder.buildInstr(Opc: BaseOp, DstOps: {Res}, SrcOps: {LHS, Min});
7825 }
7826
7827 MI.eraseFromParent();
7828 return Legalized;
7829}
7830
7831LegalizerHelper::LegalizeResult
7832LegalizerHelper::lowerAddSubSatToAddoSubo(MachineInstr &MI) {
7833 auto [Res, LHS, RHS] = MI.getFirst3Regs();
7834 LLT Ty = MRI.getType(Reg: Res);
7835 LLT BoolTy = Ty.changeElementSize(NewEltSize: 1);
7836 bool IsSigned;
7837 bool IsAdd;
7838 unsigned OverflowOp;
7839 switch (MI.getOpcode()) {
7840 default:
7841 llvm_unreachable("unexpected addsat/subsat opcode");
7842 case TargetOpcode::G_UADDSAT:
7843 IsSigned = false;
7844 IsAdd = true;
7845 OverflowOp = TargetOpcode::G_UADDO;
7846 break;
7847 case TargetOpcode::G_SADDSAT:
7848 IsSigned = true;
7849 IsAdd = true;
7850 OverflowOp = TargetOpcode::G_SADDO;
7851 break;
7852 case TargetOpcode::G_USUBSAT:
7853 IsSigned = false;
7854 IsAdd = false;
7855 OverflowOp = TargetOpcode::G_USUBO;
7856 break;
7857 case TargetOpcode::G_SSUBSAT:
7858 IsSigned = true;
7859 IsAdd = false;
7860 OverflowOp = TargetOpcode::G_SSUBO;
7861 break;
7862 }
7863
7864 auto OverflowRes =
7865 MIRBuilder.buildInstr(Opc: OverflowOp, DstOps: {Ty, BoolTy}, SrcOps: {LHS, RHS});
7866 Register Tmp = OverflowRes.getReg(Idx: 0);
7867 Register Ov = OverflowRes.getReg(Idx: 1);
7868 MachineInstrBuilder Clamp;
7869 if (IsSigned) {
7870 // sadd.sat(a, b) ->
7871 // {tmp, ov} = saddo(a, b)
7872 // ov ? (tmp >>s 31) + 0x80000000 : tmp
7873 // ssub.sat(a, b) ->
7874 // {tmp, ov} = ssubo(a, b)
7875 // ov ? (tmp >>s 31) + 0x80000000 : tmp
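 // On overflow the wrapped result has the sign opposite to the true result,
 // so (tmp >>s (NumBits - 1)) is all-ones when the true result was too large
 // and zero when it was too small; adding the signed minimum then yields the
 // signed maximum or minimum respectively.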
7876 uint64_t NumBits = Ty.getScalarSizeInBits();
7877 auto ShiftAmount = MIRBuilder.buildConstant(Res: Ty, Val: NumBits - 1);
7878 auto Sign = MIRBuilder.buildAShr(Dst: Ty, Src0: Tmp, Src1: ShiftAmount);
7879 auto MinVal =
7880 MIRBuilder.buildConstant(Res: Ty, Val: APInt::getSignedMinValue(numBits: NumBits));
7881 Clamp = MIRBuilder.buildAdd(Dst: Ty, Src0: Sign, Src1: MinVal);
7882 } else {
7883 // uadd.sat(a, b) ->
7884 // {tmp, ov} = uaddo(a, b)
7885 // ov ? 0xffffffff : tmp
7886 // usub.sat(a, b) ->
7887 // {tmp, ov} = usubo(a, b)
7888 // ov ? 0 : tmp
7889 Clamp = MIRBuilder.buildConstant(Res: Ty, Val: IsAdd ? -1 : 0);
7890 }
7891 MIRBuilder.buildSelect(Res, Tst: Ov, Op0: Clamp, Op1: Tmp);
7892
7893 MI.eraseFromParent();
7894 return Legalized;
7895}
7896
7897LegalizerHelper::LegalizeResult
7898LegalizerHelper::lowerShlSat(MachineInstr &MI) {
7899 assert((MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
7900 MI.getOpcode() == TargetOpcode::G_USHLSAT) &&
7901 "Expected shlsat opcode!");
7902 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SSHLSAT;
7903 auto [Res, LHS, RHS] = MI.getFirst3Regs();
7904 LLT Ty = MRI.getType(Reg: Res);
7905 LLT BoolTy = Ty.changeElementSize(NewEltSize: 1);
7906
7907 unsigned BW = Ty.getScalarSizeInBits();
7908 auto Result = MIRBuilder.buildShl(Dst: Ty, Src0: LHS, Src1: RHS);
7909 auto Orig = IsSigned ? MIRBuilder.buildAShr(Dst: Ty, Src0: Result, Src1: RHS)
7910 : MIRBuilder.buildLShr(Dst: Ty, Src0: Result, Src1: RHS);
7911
7912 MachineInstrBuilder SatVal;
7913 if (IsSigned) {
7914 auto SatMin = MIRBuilder.buildConstant(Res: Ty, Val: APInt::getSignedMinValue(numBits: BW));
7915 auto SatMax = MIRBuilder.buildConstant(Res: Ty, Val: APInt::getSignedMaxValue(numBits: BW));
7916 auto Cmp = MIRBuilder.buildICmp(Pred: CmpInst::ICMP_SLT, Res: BoolTy, Op0: LHS,
7917 Op1: MIRBuilder.buildConstant(Res: Ty, Val: 0));
7918 SatVal = MIRBuilder.buildSelect(Res: Ty, Tst: Cmp, Op0: SatMin, Op1: SatMax);
7919 } else {
7920 SatVal = MIRBuilder.buildConstant(Res: Ty, Val: APInt::getMaxValue(numBits: BW));
7921 }
7922 auto Ov = MIRBuilder.buildICmp(Pred: CmpInst::ICMP_NE, Res: BoolTy, Op0: LHS, Op1: Orig);
7923 MIRBuilder.buildSelect(Res, Tst: Ov, Op0: SatVal, Op1: Result);
7924
7925 MI.eraseFromParent();
7926 return Legalized;
7927}
7928
7929LegalizerHelper::LegalizeResult LegalizerHelper::lowerBswap(MachineInstr &MI) {
7930 auto [Dst, Src] = MI.getFirst2Regs();
7931 const LLT Ty = MRI.getType(Reg: Src);
7932 unsigned SizeInBytes = (Ty.getScalarSizeInBits() + 7) / 8;
7933 unsigned BaseShiftAmt = (SizeInBytes - 1) * 8;
7934
7935 // Swap most and least significant byte, set remaining bytes in Res to zero.
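 // For example, byte-swapping the 32-bit value 0xAABBCCDD first produces
 // 0xDD0000AA here; the loop below then ORs in 0x00CC0000 and 0x0000BB00,
 // giving 0xDDCCBBAA.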
7936 auto ShiftAmt = MIRBuilder.buildConstant(Res: Ty, Val: BaseShiftAmt);
7937 auto LSByteShiftedLeft = MIRBuilder.buildShl(Dst: Ty, Src0: Src, Src1: ShiftAmt);
7938 auto MSByteShiftedRight = MIRBuilder.buildLShr(Dst: Ty, Src0: Src, Src1: ShiftAmt);
7939 auto Res = MIRBuilder.buildOr(Dst: Ty, Src0: MSByteShiftedRight, Src1: LSByteShiftedLeft);
7940
7941 // Set i-th high/low byte in Res to i-th low/high byte from Src.
7942 for (unsigned i = 1; i < SizeInBytes / 2; ++i) {
7943 // AND with Mask leaves byte i unchanged and sets remaining bytes to 0.
7944 APInt APMask(SizeInBytes * 8, 0xFF << (i * 8));
7945 auto Mask = MIRBuilder.buildConstant(Res: Ty, Val: APMask);
7946 auto ShiftAmt = MIRBuilder.buildConstant(Res: Ty, Val: BaseShiftAmt - 16 * i);
7947 // Low byte shifted left to place of high byte: (Src & Mask) << ShiftAmt.
7948 auto LoByte = MIRBuilder.buildAnd(Dst: Ty, Src0: Src, Src1: Mask);
7949 auto LoShiftedLeft = MIRBuilder.buildShl(Dst: Ty, Src0: LoByte, Src1: ShiftAmt);
7950 Res = MIRBuilder.buildOr(Dst: Ty, Src0: Res, Src1: LoShiftedLeft);
7951 // High byte shifted right to place of low byte: (Src >> ShiftAmt) & Mask.
7952 auto SrcShiftedRight = MIRBuilder.buildLShr(Dst: Ty, Src0: Src, Src1: ShiftAmt);
7953 auto HiShiftedRight = MIRBuilder.buildAnd(Dst: Ty, Src0: SrcShiftedRight, Src1: Mask);
7954 Res = MIRBuilder.buildOr(Dst: Ty, Src0: Res, Src1: HiShiftedRight);
7955 }
7956 Res.getInstr()->getOperand(i: 0).setReg(Dst);
7957
7958 MI.eraseFromParent();
7959 return Legalized;
7960}
7961
7962//{ (Src & Mask) >> N } | { (Src << N) & Mask }
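// For example, SwapN(4, ...) with the splatted 0xF0 mask swaps the nibbles of
// each byte: for an input byte 0xAB, (0xAB & 0xF0) >> 4 = 0x0A and
// (0xAB << 4) & 0xF0 = 0xB0, so the resulting byte is 0xBA.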
7963static MachineInstrBuilder SwapN(unsigned N, DstOp Dst, MachineIRBuilder &B,
7964 MachineInstrBuilder Src, const APInt &Mask) {
7965 const LLT Ty = Dst.getLLTTy(MRI: *B.getMRI());
7966 MachineInstrBuilder C_N = B.buildConstant(Res: Ty, Val: N);
7967 MachineInstrBuilder MaskLoNTo0 = B.buildConstant(Res: Ty, Val: Mask);
7968 auto LHS = B.buildLShr(Dst: Ty, Src0: B.buildAnd(Dst: Ty, Src0: Src, Src1: MaskLoNTo0), Src1: C_N);
7969 auto RHS = B.buildAnd(Dst: Ty, Src0: B.buildShl(Dst: Ty, Src0: Src, Src1: C_N), Src1: MaskLoNTo0);
7970 return B.buildOr(Dst, Src0: LHS, Src1: RHS);
7971}
7972
7973LegalizerHelper::LegalizeResult
7974LegalizerHelper::lowerBitreverse(MachineInstr &MI) {
7975 auto [Dst, Src] = MI.getFirst2Regs();
7976 const LLT Ty = MRI.getType(Reg: Src);
7977 unsigned Size = Ty.getSizeInBits();
7978
7979 MachineInstrBuilder BSWAP =
7980 MIRBuilder.buildInstr(Opc: TargetOpcode::G_BSWAP, DstOps: {Ty}, SrcOps: {Src});
7981
7982 // swap high and low 4 bits in 8 bit blocks 7654|3210 -> 3210|7654
7983 // [(val & 0xF0F0F0F0) >> 4] | [(val & 0x0F0F0F0F) << 4]
7984 // -> [(val & 0xF0F0F0F0) >> 4] | [(val << 4) & 0xF0F0F0F0]
7985 MachineInstrBuilder Swap4 =
7986 SwapN(N: 4, Dst: Ty, B&: MIRBuilder, Src: BSWAP, Mask: APInt::getSplat(NewLen: Size, V: APInt(8, 0xF0)));
7987
7988 // swap high and low 2 bits in 4 bit blocks 32|10 76|54 -> 10|32 54|76
7989 // [(val & 0xCCCCCCCC) >> 2] | [(val & 0x33333333) << 2]
7990 // -> [(val & 0xCCCCCCCC) >> 2] | [(val << 2) & 0xCCCCCCCC]
7991 MachineInstrBuilder Swap2 =
7992 SwapN(N: 2, Dst: Ty, B&: MIRBuilder, Src: Swap4, Mask: APInt::getSplat(NewLen: Size, V: APInt(8, 0xCC)));
7993
7994 // swap high and low 1 bit in 2 bit blocks 1|0 3|2 5|4 7|6 -> 0|1 2|3 4|5 6|7
7995 // [(val & 0xAAAAAAAA) >> 1] | [(val & 0x55555555) << 1]
7996 // -> [(val & 0xAAAAAAAA) >> 1] | [(val << 1) & 0xAAAAAAAA]
7997 SwapN(N: 1, Dst, B&: MIRBuilder, Src: Swap2, Mask: APInt::getSplat(NewLen: Size, V: APInt(8, 0xAA)));
7998
7999 MI.eraseFromParent();
8000 return Legalized;
8001}
8002
8003LegalizerHelper::LegalizeResult
8004LegalizerHelper::lowerReadWriteRegister(MachineInstr &MI) {
8005 MachineFunction &MF = MIRBuilder.getMF();
8006
8007 bool IsRead = MI.getOpcode() == TargetOpcode::G_READ_REGISTER;
8008 int NameOpIdx = IsRead ? 1 : 0;
8009 int ValRegIndex = IsRead ? 0 : 1;
8010
8011 Register ValReg = MI.getOperand(i: ValRegIndex).getReg();
8012 const LLT Ty = MRI.getType(Reg: ValReg);
8013 const MDString *RegStr = cast<MDString>(
8014 Val: cast<MDNode>(Val: MI.getOperand(i: NameOpIdx).getMetadata())->getOperand(I: 0));
8015
8016 Register PhysReg = TLI.getRegisterByName(RegName: RegStr->getString().data(), Ty, MF);
8017 if (!PhysReg.isValid())
8018 return UnableToLegalize;
8019
8020 if (IsRead)
8021 MIRBuilder.buildCopy(Res: ValReg, Op: PhysReg);
8022 else
8023 MIRBuilder.buildCopy(Res: PhysReg, Op: ValReg);
8024
8025 MI.eraseFromParent();
8026 return Legalized;
8027}
8028
8029LegalizerHelper::LegalizeResult
8030LegalizerHelper::lowerSMULH_UMULH(MachineInstr &MI) {
8031 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULH;
8032 unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
8033 Register Result = MI.getOperand(i: 0).getReg();
8034 LLT OrigTy = MRI.getType(Reg: Result);
8035 auto SizeInBits = OrigTy.getScalarSizeInBits();
8036 LLT WideTy = OrigTy.changeElementSize(NewEltSize: SizeInBits * 2);
8037
8038 auto LHS = MIRBuilder.buildInstr(Opc: ExtOp, DstOps: {WideTy}, SrcOps: {MI.getOperand(i: 1)});
8039 auto RHS = MIRBuilder.buildInstr(Opc: ExtOp, DstOps: {WideTy}, SrcOps: {MI.getOperand(i: 2)});
8040 auto Mul = MIRBuilder.buildMul(Dst: WideTy, Src0: LHS, Src1: RHS);
8041 unsigned ShiftOp = IsSigned ? TargetOpcode::G_ASHR : TargetOpcode::G_LSHR;
8042
8043 auto ShiftAmt = MIRBuilder.buildConstant(Res: WideTy, Val: SizeInBits);
8044 auto Shifted = MIRBuilder.buildInstr(Opc: ShiftOp, DstOps: {WideTy}, SrcOps: {Mul, ShiftAmt});
8045 MIRBuilder.buildTrunc(Res: Result, Op: Shifted);
8046
8047 MI.eraseFromParent();
8048 return Legalized;
8049}
8050
8051LegalizerHelper::LegalizeResult
8052LegalizerHelper::lowerISFPCLASS(MachineInstr &MI) {
8053 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
8054 FPClassTest Mask = static_cast<FPClassTest>(MI.getOperand(i: 2).getImm());
8055
8056 if (Mask == fcNone) {
8057 MIRBuilder.buildConstant(Res: DstReg, Val: 0);
8058 MI.eraseFromParent();
8059 return Legalized;
8060 }
8061 if (Mask == fcAllFlags) {
8062 MIRBuilder.buildConstant(Res: DstReg, Val: 1);
8063 MI.eraseFromParent();
8064 return Legalized;
8065 }
8066
8067 // TODO: Try inverting the test with getInvertedFPClassTest like the DAG
8068 // version
8069
8070 unsigned BitSize = SrcTy.getScalarSizeInBits();
8071 const fltSemantics &Semantics = getFltSemanticForLLT(Ty: SrcTy.getScalarType());
8072
8073 LLT IntTy = LLT::scalar(SizeInBits: BitSize);
8074 if (SrcTy.isVector())
8075 IntTy = LLT::vector(EC: SrcTy.getElementCount(), ScalarTy: IntTy);
8076 auto AsInt = MIRBuilder.buildCopy(Res: IntTy, Op: SrcReg);
8077
8078 // Various masks.
8079 APInt SignBit = APInt::getSignMask(BitWidth: BitSize);
8080 APInt ValueMask = APInt::getSignedMaxValue(numBits: BitSize); // All bits but sign.
8081 APInt Inf = APFloat::getInf(Sem: Semantics).bitcastToAPInt(); // Exp and int bit.
8082 APInt ExpMask = Inf;
8083 APInt AllOneMantissa = APFloat::getLargest(Sem: Semantics).bitcastToAPInt() & ~Inf;
8084 APInt QNaNBitMask =
8085 APInt::getOneBitSet(numBits: BitSize, BitNo: AllOneMantissa.getActiveBits() - 1);
8086 APInt InversionMask = APInt::getAllOnes(numBits: DstTy.getScalarSizeInBits());
8087
8088 auto SignBitC = MIRBuilder.buildConstant(Res: IntTy, Val: SignBit);
8089 auto ValueMaskC = MIRBuilder.buildConstant(Res: IntTy, Val: ValueMask);
8090 auto InfC = MIRBuilder.buildConstant(Res: IntTy, Val: Inf);
8091 auto ExpMaskC = MIRBuilder.buildConstant(Res: IntTy, Val: ExpMask);
8092 auto ZeroC = MIRBuilder.buildConstant(Res: IntTy, Val: 0);
8093
8094 auto Abs = MIRBuilder.buildAnd(Dst: IntTy, Src0: AsInt, Src1: ValueMaskC);
8095 auto Sign =
8096 MIRBuilder.buildICmp(Pred: CmpInst::Predicate::ICMP_NE, Res: DstTy, Op0: AsInt, Op1: Abs);
8097
8098 auto Res = MIRBuilder.buildConstant(Res: DstTy, Val: 0);
8099 // Clang doesn't support capture of structured bindings:
8100 LLT DstTyCopy = DstTy;
8101 const auto appendToRes = [&](MachineInstrBuilder ToAppend) {
8102 Res = MIRBuilder.buildOr(Dst: DstTyCopy, Src0: Res, Src1: ToAppend);
8103 };
8104
8105 // Tests that involve more than one class should be processed first.
8106 if ((Mask & fcFinite) == fcFinite) {
8107 // finite(V) ==> abs(V) u< exp_mask
8108 appendToRes(MIRBuilder.buildICmp(Pred: CmpInst::Predicate::ICMP_ULT, Res: DstTy, Op0: Abs,
8109 Op1: ExpMaskC));
8110 Mask &= ~fcFinite;
8111 } else if ((Mask & fcFinite) == fcPosFinite) {
8112 // finite(V) && V > 0 ==> V u< exp_mask
8113 appendToRes(MIRBuilder.buildICmp(Pred: CmpInst::Predicate::ICMP_ULT, Res: DstTy, Op0: AsInt,
8114 Op1: ExpMaskC));
8115 Mask &= ~fcPosFinite;
8116 } else if ((Mask & fcFinite) == fcNegFinite) {
8117 // finite(V) && V < 0 ==> abs(V) u< exp_mask && signbit == 1
8118 auto Cmp = MIRBuilder.buildICmp(Pred: CmpInst::Predicate::ICMP_ULT, Res: DstTy, Op0: Abs,
8119 Op1: ExpMaskC);
8120 auto And = MIRBuilder.buildAnd(Dst: DstTy, Src0: Cmp, Src1: Sign);
8121 appendToRes(And);
8122 Mask &= ~fcNegFinite;
8123 }
8124
8125 if (FPClassTest PartialCheck = Mask & (fcZero | fcSubnormal)) {
8126 // fcZero | fcSubnormal => test all exponent bits are 0
8127 // TODO: Handle sign bit specific cases
8128 // TODO: Handle inverted case
8129 if (PartialCheck == (fcZero | fcSubnormal)) {
8130 auto ExpBits = MIRBuilder.buildAnd(Dst: IntTy, Src0: AsInt, Src1: ExpMaskC);
8131 appendToRes(MIRBuilder.buildICmp(Pred: CmpInst::Predicate::ICMP_EQ, Res: DstTy,
8132 Op0: ExpBits, Op1: ZeroC));
8133 Mask &= ~PartialCheck;
8134 }
8135 }
8136
8137 // Check for individual classes.
8138 if (FPClassTest PartialCheck = Mask & fcZero) {
8139 if (PartialCheck == fcPosZero)
8140 appendToRes(MIRBuilder.buildICmp(Pred: CmpInst::Predicate::ICMP_EQ, Res: DstTy,
8141 Op0: AsInt, Op1: ZeroC));
8142 else if (PartialCheck == fcZero)
8143 appendToRes(
8144 MIRBuilder.buildICmp(Pred: CmpInst::Predicate::ICMP_EQ, Res: DstTy, Op0: Abs, Op1: ZeroC));
8145 else // fcNegZero
8146 appendToRes(MIRBuilder.buildICmp(Pred: CmpInst::Predicate::ICMP_EQ, Res: DstTy,
8147 Op0: AsInt, Op1: SignBitC));
8148 }
8149
8150 if (FPClassTest PartialCheck = Mask & fcSubnormal) {
8151 // issubnormal(V) ==> unsigned(abs(V) - 1) u< (all mantissa bits set)
8152 // issubnormal(V) && V>0 ==> unsigned(V - 1) u< (all mantissa bits set)
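 // For example, for IEEE single precision the all-ones mantissa is
 // 0x007FFFFF: subnormal bit patterns 0x00000001..0x007FFFFF map to values
 // below that bound, zero wraps around to 0xFFFFFFFF, and normal or larger
 // values stay at or above it, so only subnormals pass the unsigned compare.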
8153 auto V = (PartialCheck == fcPosSubnormal) ? AsInt : Abs;
8154 auto OneC = MIRBuilder.buildConstant(Res: IntTy, Val: 1);
8155 auto VMinusOne = MIRBuilder.buildSub(Dst: IntTy, Src0: V, Src1: OneC);
8156 auto SubnormalRes =
8157 MIRBuilder.buildICmp(Pred: CmpInst::Predicate::ICMP_ULT, Res: DstTy, Op0: VMinusOne,
8158 Op1: MIRBuilder.buildConstant(Res: IntTy, Val: AllOneMantissa));
8159 if (PartialCheck == fcNegSubnormal)
8160 SubnormalRes = MIRBuilder.buildAnd(Dst: DstTy, Src0: SubnormalRes, Src1: Sign);
8161 appendToRes(SubnormalRes);
8162 }
8163
8164 if (FPClassTest PartialCheck = Mask & fcInf) {
8165 if (PartialCheck == fcPosInf)
8166 appendToRes(MIRBuilder.buildICmp(Pred: CmpInst::Predicate::ICMP_EQ, Res: DstTy,
8167 Op0: AsInt, Op1: InfC));
8168 else if (PartialCheck == fcInf)
8169 appendToRes(
8170 MIRBuilder.buildICmp(Pred: CmpInst::Predicate::ICMP_EQ, Res: DstTy, Op0: Abs, Op1: InfC));
8171 else { // fcNegInf
8172 APInt NegInf = APFloat::getInf(Sem: Semantics, Negative: true).bitcastToAPInt();
8173 auto NegInfC = MIRBuilder.buildConstant(Res: IntTy, Val: NegInf);
8174 appendToRes(MIRBuilder.buildICmp(Pred: CmpInst::Predicate::ICMP_EQ, Res: DstTy,
8175 Op0: AsInt, Op1: NegInfC));
8176 }
8177 }
8178
8179 if (FPClassTest PartialCheck = Mask & fcNan) {
8180 auto InfWithQnanBitC = MIRBuilder.buildConstant(Res: IntTy, Val: Inf | QNaNBitMask);
8181 if (PartialCheck == fcNan) {
8182 // isnan(V) ==> abs(V) u> int(inf)
8183 appendToRes(
8184 MIRBuilder.buildICmp(Pred: CmpInst::Predicate::ICMP_UGT, Res: DstTy, Op0: Abs, Op1: InfC));
8185 } else if (PartialCheck == fcQNan) {
8186 // isquiet(V) ==> abs(V) u>= (unsigned(Inf) | quiet_bit)
8187 appendToRes(MIRBuilder.buildICmp(Pred: CmpInst::Predicate::ICMP_UGE, Res: DstTy, Op0: Abs,
8188 Op1: InfWithQnanBitC));
8189 } else { // fcSNan
8190 // issignaling(V) ==> abs(V) u> unsigned(Inf) &&
8191 // abs(V) u< (unsigned(Inf) | quiet_bit)
8192 auto IsNan =
8193 MIRBuilder.buildICmp(Pred: CmpInst::Predicate::ICMP_UGT, Res: DstTy, Op0: Abs, Op1: InfC);
8194 auto IsNotQnan = MIRBuilder.buildICmp(Pred: CmpInst::Predicate::ICMP_ULT, Res: DstTy,
8195 Op0: Abs, Op1: InfWithQnanBitC);
8196 appendToRes(MIRBuilder.buildAnd(Dst: DstTy, Src0: IsNan, Src1: IsNotQnan));
8197 }
8198 }
8199
8200 if (FPClassTest PartialCheck = Mask & fcNormal) {
8201 // isnormal(V) ==> (0 u< exp u< max_exp) ==> (unsigned(exp-1) u<
8202 // (max_exp-1))
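 // For example, for IEEE single precision ExpLSB is 0x00800000 and
 // MaxExpMinusOne is 0x7F000000: abs(V) - 0x00800000 is unsigned-less than
 // 0x7F000000 exactly when the biased exponent lies in [1, 254], i.e. when V
 // is a normal value.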
8203 APInt ExpLSB = ExpMask & ~(ExpMask.shl(shiftAmt: 1));
8204 auto ExpMinusOne = MIRBuilder.buildSub(
8205 Dst: IntTy, Src0: Abs, Src1: MIRBuilder.buildConstant(Res: IntTy, Val: ExpLSB));
8206 APInt MaxExpMinusOne = ExpMask - ExpLSB;
8207 auto NormalRes =
8208 MIRBuilder.buildICmp(Pred: CmpInst::Predicate::ICMP_ULT, Res: DstTy, Op0: ExpMinusOne,
8209 Op1: MIRBuilder.buildConstant(Res: IntTy, Val: MaxExpMinusOne));
8210 if (PartialCheck == fcNegNormal)
8211 NormalRes = MIRBuilder.buildAnd(Dst: DstTy, Src0: NormalRes, Src1: Sign);
8212 else if (PartialCheck == fcPosNormal) {
8213 auto PosSign = MIRBuilder.buildXor(
8214 Dst: DstTy, Src0: Sign, Src1: MIRBuilder.buildConstant(Res: DstTy, Val: InversionMask));
8215 NormalRes = MIRBuilder.buildAnd(Dst: DstTy, Src0: NormalRes, Src1: PosSign);
8216 }
8217 appendToRes(NormalRes);
8218 }
8219
8220 MIRBuilder.buildCopy(Res: DstReg, Op: Res);
8221 MI.eraseFromParent();
8222 return Legalized;
8223}
8224
8225LegalizerHelper::LegalizeResult LegalizerHelper::lowerSelect(MachineInstr &MI) {
8226 // Implement G_SELECT in terms of XOR, AND, OR.
8227 auto [DstReg, DstTy, MaskReg, MaskTy, Op1Reg, Op1Ty, Op2Reg, Op2Ty] =
8228 MI.getFirst4RegLLTs();
8229
8230 bool IsEltPtr = DstTy.isPointerOrPointerVector();
8231 if (IsEltPtr) {
8232 LLT ScalarPtrTy = LLT::scalar(SizeInBits: DstTy.getScalarSizeInBits());
8233 LLT NewTy = DstTy.changeElementType(NewEltTy: ScalarPtrTy);
8234 Op1Reg = MIRBuilder.buildPtrToInt(Dst: NewTy, Src: Op1Reg).getReg(Idx: 0);
8235 Op2Reg = MIRBuilder.buildPtrToInt(Dst: NewTy, Src: Op2Reg).getReg(Idx: 0);
8236 DstTy = NewTy;
8237 }
8238
8239 if (MaskTy.isScalar()) {
8240 // Turn the scalar condition into a vector condition mask if needed.
8241
8242 Register MaskElt = MaskReg;
8243
8244 // The condition was potentially zero extended before, but we want a sign
8245 // extended boolean.
8246 if (MaskTy != LLT::scalar(SizeInBits: 1))
8247 MaskElt = MIRBuilder.buildSExtInReg(Res: MaskTy, Op: MaskElt, ImmOp: 1).getReg(Idx: 0);
8248
8249 // Continue the sign extension (or truncate) to match the data type.
8250 MaskElt =
8251 MIRBuilder.buildSExtOrTrunc(Res: DstTy.getScalarType(), Op: MaskElt).getReg(Idx: 0);
8252
8253 if (DstTy.isVector()) {
8254 // Generate a vector splat idiom.
8255 auto ShufSplat = MIRBuilder.buildShuffleSplat(Res: DstTy, Src: MaskElt);
8256 MaskReg = ShufSplat.getReg(Idx: 0);
8257 } else {
8258 MaskReg = MaskElt;
8259 }
8260 MaskTy = DstTy;
8261 } else if (!DstTy.isVector()) {
8262 // Cannot handle the case that mask is a vector and dst is a scalar.
8263 return UnableToLegalize;
8264 }
8265
8266 if (MaskTy.getSizeInBits() != DstTy.getSizeInBits()) {
8267 return UnableToLegalize;
8268 }
8269
8270 auto NotMask = MIRBuilder.buildNot(Dst: MaskTy, Src0: MaskReg);
8271 auto NewOp1 = MIRBuilder.buildAnd(Dst: MaskTy, Src0: Op1Reg, Src1: MaskReg);
8272 auto NewOp2 = MIRBuilder.buildAnd(Dst: MaskTy, Src0: Op2Reg, Src1: NotMask);
8273 if (IsEltPtr) {
8274 auto Or = MIRBuilder.buildOr(Dst: DstTy, Src0: NewOp1, Src1: NewOp2);
8275 MIRBuilder.buildIntToPtr(Dst: DstReg, Src: Or);
8276 } else {
8277 MIRBuilder.buildOr(Dst: DstReg, Src0: NewOp1, Src1: NewOp2);
8278 }
8279 MI.eraseFromParent();
8280 return Legalized;
8281}
8282
8283LegalizerHelper::LegalizeResult LegalizerHelper::lowerDIVREM(MachineInstr &MI) {
8284 // Split DIVREM into individual instructions.
8285 unsigned Opcode = MI.getOpcode();
8286
8287 MIRBuilder.buildInstr(
8288 Opc: Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SDIV
8289 : TargetOpcode::G_UDIV,
8290 DstOps: {MI.getOperand(i: 0).getReg()}, SrcOps: {MI.getOperand(i: 2), MI.getOperand(i: 3)});
8291 MIRBuilder.buildInstr(
8292 Opc: Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SREM
8293 : TargetOpcode::G_UREM,
8294 DstOps: {MI.getOperand(i: 1).getReg()}, SrcOps: {MI.getOperand(i: 2), MI.getOperand(i: 3)});
8295 MI.eraseFromParent();
8296 return Legalized;
8297}
8298
8299LegalizerHelper::LegalizeResult
8300LegalizerHelper::lowerAbsToAddXor(MachineInstr &MI) {
8301 // Expand %res = G_ABS %a into:
8302 // %v1 = G_ASHR %a, scalar_size-1
8303 // %v2 = G_ADD %a, %v1
8304 // %res = G_XOR %v2, %v1
8305 LLT DstTy = MRI.getType(Reg: MI.getOperand(i: 0).getReg());
8306 Register OpReg = MI.getOperand(i: 1).getReg();
8307 auto ShiftAmt =
8308 MIRBuilder.buildConstant(Res: DstTy, Val: DstTy.getScalarSizeInBits() - 1);
8309 auto Shift = MIRBuilder.buildAShr(Dst: DstTy, Src0: OpReg, Src1: ShiftAmt);
8310 auto Add = MIRBuilder.buildAdd(Dst: DstTy, Src0: OpReg, Src1: Shift);
8311 MIRBuilder.buildXor(Dst: MI.getOperand(i: 0).getReg(), Src0: Add, Src1: Shift);
8312 MI.eraseFromParent();
8313 return Legalized;
8314}
8315
8316LegalizerHelper::LegalizeResult
8317LegalizerHelper::lowerAbsToMaxNeg(MachineInstr &MI) {
8318 // Expand %res = G_ABS %a into:
8319 // %v1 = G_CONSTANT 0
8320 // %v2 = G_SUB %v1, %a
8321 // %res = G_SMAX %a, %v2
8322 Register SrcReg = MI.getOperand(i: 1).getReg();
8323 LLT Ty = MRI.getType(Reg: SrcReg);
8324 auto Zero = MIRBuilder.buildConstant(Res: Ty, Val: 0);
8325 auto Sub = MIRBuilder.buildSub(Dst: Ty, Src0: Zero, Src1: SrcReg);
8326 MIRBuilder.buildSMax(Dst: MI.getOperand(i: 0), Src0: SrcReg, Src1: Sub);
8327 MI.eraseFromParent();
8328 return Legalized;
8329}
8330
8331LegalizerHelper::LegalizeResult
8332LegalizerHelper::lowerAbsToCNeg(MachineInstr &MI) {
8333 Register SrcReg = MI.getOperand(i: 1).getReg();
8334 Register DestReg = MI.getOperand(i: 0).getReg();
8335 LLT Ty = MRI.getType(Reg: SrcReg), IType = LLT::scalar(SizeInBits: 1);
8336 auto Zero = MIRBuilder.buildConstant(Res: Ty, Val: 0).getReg(Idx: 0);
8337 auto Sub = MIRBuilder.buildSub(Dst: Ty, Src0: Zero, Src1: SrcReg).getReg(Idx: 0);
8338 auto ICmp = MIRBuilder.buildICmp(Pred: CmpInst::ICMP_SGT, Res: IType, Op0: SrcReg, Op1: Zero);
8339 MIRBuilder.buildSelect(Res: DestReg, Tst: ICmp, Op0: SrcReg, Op1: Sub);
8340 MI.eraseFromParent();
8341 return Legalized;
8342}
8343
8344LegalizerHelper::LegalizeResult
8345LegalizerHelper::lowerVectorReduction(MachineInstr &MI) {
8346 Register SrcReg = MI.getOperand(i: 1).getReg();
8347 LLT SrcTy = MRI.getType(Reg: SrcReg);
8348 LLT DstTy = MRI.getType(Reg: MI.getOperand(i: 0).getReg());
8349
8350 // The source could be a scalar if the IR type was <1 x sN>.
8351 if (SrcTy.isScalar()) {
8352 if (DstTy.getSizeInBits() > SrcTy.getSizeInBits())
8353 return UnableToLegalize; // FIXME: handle extension.
8354 // This can be just a plain copy.
8355 Observer.changingInstr(MI);
8356 MI.setDesc(MIRBuilder.getTII().get(Opcode: TargetOpcode::COPY));
8357 Observer.changedInstr(MI);
8358 return Legalized;
8359 }
8360 return UnableToLegalize;
8361}
8362
8363static Type *getTypeForLLT(LLT Ty, LLVMContext &C);
8364
8365LegalizerHelper::LegalizeResult LegalizerHelper::lowerVAArg(MachineInstr &MI) {
8366 MachineFunction &MF = *MI.getMF();
8367 const DataLayout &DL = MIRBuilder.getDataLayout();
8368 LLVMContext &Ctx = MF.getFunction().getContext();
8369 Register ListPtr = MI.getOperand(i: 1).getReg();
8370 LLT PtrTy = MRI.getType(Reg: ListPtr);
8371
8372 // ListPtr is a pointer to the head of the list. Get the address
8373 // of the head of the list.
8374 Align PtrAlignment = DL.getABITypeAlign(Ty: getTypeForLLT(Ty: PtrTy, C&: Ctx));
8375 MachineMemOperand *PtrLoadMMO = MF.getMachineMemOperand(
8376 PtrInfo: MachinePointerInfo(), f: MachineMemOperand::MOLoad, MemTy: PtrTy, base_alignment: PtrAlignment);
8377 auto VAList = MIRBuilder.buildLoad(Res: PtrTy, Addr: ListPtr, MMO&: *PtrLoadMMO).getReg(Idx: 0);
8378
8379 const Align A(MI.getOperand(i: 2).getImm());
8380 LLT PtrTyAsScalarTy = LLT::scalar(SizeInBits: PtrTy.getSizeInBits());
8381 if (A > TLI.getMinStackArgumentAlignment()) {
8382 Register AlignAmt =
8383 MIRBuilder.buildConstant(Res: PtrTyAsScalarTy, Val: A.value() - 1).getReg(Idx: 0);
8384 auto AddDst = MIRBuilder.buildPtrAdd(Res: PtrTy, Op0: VAList, Op1: AlignAmt);
8385 auto AndDst = MIRBuilder.buildMaskLowPtrBits(Res: PtrTy, Op0: AddDst, NumBits: Log2(A));
8386 VAList = AndDst.getReg(Idx: 0);
8387 }
8388
8389 // Increment the pointer, VAList, to the next vaarg
8390 // The list should be bumped by the size of the element in the current
8391 // head of the list.
8392 Register Dst = MI.getOperand(i: 0).getReg();
8393 LLT LLTTy = MRI.getType(Reg: Dst);
8394 Type *Ty = getTypeForLLT(Ty: LLTTy, C&: Ctx);
8395 auto IncAmt =
8396 MIRBuilder.buildConstant(Res: PtrTyAsScalarTy, Val: DL.getTypeAllocSize(Ty));
8397 auto Succ = MIRBuilder.buildPtrAdd(Res: PtrTy, Op0: VAList, Op1: IncAmt);
8398
8399 // Store the incremented VAList to the legalized pointer
8400 MachineMemOperand *StoreMMO = MF.getMachineMemOperand(
8401 PtrInfo: MachinePointerInfo(), f: MachineMemOperand::MOStore, MemTy: PtrTy, base_alignment: PtrAlignment);
8402 MIRBuilder.buildStore(Val: Succ, Addr: ListPtr, MMO&: *StoreMMO);
8403 // Load the actual argument out of the pointer VAList
8404 Align EltAlignment = DL.getABITypeAlign(Ty);
8405 MachineMemOperand *EltLoadMMO = MF.getMachineMemOperand(
8406 PtrInfo: MachinePointerInfo(), f: MachineMemOperand::MOLoad, MemTy: LLTTy, base_alignment: EltAlignment);
8407 MIRBuilder.buildLoad(Res: Dst, Addr: VAList, MMO&: *EltLoadMMO);
8408
8409 MI.eraseFromParent();
8410 return Legalized;
8411}
8412
8413static bool shouldLowerMemFuncForSize(const MachineFunction &MF) {
8414 // On Darwin, -Os means optimize for size without hurting performance, so
8415 // only really optimize for size when -Oz (MinSize) is used.
8416 if (MF.getTarget().getTargetTriple().isOSDarwin())
8417 return MF.getFunction().hasMinSize();
8418 return MF.getFunction().hasOptSize();
8419}
8420
8421// Returns a list of types to use for memory op lowering in MemOps. A partial
8422// port of findOptimalMemOpLowering in TargetLowering.
8423static bool findGISelOptimalMemOpLowering(std::vector<LLT> &MemOps,
8424 unsigned Limit, const MemOp &Op,
8425 unsigned DstAS, unsigned SrcAS,
8426 const AttributeList &FuncAttributes,
8427 const TargetLowering &TLI) {
8428 if (Op.isMemcpyWithFixedDstAlign() && Op.getSrcAlign() < Op.getDstAlign())
8429 return false;
8430
8431 LLT Ty = TLI.getOptimalMemOpLLT(Op, FuncAttributes);
8432
8433 if (Ty == LLT()) {
8434 // Use the largest scalar type whose alignment constraints are satisfied.
8435 // We only need to check DstAlign here as SrcAlign is always greater or
8436 // equal to DstAlign (or zero).
8437 Ty = LLT::scalar(SizeInBits: 64);
8438 if (Op.isFixedDstAlign())
8439 while (Op.getDstAlign() < Ty.getSizeInBytes() &&
8440 !TLI.allowsMisalignedMemoryAccesses(Ty, AddrSpace: DstAS, Alignment: Op.getDstAlign()))
8441 Ty = LLT::scalar(SizeInBits: Ty.getSizeInBytes());
8442 assert(Ty.getSizeInBits() > 0 && "Could not find valid type");
8443 // FIXME: check for the largest legal type we can load/store to.
8444 }
8445
8446 unsigned NumMemOps = 0;
8447 uint64_t Size = Op.size();
8448 while (Size) {
8449 unsigned TySize = Ty.getSizeInBytes();
8450 while (TySize > Size) {
8451 // For now, only use non-vector loads / stores for the left-over pieces.
8452 LLT NewTy = Ty;
8453 // FIXME: check for mem op safety and legality of the types. Not all of
8454 // SDAGisms map cleanly to GISel concepts.
8455 if (NewTy.isVector())
8456 NewTy = NewTy.getSizeInBits() > 64 ? LLT::scalar(SizeInBits: 64) : LLT::scalar(SizeInBits: 32);
8457 NewTy = LLT::scalar(SizeInBits: llvm::bit_floor(Value: NewTy.getSizeInBits() - 1));
8458 unsigned NewTySize = NewTy.getSizeInBytes();
8459 assert(NewTySize > 0 && "Could not find appropriate type");
8460
8461 // If the new LLT cannot cover all of the remaining bits, then consider
8462 // issuing a (or a pair of) unaligned and overlapping load / store.
8463 unsigned Fast;
8464 // Need to get a VT equivalent for allowMisalignedMemoryAccesses().
8465 MVT VT = getMVTForLLT(Ty);
8466 if (NumMemOps && Op.allowOverlap() && NewTySize < Size &&
8467 TLI.allowsMisalignedMemoryAccesses(
8468 VT, AddrSpace: DstAS, Alignment: Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
8469 Flags: MachineMemOperand::MONone, &Fast) &&
8470 Fast)
8471 TySize = Size;
8472 else {
8473 Ty = NewTy;
8474 TySize = NewTySize;
8475 }
8476 }
8477
8478 if (++NumMemOps > Limit)
8479 return false;
8480
8481 MemOps.push_back(x: Ty);
8482 Size -= TySize;
8483 }
8484
8485 return true;
8486}
8487
8488static Type *getTypeForLLT(LLT Ty, LLVMContext &C) {
8489 if (Ty.isVector())
8490 return FixedVectorType::get(ElementType: IntegerType::get(C, NumBits: Ty.getScalarSizeInBits()),
8491 NumElts: Ty.getNumElements());
8492 return IntegerType::get(C, NumBits: Ty.getSizeInBits());
8493}
8494
8495// Get a vectorized representation of the memset value operand, GISel edition.
8496static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB) {
8497 MachineRegisterInfo &MRI = *MIB.getMRI();
8498 unsigned NumBits = Ty.getScalarSizeInBits();
8499 auto ValVRegAndVal = getIConstantVRegValWithLookThrough(VReg: Val, MRI);
8500 if (!Ty.isVector() && ValVRegAndVal) {
8501 APInt Scalar = ValVRegAndVal->Value.trunc(width: 8);
8502 APInt SplatVal = APInt::getSplat(NewLen: NumBits, V: Scalar);
8503 return MIB.buildConstant(Res: Ty, Val: SplatVal).getReg(Idx: 0);
8504 }
8505
8506 // Extend the byte value to the larger type, and then multiply by a magic
8507 // value 0x010101... in order to replicate it across every byte.
8508 // Unless it's zero, in which case just emit a larger G_CONSTANT 0.
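 // For example, widening the byte value 0xAB to 32 bits and multiplying by
 // 0x01010101 produces the splat 0xABABABAB.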
8509 if (ValVRegAndVal && ValVRegAndVal->Value == 0) {
8510 return MIB.buildConstant(Res: Ty, Val: 0).getReg(Idx: 0);
8511 }
8512
8513 LLT ExtType = Ty.getScalarType();
8514 auto ZExt = MIB.buildZExtOrTrunc(Res: ExtType, Op: Val);
8515 if (NumBits > 8) {
8516 APInt Magic = APInt::getSplat(NewLen: NumBits, V: APInt(8, 0x01));
8517 auto MagicMI = MIB.buildConstant(Res: ExtType, Val: Magic);
8518 Val = MIB.buildMul(Dst: ExtType, Src0: ZExt, Src1: MagicMI).getReg(Idx: 0);
8519 }
8520
8521 // For vector types create a G_BUILD_VECTOR.
8522 if (Ty.isVector())
8523 Val = MIB.buildSplatBuildVector(Res: Ty, Src: Val).getReg(Idx: 0);
8524
8525 return Val;
8526}
8527
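/// Lower a G_MEMSET with a known constant length into an inline sequence of
/// stores, splatting the byte value across the widest profitable store type
/// and truncating (or re-splatting) it for any smaller trailing stores.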
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerMemset(MachineInstr &MI, Register Dst, Register Val,
                             uint64_t KnownLen, Align Alignment,
                             bool IsVolatile) {
  auto &MF = *MI.getParent()->getParent();
  const auto &TLI = *MF.getSubtarget().getTargetLowering();
  auto &DL = MF.getDataLayout();
  LLVMContext &C = MF.getFunction().getContext();

  assert(KnownLen != 0 && "Have a zero length memset length!");

  bool DstAlignCanChange = false;
  MachineFrameInfo &MFI = MF.getFrameInfo();
  bool OptSize = shouldLowerMemFuncForSize(MF);

  MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
  if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
    DstAlignCanChange = true;

  unsigned Limit = TLI.getMaxStoresPerMemset(OptSize);
  std::vector<LLT> MemOps;

  const auto &DstMMO = **MI.memoperands_begin();
  MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();

  auto ValVRegAndVal = getIConstantVRegValWithLookThrough(Val, MRI);
  bool IsZeroVal = ValVRegAndVal && ValVRegAndVal->Value == 0;

  if (!findGISelOptimalMemOpLowering(MemOps, Limit,
                                     MemOp::Set(KnownLen, DstAlignCanChange,
                                                Alignment,
                                                /*IsZeroMemset=*/IsZeroVal,
                                                /*IsVolatile=*/IsVolatile),
                                     DstPtrInfo.getAddrSpace(), ~0u,
                                     MF.getFunction().getAttributes(), TLI))
    return UnableToLegalize;

  if (DstAlignCanChange) {
    // Get an estimate of the type from the LLT.
    Type *IRTy = getTypeForLLT(MemOps[0], C);
    Align NewAlign = DL.getABITypeAlign(IRTy);
    if (NewAlign > Alignment) {
      Alignment = NewAlign;
      unsigned FI = FIDef->getOperand(1).getIndex();
      // Give the stack frame object a larger alignment if needed.
      if (MFI.getObjectAlign(FI) < Alignment)
        MFI.setObjectAlignment(FI, Alignment);
    }
  }

  MachineIRBuilder MIB(MI);
  // Find the largest store and generate the bit pattern for it.
  LLT LargestTy = MemOps[0];
  for (unsigned i = 1; i < MemOps.size(); i++)
    if (MemOps[i].getSizeInBits() > LargestTy.getSizeInBits())
      LargestTy = MemOps[i];

  // The memset stored value is always defined as an s8, so in order to make it
  // work with larger store types we need to repeat the bit pattern across the
  // wider type.
  Register MemSetValue = getMemsetValue(Val, LargestTy, MIB);

  if (!MemSetValue)
    return UnableToLegalize;

  // Generate the stores. For each store type in the list, we generate the
  // matching store of that type to the destination address.
  LLT PtrTy = MRI.getType(Dst);
  unsigned DstOff = 0;
  unsigned Size = KnownLen;
  for (unsigned I = 0; I < MemOps.size(); I++) {
    LLT Ty = MemOps[I];
    unsigned TySize = Ty.getSizeInBytes();
    if (TySize > Size) {
      // Issuing an unaligned load / store pair that overlaps with the previous
      // pair. Adjust the offset accordingly.
      assert(I == MemOps.size() - 1 && I != 0);
      DstOff -= TySize - Size;
    }

    // If this store is smaller than the largest store, see whether we can get
    // the smaller value for free with a truncate.
    Register Value = MemSetValue;
    if (Ty.getSizeInBits() < LargestTy.getSizeInBits()) {
      MVT VT = getMVTForLLT(Ty);
      MVT LargestVT = getMVTForLLT(LargestTy);
      if (!LargestTy.isVector() && !Ty.isVector() &&
          TLI.isTruncateFree(LargestVT, VT))
        Value = MIB.buildTrunc(Ty, MemSetValue).getReg(0);
      else
        Value = getMemsetValue(Val, Ty, MIB);
      if (!Value)
        return UnableToLegalize;
    }

    auto *StoreMMO = MF.getMachineMemOperand(&DstMMO, DstOff, Ty);

    Register Ptr = Dst;
    if (DstOff != 0) {
      auto Offset =
          MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), DstOff);
      Ptr = MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0);
    }

    MIB.buildStore(Value, Ptr, *StoreMMO);
    DstOff += Ty.getSizeInBytes();
    Size -= TySize;
  }

  MI.eraseFromParent();
  return Legalized;
}

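/// Lower a G_MEMCPY_INLINE by reading the constant length and the base
/// alignments off the instruction's memory operands, then delegating to the
/// overload below. A zero-length copy is simply erased.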
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerMemcpyInline(MachineInstr &MI) {
  assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);

  auto [Dst, Src, Len] = MI.getFirst3Regs();

  const auto *MMOIt = MI.memoperands_begin();
  const MachineMemOperand *MemOp = *MMOIt;
  bool IsVolatile = MemOp->isVolatile();

  // See if this is a constant length copy
  auto LenVRegAndVal = getIConstantVRegValWithLookThrough(Len, MRI);
  // FIXME: support dynamically sized G_MEMCPY_INLINE
  assert(LenVRegAndVal &&
         "inline memcpy with dynamic size is not yet supported");
  uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
  if (KnownLen == 0) {
    MI.eraseFromParent();
    return Legalized;
  }

  const auto &DstMMO = **MI.memoperands_begin();
  const auto &SrcMMO = **std::next(MI.memoperands_begin());
  Align DstAlign = DstMMO.getBaseAlign();
  Align SrcAlign = SrcMMO.getBaseAlign();

  return lowerMemcpyInline(MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
                           IsVolatile);
}

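/// Inline variant of the memcpy lowering: delegates to lowerMemcpy with an
/// effectively unlimited store budget, since G_MEMCPY_INLINE must not be
/// turned into a libcall.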
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerMemcpyInline(MachineInstr &MI, Register Dst, Register Src,
                                   uint64_t KnownLen, Align DstAlign,
                                   Align SrcAlign, bool IsVolatile) {
  assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
  return lowerMemcpy(MI, Dst, Src, KnownLen,
                     std::numeric_limits<uint64_t>::max(), DstAlign, SrcAlign,
                     IsVolatile);
}

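/// Lower a G_MEMCPY with a known constant length into load/store pairs, using
/// the access types suggested by findGISelOptimalMemOpLowering. Fails if more
/// than \p Limit memory operations would be required.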
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerMemcpy(MachineInstr &MI, Register Dst, Register Src,
                             uint64_t KnownLen, uint64_t Limit, Align DstAlign,
                             Align SrcAlign, bool IsVolatile) {
  auto &MF = *MI.getParent()->getParent();
  const auto &TLI = *MF.getSubtarget().getTargetLowering();
  auto &DL = MF.getDataLayout();
  LLVMContext &C = MF.getFunction().getContext();

  assert(KnownLen != 0 && "Have a zero length memcpy length!");

  bool DstAlignCanChange = false;
  MachineFrameInfo &MFI = MF.getFrameInfo();
  Align Alignment = std::min(DstAlign, SrcAlign);

  MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
  if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
    DstAlignCanChange = true;

  // FIXME: infer better src pointer alignment like SelectionDAG does here.
  // FIXME: also use the equivalent of isMemSrcFromConstant and alwaysinlining
  // if the memcpy is in a tail call position.

  std::vector<LLT> MemOps;

  const auto &DstMMO = **MI.memoperands_begin();
  const auto &SrcMMO = **std::next(MI.memoperands_begin());
  MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
  MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();

  if (!findGISelOptimalMemOpLowering(
          MemOps, Limit,
          MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
                      IsVolatile),
          DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
          MF.getFunction().getAttributes(), TLI))
    return UnableToLegalize;

  if (DstAlignCanChange) {
    // Get an estimate of the type from the LLT.
    Type *IRTy = getTypeForLLT(MemOps[0], C);
    Align NewAlign = DL.getABITypeAlign(IRTy);

    // Don't promote to an alignment that would require dynamic stack
    // realignment.
    const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
    if (!TRI->hasStackRealignment(MF))
      while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign))
        NewAlign = NewAlign.previous();

    if (NewAlign > Alignment) {
      Alignment = NewAlign;
      unsigned FI = FIDef->getOperand(1).getIndex();
      // Give the stack frame object a larger alignment if needed.
      if (MFI.getObjectAlign(FI) < Alignment)
        MFI.setObjectAlignment(FI, Alignment);
    }
  }

  LLVM_DEBUG(dbgs() << "Inlining memcpy: " << MI << " into loads & stores\n");

  MachineIRBuilder MIB(MI);
  // Now we need to emit a load/store pair for each of the types we've
  // collected: for each type, generate a load of that width from the source
  // pointer, then a corresponding store of the loaded value to the dest
  // buffer. This can result in a sequence of loads and stores of mixed types,
  // depending on what the target specifies as good types to use.
  unsigned CurrOffset = 0;
  unsigned Size = KnownLen;
  for (auto CopyTy : MemOps) {
    // Issuing an unaligned load / store pair that overlaps with the previous
    // pair. Adjust the offset accordingly.
    if (CopyTy.getSizeInBytes() > Size)
      CurrOffset -= CopyTy.getSizeInBytes() - Size;

    // Construct MMOs for the accesses.
    auto *LoadMMO =
        MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
    auto *StoreMMO =
        MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());

    // Create the load.
    Register LoadPtr = Src;
    Register Offset;
    if (CurrOffset != 0) {
      LLT SrcTy = MRI.getType(Src);
      Offset = MIB.buildConstant(LLT::scalar(SrcTy.getSizeInBits()), CurrOffset)
                   .getReg(0);
      LoadPtr = MIB.buildPtrAdd(SrcTy, Src, Offset).getReg(0);
    }
    auto LdVal = MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO);

    // Create the store.
    Register StorePtr = Dst;
    if (CurrOffset != 0) {
      LLT DstTy = MRI.getType(Dst);
      StorePtr = MIB.buildPtrAdd(DstTy, Dst, Offset).getReg(0);
    }
    MIB.buildStore(LdVal, StorePtr, *StoreMMO);
    CurrOffset += CopyTy.getSizeInBytes();
    Size -= CopyTy.getSizeInBytes();
  }

  MI.eraseFromParent();
  return Legalized;
}

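/// Lower a G_MEMMOVE with a known constant length by emitting all of the loads
/// before any of the stores, so the expansion stays correct when the source
/// and destination ranges overlap.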
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerMemmove(MachineInstr &MI, Register Dst, Register Src,
                              uint64_t KnownLen, Align DstAlign, Align SrcAlign,
                              bool IsVolatile) {
  auto &MF = *MI.getParent()->getParent();
  const auto &TLI = *MF.getSubtarget().getTargetLowering();
  auto &DL = MF.getDataLayout();
  LLVMContext &C = MF.getFunction().getContext();

  assert(KnownLen != 0 && "Have a zero length memmove length!");

  bool DstAlignCanChange = false;
  MachineFrameInfo &MFI = MF.getFrameInfo();
  bool OptSize = shouldLowerMemFuncForSize(MF);
  Align Alignment = std::min(DstAlign, SrcAlign);

  MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
  if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
    DstAlignCanChange = true;

  unsigned Limit = TLI.getMaxStoresPerMemmove(OptSize);
  std::vector<LLT> MemOps;

  const auto &DstMMO = **MI.memoperands_begin();
  const auto &SrcMMO = **std::next(MI.memoperands_begin());
  MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
  MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();

  // FIXME: SelectionDAG always passes false for 'AllowOverlap', apparently due
  // to a bug in its findOptimalMemOpLowering implementation. For now do the
  // same thing here.
  if (!findGISelOptimalMemOpLowering(
          MemOps, Limit,
          MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
                      /*IsVolatile*/ true),
          DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
          MF.getFunction().getAttributes(), TLI))
    return UnableToLegalize;

  if (DstAlignCanChange) {
    // Get an estimate of the type from the LLT.
    Type *IRTy = getTypeForLLT(MemOps[0], C);
    Align NewAlign = DL.getABITypeAlign(IRTy);

    // Don't promote to an alignment that would require dynamic stack
    // realignment.
    const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
    if (!TRI->hasStackRealignment(MF))
      while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign))
        NewAlign = NewAlign.previous();

    if (NewAlign > Alignment) {
      Alignment = NewAlign;
      unsigned FI = FIDef->getOperand(1).getIndex();
      // Give the stack frame object a larger alignment if needed.
      if (MFI.getObjectAlign(FI) < Alignment)
        MFI.setObjectAlignment(FI, Alignment);
    }
  }

  LLVM_DEBUG(dbgs() << "Inlining memmove: " << MI << " into loads & stores\n");

  MachineIRBuilder MIB(MI);
  // Memmove requires that we perform the loads before issuing the stores.
  // Apart from that, this loop is pretty much doing the same thing as the
  // memcpy codegen function.
  unsigned CurrOffset = 0;
  SmallVector<Register, 16> LoadVals;
  for (auto CopyTy : MemOps) {
    // Construct MMO for the load.
    auto *LoadMMO =
        MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());

    // Create the load.
    Register LoadPtr = Src;
    if (CurrOffset != 0) {
      LLT SrcTy = MRI.getType(Src);
      auto Offset =
          MIB.buildConstant(LLT::scalar(SrcTy.getSizeInBits()), CurrOffset);
      LoadPtr = MIB.buildPtrAdd(SrcTy, Src, Offset).getReg(0);
    }
    LoadVals.push_back(MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO).getReg(0));
    CurrOffset += CopyTy.getSizeInBytes();
  }

  CurrOffset = 0;
  for (unsigned I = 0; I < MemOps.size(); ++I) {
    LLT CopyTy = MemOps[I];
    // Now store the values loaded.
    auto *StoreMMO =
        MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());

    Register StorePtr = Dst;
    if (CurrOffset != 0) {
      LLT DstTy = MRI.getType(Dst);
      auto Offset =
          MIB.buildConstant(LLT::scalar(DstTy.getSizeInBits()), CurrOffset);
      StorePtr = MIB.buildPtrAdd(DstTy, Dst, Offset).getReg(0);
    }
    MIB.buildStore(LoadVals[I], StorePtr, *StoreMMO);
    CurrOffset += CopyTy.getSizeInBytes();
  }
  MI.eraseFromParent();
  return Legalized;
}

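/// Dispatch lowering for the G_MEMCPY / G_MEMMOVE / G_MEMSET family: extract
/// the constant length and alignments, bail out on volatile or over-length
/// operations, and forward to the opcode-specific helper above.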
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
  const unsigned Opc = MI.getOpcode();
  // This combine is fairly complex so it's not written with a separate
  // matcher function.
  assert((Opc == TargetOpcode::G_MEMCPY || Opc == TargetOpcode::G_MEMMOVE ||
          Opc == TargetOpcode::G_MEMSET) &&
         "Expected memcpy like instruction");

  auto MMOIt = MI.memoperands_begin();
  const MachineMemOperand *MemOp = *MMOIt;

  Align DstAlign = MemOp->getBaseAlign();
  Align SrcAlign;
  auto [Dst, Src, Len] = MI.getFirst3Regs();

  if (Opc != TargetOpcode::G_MEMSET) {
    assert(MMOIt != MI.memoperands_end() && "Expected a second MMO on MI");
    MemOp = *(++MMOIt);
    SrcAlign = MemOp->getBaseAlign();
  }

  // See if this is a constant length copy
  auto LenVRegAndVal = getIConstantVRegValWithLookThrough(Len, MRI);
  if (!LenVRegAndVal)
    return UnableToLegalize;
  uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();

  if (KnownLen == 0) {
    MI.eraseFromParent();
    return Legalized;
  }

  bool IsVolatile = MemOp->isVolatile();
  if (Opc == TargetOpcode::G_MEMCPY_INLINE)
    return lowerMemcpyInline(MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
                             IsVolatile);

  // Don't try to optimize volatile.
  if (IsVolatile)
    return UnableToLegalize;

  if (MaxLen && KnownLen > MaxLen)
    return UnableToLegalize;

  if (Opc == TargetOpcode::G_MEMCPY) {
    auto &MF = *MI.getParent()->getParent();
    const auto &TLI = *MF.getSubtarget().getTargetLowering();
    bool OptSize = shouldLowerMemFuncForSize(MF);
    uint64_t Limit = TLI.getMaxStoresPerMemcpy(OptSize);
    return lowerMemcpy(MI, Dst, Src, KnownLen, Limit, DstAlign, SrcAlign,
                       IsVolatile);
  }
  if (Opc == TargetOpcode::G_MEMMOVE)
    return lowerMemmove(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile);
  if (Opc == TargetOpcode::G_MEMSET)
    return lowerMemset(MI, Dst, Src, KnownLen, DstAlign, IsVolatile);
  return UnableToLegalize;
}
