PPCFastISel.cpp source code [llvm/lib/Target/PowerPC/PPCFastISel.cpp]

1	//===-- PPCFastISel.cpp - PowerPC FastISel implementation -----------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// This file defines the PowerPC-specific support for the FastISel class. Some
10	// of the target-specific code is generated by tablegen in the file
11	// PPCGenFastISel.inc, which is #included here.
12	//
13	//===----------------------------------------------------------------------===//
14
15	#include "MCTargetDesc/PPCPredicates.h"
16	#include "PPC.h"
17	#include "PPCCCState.h"
18	#include "PPCCallingConv.h"
19	#include "PPCISelLowering.h"
20	#include "PPCMachineFunctionInfo.h"
21	#include "PPCSubtarget.h"
22	#include "PPCTargetMachine.h"
23	#include "llvm/CodeGen/CallingConvLower.h"
24	#include "llvm/CodeGen/FastISel.h"
25	#include "llvm/CodeGen/FunctionLoweringInfo.h"
26	#include "llvm/CodeGen/MachineConstantPool.h"
27	#include "llvm/CodeGen/MachineFrameInfo.h"
28	#include "llvm/CodeGen/MachineInstrBuilder.h"
29	#include "llvm/CodeGen/MachineRegisterInfo.h"
30	#include "llvm/CodeGen/TargetLowering.h"
31	#include "llvm/IR/CallingConv.h"
32	#include "llvm/IR/GetElementPtrTypeIterator.h"
33	#include "llvm/IR/GlobalAlias.h"
34	#include "llvm/IR/GlobalVariable.h"
35	#include "llvm/IR/IntrinsicInst.h"
36	#include "llvm/IR/Operator.h"
37	#include "llvm/Support/Debug.h"
38	#include "llvm/Target/TargetMachine.h"
39
40	//===----------------------------------------------------------------------===//
41	//
42	// TBD:
43	// fastLowerArguments: Handle simple cases.
44	// PPCMaterializeGV: Handle TLS.
45	// SelectCall: Handle function pointers.
46	// SelectCall: Handle multi-register return values.
47	// SelectCall: Optimize away nops for local calls.
48	// processCallArgs: Handle bit-converted arguments.
49	// finishCall: Handle multi-register return values.
50	// PPCComputeAddress: Handle parameter references as FrameIndex's.
51	// PPCEmitCmp: Handle immediate as operand 1.
52	// SelectCall: Handle small byval arguments.
53	// SelectIntrinsicCall: Implement.
54	// SelectSelect: Implement.
55	// Consider factoring isTypeLegal into the base class.
56	// Implement switches and jump tables.
57	//
58	//===----------------------------------------------------------------------===//
59	using namespace llvm;
60
61	#define DEBUG_TYPE "ppcfastisel"
62
63	namespace {
64
/// Describes a memory operand as a base plus a constant offset. The base is
/// either a register number or a frame index; BaseType says which member of
/// the Base union is currently meaningful.
struct Address {
  /// Discriminator for the Base union.
  enum {
    RegBase,
    FrameIndexBase
  } BaseType;

  /// The base of the address: a register (RegBase) or a frame index
  /// (FrameIndexBase).
  union {
    unsigned Reg;
    int FI;
  } Base;

  /// Constant displacement added to the base.
  int64_t Offset;

  // Innocuous defaults for our address: register base, register 0, offset 0.
  Address()
      : BaseType(RegBase), Offset(0) {
    Base.Reg = 0;
  }
};
84
85	class PPCFastISel final : public FastISel {
86
87	const TargetMachine &TM;
88	const PPCSubtarget *Subtarget;
89	PPCFunctionInfo *PPCFuncInfo;
90	const TargetInstrInfo &TII;
91	const TargetLowering &TLI;
92	LLVMContext *Context;
93
94	public:
95	explicit PPCFastISel(FunctionLoweringInfo &FuncInfo,
96	const TargetLibraryInfo *LibInfo)
97	: FastISel (FuncInfo, LibInfo), TM(FuncInfo.MF->getTarget()),
98	Subtarget(&FuncInfo.MF->getSubtarget<PPCSubtarget>()),
99	PPCFuncInfo(FuncInfo.MF->getInfo<PPCFunctionInfo>()),
100	TII(Subtarget->getInstrInfo()), TLI(Subtarget->getTargetLowering()),
101	Context(&FuncInfo.Fn->getContext()) {}
102
103	// Backend specific FastISel code.
104	private:
105	bool fastSelectInstruction(const Instruction *I) override;
106	unsigned fastMaterializeConstant(const Constant *C) override;
107	unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
108	bool tryToFoldLoadIntoMI(MachineInstr MI, unsigned* OpNo,
109	const LoadInst *LI) override;
110	bool fastLowerArguments() override;
111	unsigned fastEmit_i(MVT Ty, MVT RetTy, unsigned Opc, uint64_t Imm) override;
112	unsigned fastEmitInst_ri(unsigned MachineInstOpcode,
113	const TargetRegisterClass *RC,
114	unsigned Op0, uint64_t Imm);
115	unsigned fastEmitInst_r(unsigned MachineInstOpcode,
116	const TargetRegisterClass RC, unsigned* Op0);
117	unsigned fastEmitInst_rr(unsigned MachineInstOpcode,
118	const TargetRegisterClass *RC,
119	unsigned Op0, unsigned Op1);
120
121	bool fastLowerCall(CallLoweringInfo &CLI) override;
122
123	// Instruction selection routines.
124	private:
125	bool SelectLoad(const Instruction *I);
126	bool SelectStore(const Instruction *I);
127	bool SelectBranch(const Instruction *I);
128	bool SelectIndirectBr(const Instruction *I);
129	bool SelectFPExt(const Instruction *I);
130	bool SelectFPTrunc(const Instruction *I);
131	bool SelectIToFP(const Instruction I, bool* IsSigned);
132	bool SelectFPToI(const Instruction I, bool* IsSigned);
133	bool SelectBinaryIntOp(const Instruction I, unsigned* ISDOpcode);
134	bool SelectRet(const Instruction *I);
135	bool SelectTrunc(const Instruction *I);
136	bool SelectIntExt(const Instruction *I);
137
138	// Utility routines.
139	private:
140	bool isTypeLegal(Type *Ty, MVT &VT);
141	bool isLoadTypeLegal(Type *Ty, MVT &VT);
142	bool isValueAvailable(const Value V) const*;
143	bool isVSFRCRegClass(const TargetRegisterClass RC) const* {
144	return RC->getID() == PPC::VSFRCRegClassID;
145	}
146	bool isVSSRCRegClass(const TargetRegisterClass RC) const* {
147	return RC->getID() == PPC::VSSRCRegClassID;
148	}
149	unsigned copyRegToRegClass(const TargetRegisterClass *ToRC,
150	unsigned SrcReg, unsigned Flag = `0`,
151	unsigned SubReg = `0`) {
152	Register TmpReg = createResultReg(RC: ToRC);
153	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
154	MCID: TII.get(Opcode: TargetOpcode::COPY), DestReg: TmpReg).addReg(RegNo: SrcReg, flags: Flag, SubReg);
155	return TmpReg;
156	}
157	bool PPCEmitCmp(const Value Src1Value, const* Value *Src2Value,
158	bool isZExt, unsigned DestReg,
159	const PPC::Predicate Pred);
160	bool PPCEmitLoad(MVT VT, Register &ResultReg, Address &Addr,
161	const TargetRegisterClass RC, bool* IsZExt = true,
162	unsigned FP64LoadOpc = PPC::LFD);
163	bool PPCEmitStore(MVT VT, unsigned SrcReg, Address &Addr);
164	bool PPCComputeAddress(const Value *Obj, Address &Addr);
165	void PPCSimplifyAddress(Address &Addr, bool &UseOffset,
166	unsigned &IndexReg);
167	bool PPCEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
168	unsigned DestReg, bool IsZExt);
169	unsigned PPCMaterializeFP(const ConstantFP *CFP, MVT VT);
170	unsigned PPCMaterializeGV(const GlobalValue *GV, MVT VT);
171	unsigned PPCMaterializeInt(const ConstantInt *CI, MVT VT,
172	bool UseSExt = true);
173	unsigned PPCMaterialize32BitInt(int64_t Imm,
174	const TargetRegisterClass *RC);
175	unsigned PPCMaterialize64BitInt(int64_t Imm,
176	const TargetRegisterClass *RC);
177	unsigned PPCMoveToIntReg(const Instruction *I, MVT VT,
178	unsigned SrcReg, bool IsSigned);
179	unsigned PPCMoveToFPReg(MVT VT, unsigned SrcReg, bool IsSigned);
180
181	// Call handling routines.
182	private:
183	bool processCallArgs(SmallVectorImpl<Value*> &Args,
184	SmallVectorImpl<unsigned> &ArgRegs,
185	SmallVectorImpl<MVT> &ArgVTs,
186	SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
187	SmallVectorImpl<unsigned> &RegArgs,
188	CallingConv::ID CC,
189	unsigned &NumBytes,
190	bool IsVarArg);
191	bool finishCall(MVT RetVT, CallLoweringInfo &CLI, unsigned &NumBytes);
192
193	private:
194	#include "PPCGenFastISel.inc"
195
196	};
197
198	} // end anonymous namespace
199
200	static std::optional<PPC::Predicate> getComparePred(CmpInst::Predicate Pred) {
201	switch (Pred) {
202	// These are not representable with any single compare.
203	case CmpInst::FCMP_FALSE:
204	case CmpInst::FCMP_TRUE:
205	// Major concern about the following 6 cases is NaN result. The comparison
206	// result consists of 4 bits, indicating lt, eq, gt and un (unordered),
207	// only one of which will be set. The result is generated by fcmpu
208	// instruction. However, bc instruction only inspects one of the first 3
209	// bits, so when un is set, bc instruction may jump to an undesired
210	// place.
211	//
212	// More specifically, if we expect an unordered comparison and un is set, we
213	// expect to always go to true branch; in such case UEQ, UGT and ULT still
214	// give false, which are undesired; but UNE, UGE, ULE happen to give true,
215	// since they are tested by inspecting !eq, !lt, !gt, respectively.
216	//
217	// Similarly, for ordered comparison, when un is set, we always expect the
218	// result to be false. In such case OGT, OLT and OEQ is good, since they are
219	// actually testing GT, LT, and EQ respectively, which are false. OGE, OLE
220	// and ONE are tested through !lt, !gt and !eq, and these are true.
221	case CmpInst::FCMP_UEQ:
222	case CmpInst::FCMP_UGT:
223	case CmpInst::FCMP_ULT:
224	case CmpInst::FCMP_OGE:
225	case CmpInst::FCMP_OLE:
226	case CmpInst::FCMP_ONE:
227	default:
228	return std::nullopt;
229
230	case CmpInst::FCMP_OEQ:
231	case CmpInst::ICMP_EQ:
232	return PPC::PRED_EQ;
233
234	case CmpInst::FCMP_OGT:
235	case CmpInst::ICMP_UGT:
236	case CmpInst::ICMP_SGT:
237	return PPC::PRED_GT;
238
239	case CmpInst::FCMP_UGE:
240	case CmpInst::ICMP_UGE:
241	case CmpInst::ICMP_SGE:
242	return PPC::PRED_GE;
243
244	case CmpInst::FCMP_OLT:
245	case CmpInst::ICMP_ULT:
246	case CmpInst::ICMP_SLT:
247	return PPC::PRED_LT;
248
249	case CmpInst::FCMP_ULE:
250	case CmpInst::ICMP_ULE:
251	case CmpInst::ICMP_SLE:
252	return PPC::PRED_LE;
253
254	case CmpInst::FCMP_UNE:
255	case CmpInst::ICMP_NE:
256	return PPC::PRED_NE;
257
258	case CmpInst::FCMP_ORD:
259	return PPC::PRED_NU;
260
261	case CmpInst::FCMP_UNO:
262	return PPC::PRED_UN;
263	}
264	}
265
266	// Determine whether the type Ty is simple enough to be handled by
267	// fast-isel, and return its equivalent machine type in VT.
268	// FIXME: Copied directly from ARM -- factor into base class?
269	bool PPCFastISel::isTypeLegal(Type *Ty, MVT &VT) {
270	EVT Evt = TLI.getValueType(DL, Ty, AllowUnknown: true);
271
272	// Only handle simple types.
273	if (Evt == MVT::Other \|\| !Evt.isSimple()) return false;
274	VT = Evt.getSimpleVT();
275
276	// Handle all legal types, i.e. a register that will directly hold this
277	// value.
278	return TLI.isTypeLegal(VT);
279	}
280
281	// Determine whether the type Ty is simple enough to be handled by
282	// fast-isel as a load target, and return its equivalent machine type in VT.
283	bool PPCFastISel::isLoadTypeLegal(Type *Ty, MVT &VT) {
284	if (isTypeLegal(Ty, VT)) return true;
285
286	// If this is a type than can be sign or zero-extended to a basic operation
287	// go ahead and accept it now.
288	if (VT == MVT::i8 \|\| VT == MVT::i16 \|\| VT == MVT::i32) {
289	return true;
290	}
291
292	return false;
293	}
294
295	bool PPCFastISel::isValueAvailable(const Value V) const* {
296	if (!isa<Instruction>(Val: V))
297	return true;
298
299	const auto *I = cast<Instruction>(Val: V);
300	return FuncInfo.MBBMap [I->getParent()] == FuncInfo.MBB;
301	}
302
303	// Given a value Obj, create an Address object Addr that represents its
304	// address. Return false if we can't handle it.
305	bool PPCFastISel::PPCComputeAddress(const Value *Obj, Address &Addr) {
306	const User U = nullptr*;
307	unsigned Opcode = Instruction::UserOp1;
308	if (const Instruction *I = dyn_cast<Instruction>(Val: Obj)) {
309	// Don't walk into other basic blocks unless the object is an alloca from
310	// another block, otherwise it may not have a virtual register assigned.
311	if (FuncInfo.StaticAllocaMap.count(Val: static_cast<const AllocaInst *>(Obj)) \|\|
312	FuncInfo.MBBMap [I->getParent()] == FuncInfo.MBB) {
313	Opcode = I->getOpcode();
314	U = I;
315	}
316	} else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Val: Obj)) {
317	Opcode = C->getOpcode();
318	U = C;
319	}
320
321	switch (Opcode) {
322	default:
323	break;
324	case Instruction::BitCast:
325	// Look through bitcasts.
326	return PPCComputeAddress(Obj: U->getOperand(i: `0`), Addr);
327	case Instruction::IntToPtr:
328	// Look past no-op inttoptrs.
329	if (TLI.getValueType(DL, Ty: U->getOperand(i: `0`)->getType()) ==
330	TLI.getPointerTy(DL))
331	return PPCComputeAddress(Obj: U->getOperand(i: `0`), Addr);
332	break;
333	case Instruction::PtrToInt:
334	// Look past no-op ptrtoints.
335	if (TLI.getValueType(DL, Ty: U->getType()) == TLI.getPointerTy(DL))
336	return PPCComputeAddress(Obj: U->getOperand(i: `0`), Addr);
337	break;
338	case Instruction::GetElementPtr: {
339	Address SavedAddr = Addr;
340	int64_t TmpOffset = Addr.Offset;
341
342	// Iterate through the GEP folding the constants into offsets where
343	// we can.
344	gep_type_iterator GTI = gep_type_begin(GEP: U);
345	for (User::const_op_iterator II = U->op_begin() + `1`, IE = U->op_end();
346	II != IE; ++II, ++GTI) {
347	const Value Op = II;
348	if (StructType *STy = GTI.getStructTypeOrNull()) {
349	const StructLayout *SL = DL.getStructLayout(Ty: STy);
350	unsigned Idx = cast<ConstantInt>(Val: Op)->getZExtValue();
351	TmpOffset += SL->getElementOffset(Idx);
352	} else {
353	uint64_t S = GTI.getSequentialElementStride(DL);
354	for (;;) {
355	if (const ConstantInt *CI = dyn_cast<ConstantInt>(Val: Op)) {
356	// Constant-offset addressing.
357	TmpOffset += CI->getSExtValue() * S;
358	break;
359	}
360	if (canFoldAddIntoGEP(GEP: U, Add: Op)) {
361	// A compatible add with a constant operand. Fold the constant.
362	ConstantInt *CI =
363	cast<ConstantInt>(Val: cast<AddOperator>(Val: Op)->getOperand(i_nocapture: `1`));
364	TmpOffset += CI->getSExtValue() * S;
365	// Iterate on the other operand.
366	Op = cast<AddOperator>(Val: Op)->getOperand(i_nocapture: `0`);
367	continue;
368	}
369	// Unsupported
370	goto unsupported_gep;
371	}
372	}
373	}
374
375	// Try to grab the base operand now.
376	Addr.Offset = TmpOffset;
377	if (PPCComputeAddress(Obj: U->getOperand(i: `0`), Addr)) return true;
378
379	// We failed, restore everything and try the other options.
380	Addr = SavedAddr;
381
382	unsupported_gep:
383	break;
384	}
385	case Instruction::Alloca: {
386	const AllocaInst *AI = cast<AllocaInst>(Val: Obj);
387	DenseMap<const AllocaInst, int*>::iterator SI =
388	FuncInfo.StaticAllocaMap.find(Val: AI);
389	if (SI != FuncInfo.StaticAllocaMap.end()) {
390	Addr.BaseType = Address::FrameIndexBase;
391	Addr.Base.FI = SI ->second;
392	return true;
393	}
394	break;
395	}
396	}
397
398	// FIXME: References to parameters fall through to the behavior
399	// below. They should be able to reference a frame index since
400	// they are stored to the stack, so we can get "ld rx, offset(r1)"
401	// instead of "addi ry, r1, offset / ld rx, 0(ry)". Obj will
402	// just contain the parameter. Try to handle this with a FI.
403
404	// Try to get this in a register if nothing else has worked.
405	if (Addr.Base.Reg == `0`)
406	Addr.Base.Reg = getRegForValue(V: Obj);
407
408	// Prevent assignment of base register to X0, which is inappropriate
409	// for loads and stores alike.
410	if (Addr.Base.Reg != `0`)
411	MRI.setRegClass(Reg: Addr.Base.Reg, RC: &PPC::G8RC_and_G8RC_NOX0RegClass);
412
413	return Addr.Base.Reg != `0`;
414	}
415
416	// Fix up some addresses that can't be used directly. For example, if
417	// an offset won't fit in an instruction field, we may need to move it
418	// into an index register.
419	void PPCFastISel::PPCSimplifyAddress(Address &Addr, bool &UseOffset,
420	unsigned &IndexReg) {
421
422	// Check whether the offset fits in the instruction field.
423	if (!isInt<`16`>(x: Addr.Offset))
424	UseOffset = false;
425
426	// If this is a stack pointer and the offset needs to be simplified then
427	// put the alloca address into a register, set the base type back to
428	// register and continue. This should almost never happen.
429	if (!UseOffset && Addr.BaseType == Address::FrameIndexBase) {
430	Register ResultReg = createResultReg(RC: &PPC::G8RC_and_G8RC_NOX0RegClass);
431	BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::Opcode: ADDI8),
432	ResultReg).addFrameIndex(Addr.Base.FI).addImm(`0`);
433	Addr.Base.Reg = ResultReg;
434	Addr.BaseType = Address::RegBase;
435	}
436
437	if (!UseOffset) {
438	IntegerType OffsetTy = Type::getInt64Ty(C&: Context);
439	const ConstantInt *Offset = ConstantInt::getSigned(Ty: OffsetTy, V: Addr.Offset);
440	IndexReg = PPCMaterializeInt(CI: Offset, MVT::VT: i64);
441	assert(IndexReg && "Unexpected error in PPCMaterializeInt!");
442	}
443	}
444
445	// Emit a load instruction if possible, returning true if we succeeded,
446	// otherwise false. See commentary below for how the register class of
447	// the load is determined.
448	bool PPCFastISel::PPCEmitLoad(MVT VT, Register &ResultReg, Address &Addr,
449	const TargetRegisterClass *RC,
450	bool IsZExt, unsigned FP64LoadOpc) {
451	unsigned Opc;
452	bool UseOffset = true;
453	bool HasSPE = Subtarget->hasSPE();
454
455	// If ResultReg is given, it determines the register class of the load.
456	// Otherwise, RC is the register class to use. If the result of the
457	// load isn't anticipated in this block, both may be zero, in which
458	// case we must make a conservative guess. In particular, don't assign
459	// R0 or X0 to the result register, as the result may be used in a load,
460	// store, add-immediate, or isel that won't permit this. (Though
461	// perhaps the spill and reload of live-exit values would handle this?)
462	const TargetRegisterClass *UseRC =
463	(ResultReg ? MRI.getRegClass(Reg: ResultReg) :
464	(RC ? RC :
465	(VT == MVT::f64 ? (HasSPE ? &PPC::SPERCRegClass : &PPC::F8RCRegClass) :
466	(VT == MVT::f32 ? (HasSPE ? &PPC::GPRCRegClass : &PPC::F4RCRegClass) :
467	(VT == MVT::i64 ? &PPC::G8RC_and_G8RC_NOX0RegClass :
468	&PPC::GPRC_and_GPRC_NOR0RegClass)))));
469
470	bool Is32BitInt = UseRC->hasSuperClassEq(RC: &PPC::GPRCRegClass);
471
472	switch (VT.SimpleTy) {
473	default: // e.g., vector types not handled
474	return false;
475	case MVT::i8:
476	Opc = Is32BitInt ? PPC::LBZ : PPC::LBZ8;
477	break;
478	case MVT::i16:
479	Opc = (IsZExt ? (Is32BitInt ? PPC::LHZ : PPC::LHZ8)
480	: (Is32BitInt ? PPC::LHA : PPC::LHA8));
481	break;
482	case MVT::i32:
483	Opc = (IsZExt ? (Is32BitInt ? PPC::LWZ : PPC::LWZ8)
484	: (Is32BitInt ? PPC::LWA_32 : PPC::LWA));
485	if ((Opc == PPC::LWA \|\| Opc == PPC::LWA_32) && ((Addr.Offset & `3`) != `0`))
486	UseOffset = false;
487	break;
488	case MVT::i64:
489	Opc = PPC::LD;
490	assert(UseRC->hasSuperClassEq(&PPC::G8RCRegClass) &&
491	"64-bit load with 32-bit target??");
492	UseOffset = ((Addr.Offset & `3`) == `0`);
493	break;
494	case MVT::f32:
495	Opc = Subtarget->hasSPE() ? PPC::SPELWZ : PPC::LFS;
496	break;
497	case MVT::f64:
498	Opc = FP64LoadOpc;
499	break;
500	}
501
502	// If necessary, materialize the offset into a register and use
503	// the indexed form. Also handle stack pointers with special needs.
504	unsigned IndexReg = `0`;
505	PPCSimplifyAddress(Addr, UseOffset, IndexReg);
506
507	// If this is a potential VSX load with an offset of 0, a VSX indexed load can
508	// be used.
509	bool IsVSSRC = isVSSRCRegClass(RC: UseRC);
510	bool IsVSFRC = isVSFRCRegClass(RC: UseRC);
511	bool Is32VSXLoad = IsVSSRC && Opc == PPC::LFS;
512	bool Is64VSXLoad = IsVSFRC && Opc == PPC::LFD;
513	if ((Is32VSXLoad \|\| Is64VSXLoad) &&
514	(Addr.BaseType != Address::FrameIndexBase) && UseOffset &&
515	(Addr.Offset == `0`)) {
516	UseOffset = false;
517	}
518
519	if (ResultReg == `0`)
520	ResultReg = createResultReg(RC: UseRC);
521
522	// Note: If we still have a frame index here, we know the offset is
523	// in range, as otherwise PPCSimplifyAddress would have converted it
524	// into a RegBase.
525	if (Addr.BaseType == Address::FrameIndexBase) {
526	// VSX only provides an indexed load.
527	if (Is32VSXLoad \|\| Is64VSXLoad) return false;
528
529	MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
530	PtrInfo: MachinePointerInfo::getFixedStack(MF&: *FuncInfo.MF, FI: Addr.Base.FI,
531	Offset: Addr.Offset),
532	F: MachineMemOperand::MOLoad, Size: MFI.getObjectSize(ObjectIdx: Addr.Base.FI),
533	BaseAlignment: MFI.getObjectAlign(ObjectIdx: Addr.Base.FI));
534
535	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc), DestReg: ResultReg)
536	.addImm(Val: Addr.Offset).addFrameIndex(Idx: Addr.Base.FI).addMemOperand(MMO);
537
538	// Base reg with offset in range.
539	} else if (UseOffset) {
540	// VSX only provides an indexed load.
541	if (Is32VSXLoad \|\| Is64VSXLoad) return false;
542
543	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc), DestReg: ResultReg)
544	.addImm(Val: Addr.Offset).addReg(RegNo: Addr.Base.Reg);
545
546	// Indexed form.
547	} else {
548	// Get the RR opcode corresponding to the RI one. FIXME: It would be
549	// preferable to use the ImmToIdxMap from PPCRegisterInfo.cpp, but it
550	// is hard to get at.
551	switch (Opc) {
552	default: llvm_unreachable("Unexpected opcode!");
553	case PPC::LBZ: Opc = PPC::LBZX; break;
554	case PPC::LBZ8: Opc = PPC::LBZX8; break;
555	case PPC::LHZ: Opc = PPC::LHZX; break;
556	case PPC::LHZ8: Opc = PPC::LHZX8; break;
557	case PPC::LHA: Opc = PPC::LHAX; break;
558	case PPC::LHA8: Opc = PPC::LHAX8; break;
559	case PPC::LWZ: Opc = PPC::LWZX; break;
560	case PPC::LWZ8: Opc = PPC::LWZX8; break;
561	case PPC::LWA: Opc = PPC::LWAX; break;
562	case PPC::LWA_32: Opc = PPC::LWAX_32; break;
563	case PPC::LD: Opc = PPC::LDX; break;
564	case PPC::LFS: Opc = IsVSSRC ? PPC::LXSSPX : PPC::LFSX; break;
565	case PPC::LFD: Opc = IsVSFRC ? PPC::LXSDX : PPC::LFDX; break;
566	case PPC::EVLDD: Opc = PPC::EVLDDX; break;
567	case PPC::SPELWZ: Opc = PPC::SPELWZX; break;
568	}
569
570	auto MIB = BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc),
571	DestReg: ResultReg);
572
573	// If we have an index register defined we use it in the store inst,
574	// otherwise we use X0 as base as it makes the vector instructions to
575	// use zero in the computation of the effective address regardless the
576	// content of the register.
577	if (IndexReg)
578	MIB.addReg(RegNo: Addr.Base.Reg).addReg(RegNo: IndexReg);
579	else
580	MIB.addReg(PPC::ZERO8).addReg(Addr.Base.Reg);
581	}
582
583	return true;
584	}
585
586	// Attempt to fast-select a load instruction.
587	bool PPCFastISel::SelectLoad(const Instruction *I) {
588	// FIXME: No atomic loads are supported.
589	if (cast<LoadInst>(Val: I)->isAtomic())
590	return false;
591
592	// Verify we have a legal type before going any further.
593	MVT VT;
594	if (!isLoadTypeLegal(Ty: I->getType(), VT))
595	return false;
596
597	// See if we can handle this address.
598	Address Addr;
599	if (!PPCComputeAddress(Obj: I->getOperand(i: `0`), Addr))
600	return false;
601
602	// Look at the currently assigned register for this instruction
603	// to determine the required register class. This is necessary
604	// to constrain RA from using R0/X0 when this is not legal.
605	Register AssignedReg = FuncInfo.ValueMap [I];
606	const TargetRegisterClass *RC =
607	AssignedReg ? MRI.getRegClass(Reg: AssignedReg) : nullptr;
608
609	Register ResultReg = `0`;
610	if (!PPCEmitLoad(VT, ResultReg, Addr, RC, true,
611	Subtarget->hasSPE() ? PPC::EVLDD : PPC::LFD))
612	return false;
613	updateValueMap(I, Reg: ResultReg);
614	return true;
615	}
616
617	// Emit a store instruction to store SrcReg at Addr.
618	bool PPCFastISel::PPCEmitStore(MVT VT, unsigned SrcReg, Address &Addr) {
619	assert(SrcReg && "Nothing to store!");
620	unsigned Opc;
621	bool UseOffset = true;
622
623	const TargetRegisterClass *RC = MRI.getRegClass(Reg: SrcReg);
624	bool Is32BitInt = RC->hasSuperClassEq(&PPC::GPRCRegClass);
625
626	switch (VT.SimpleTy) {
627	default: // e.g., vector types not handled
628	return false;
629	case MVT::i8:
630	Opc = Is32BitInt ? PPC::STB : PPC::STB8;
631	break;
632	case MVT::i16:
633	Opc = Is32BitInt ? PPC::STH : PPC::STH8;
634	break;
635	case MVT::i32:
636	assert(Is32BitInt && "Not GPRC for i32??");
637	Opc = PPC::STW;
638	break;
639	case MVT::i64:
640	Opc = PPC::STD;
641	UseOffset = ((Addr.Offset & `3`) == `0`);
642	break;
643	case MVT::f32:
644	Opc = Subtarget->hasSPE() ? PPC::SPESTW : PPC::STFS;
645	break;
646	case MVT::f64:
647	Opc = Subtarget->hasSPE() ? PPC::EVSTDD : PPC::STFD;
648	break;
649	}
650
651	// If necessary, materialize the offset into a register and use
652	// the indexed form. Also handle stack pointers with special needs.
653	unsigned IndexReg = `0`;
654	PPCSimplifyAddress(Addr, UseOffset, IndexReg);
655
656	// If this is a potential VSX store with an offset of 0, a VSX indexed store
657	// can be used.
658	bool IsVSSRC = isVSSRCRegClass(RC);
659	bool IsVSFRC = isVSFRCRegClass(RC);
660	bool Is32VSXStore = IsVSSRC && Opc == PPC::STFS;
661	bool Is64VSXStore = IsVSFRC && Opc == PPC::STFD;
662	if ((Is32VSXStore \|\| Is64VSXStore) &&
663	(Addr.BaseType != Address::FrameIndexBase) && UseOffset &&
664	(Addr.Offset == `0`)) {
665	UseOffset = false;
666	}
667
668	// Note: If we still have a frame index here, we know the offset is
669	// in range, as otherwise PPCSimplifyAddress would have converted it
670	// into a RegBase.
671	if (Addr.BaseType == Address::FrameIndexBase) {
672	// VSX only provides an indexed store.
673	if (Is32VSXStore \|\| Is64VSXStore) return false;
674
675	MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
676	PtrInfo: MachinePointerInfo::getFixedStack(MF&: *FuncInfo.MF, FI: Addr.Base.FI,
677	Offset: Addr.Offset),
678	F: MachineMemOperand::MOStore, Size: MFI.getObjectSize(ObjectIdx: Addr.Base.FI),
679	BaseAlignment: MFI.getObjectAlign(ObjectIdx: Addr.Base.FI));
680
681	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc))
682	.addReg(RegNo: SrcReg)
683	.addImm(Val: Addr.Offset)
684	.addFrameIndex(Idx: Addr.Base.FI)
685	.addMemOperand(MMO);
686
687	// Base reg with offset in range.
688	} else if (UseOffset) {
689	// VSX only provides an indexed store.
690	if (Is32VSXStore \|\| Is64VSXStore)
691	return false;
692
693	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc))
694	.addReg(RegNo: SrcReg).addImm(Val: Addr.Offset).addReg(RegNo: Addr.Base.Reg);
695
696	// Indexed form.
697	} else {
698	// Get the RR opcode corresponding to the RI one. FIXME: It would be
699	// preferable to use the ImmToIdxMap from PPCRegisterInfo.cpp, but it
700	// is hard to get at.
701	switch (Opc) {
702	default: llvm_unreachable("Unexpected opcode!");
703	case PPC::STB: Opc = PPC::STBX; break;
704	case PPC::STH : Opc = PPC::STHX; break;
705	case PPC::STW : Opc = PPC::STWX; break;
706	case PPC::STB8: Opc = PPC::STBX8; break;
707	case PPC::STH8: Opc = PPC::STHX8; break;
708	case PPC::STW8: Opc = PPC::STWX8; break;
709	case PPC::STD: Opc = PPC::STDX; break;
710	case PPC::STFS: Opc = IsVSSRC ? PPC::STXSSPX : PPC::STFSX; break;
711	case PPC::STFD: Opc = IsVSFRC ? PPC::STXSDX : PPC::STFDX; break;
712	case PPC::EVSTDD: Opc = PPC::EVSTDDX; break;
713	case PPC::SPESTW: Opc = PPC::SPESTWX; break;
714	}
715
716	auto MIB = BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc))
717	.addReg(RegNo: SrcReg);
718
719	// If we have an index register defined we use it in the store inst,
720	// otherwise we use X0 as base as it makes the vector instructions to
721	// use zero in the computation of the effective address regardless the
722	// content of the register.
723	if (IndexReg)
724	MIB.addReg(RegNo: Addr.Base.Reg).addReg(RegNo: IndexReg);
725	else
726	MIB.addReg(PPC::ZERO8).addReg(Addr.Base.Reg);
727	}
728
729	return true;
730	}
731
732	// Attempt to fast-select a store instruction.
733	bool PPCFastISel::SelectStore(const Instruction *I) {
734	Value *Op0 = I->getOperand(i: `0`);
735	unsigned SrcReg = `0`;
736
737	// FIXME: No atomics loads are supported.
738	if (cast<StoreInst>(Val: I)->isAtomic())
739	return false;
740
741	// Verify we have a legal type before going any further.
742	MVT VT;
743	if (!isLoadTypeLegal(Ty: Op0->getType(), VT))
744	return false;
745
746	// Get the value to be stored into a register.
747	SrcReg = getRegForValue(V: Op0);
748	if (SrcReg == `0`)
749	return false;
750
751	// See if we can handle this address.
752	Address Addr;
753	if (!PPCComputeAddress(Obj: I->getOperand(i: `1`), Addr))
754	return false;
755
756	if (!PPCEmitStore(VT, SrcReg, Addr))
757	return false;
758
759	return true;
760	}
761
762	// Attempt to fast-select a branch instruction.
763	bool PPCFastISel::SelectBranch(const Instruction *I) {
764	const BranchInst *BI = cast<BranchInst>(Val: I);
765	MachineBasicBlock *BrBB = FuncInfo.MBB;
766	MachineBasicBlock *TBB = FuncInfo.MBBMap [BI->getSuccessor(i: `0`)];
767	MachineBasicBlock *FBB = FuncInfo.MBBMap [BI->getSuccessor(i: `1`)];
768
769	// For now, just try the simplest case where it's fed by a compare.
770	if (const CmpInst *CI = dyn_cast<CmpInst>(Val: BI->getCondition())) {
771	if (isValueAvailable(V: CI)) {
772	std::optional<PPC::Predicate> OptPPCPred =
773	getComparePred(Pred: CI->getPredicate());
774	if (!OptPPCPred)
775	return false;
776
777	PPC::Predicate PPCPred = *OptPPCPred;
778
779	// Take advantage of fall-through opportunities.
780	if (FuncInfo.MBB->isLayoutSuccessor(MBB: TBB)) {
781	std::swap(a&: TBB, b&: FBB);
782	PPCPred = PPC::InvertPredicate(Opcode: PPCPred);
783	}
784
785	Register CondReg = createResultReg(&PPC::CRRCRegClass);
786
787	if (!PPCEmitCmp(Src1Value: CI->getOperand(i_nocapture: `0`), Src2Value: CI->getOperand(i_nocapture: `1`), isZExt: CI->isUnsigned(),
788	DestReg: CondReg, Pred: PPCPred))
789	return false;
790
791	BuildMI(*BrBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::BCC))
792	.addImm(Subtarget->hasSPE() ? PPC::PRED_SPE : PPCPred)
793	.addReg(CondReg)
794	.addMBB(TBB);
795	finishCondBranch(BranchBB: BI->getParent(), TrueMBB: TBB, FalseMBB: FBB);
796	return true;
797	}
798	} else if (const ConstantInt *CI =
799	dyn_cast<ConstantInt>(Val: BI->getCondition())) {
800	uint64_t Imm = CI->getZExtValue();
801	MachineBasicBlock *Target = (Imm == `0`) ? FBB : TBB;
802	fastEmitBranch(MSucc: Target, DbgLoc: MIMD.getDL());
803	return true;
804	}
805
806	// FIXME: ARM looks for a case where the block containing the compare
807	// has been split from the block containing the branch. If this happens,
808	// there is a vreg available containing the result of the compare. I'm
809	// not sure we can do much, as we've lost the predicate information with
810	// the compare instruction -- we have a 4-bit CR but don't know which bit
811	// to test here.
812	return false;
813	}
814
815	// Attempt to emit a compare of the two source values. Signed and unsigned
816	// comparisons are supported. Return false if we can't handle it.
817	bool PPCFastISel::PPCEmitCmp(const Value SrcValue1, const* Value *SrcValue2,
818	bool IsZExt, unsigned DestReg,
819	const PPC::Predicate Pred) {
820	Type *Ty = SrcValue1->getType();
821	EVT SrcEVT = TLI.getValueType(DL, Ty, AllowUnknown: true);
822	if (!SrcEVT.isSimple())
823	return false;
824	MVT SrcVT = SrcEVT.getSimpleVT();
825
826	if (SrcVT == MVT::i1 && Subtarget->useCRBits())
827	return false;
828
829	// See if operand 2 is an immediate encodeable in the compare.
830	// FIXME: Operands are not in canonical order at -O0, so an immediate
831	// operand in position 1 is a lost opportunity for now. We are
832	// similar to ARM in this regard.
833	int64_t Imm = `0`;
834	bool UseImm = false;
835	const bool HasSPE = Subtarget->hasSPE();
836
837	// Only 16-bit integer constants can be represented in compares for
838	// PowerPC. Others will be materialized into a register.
839	if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(Val: SrcValue2)) {
840	if (SrcVT == MVT::i64 \|\| SrcVT == MVT::i32 \|\| SrcVT == MVT::i16 \|\|
841	SrcVT == MVT::i8 \|\| SrcVT == MVT::i1) {
842	const APInt &CIVal = ConstInt->getValue();
843	Imm = (IsZExt) ? (int64_t)CIVal.getZExtValue() :
844	(int64_t)CIVal.getSExtValue();
845	if ((IsZExt && isUInt<`16`>(x: Imm)) \|\| (!IsZExt && isInt<`16`>(x: Imm)))
846	UseImm = true;
847	}
848	}
849
850	Register SrcReg1 = getRegForValue(V: SrcValue1);
851	if (SrcReg1 == `0`)
852	return false;
853
854	unsigned SrcReg2 = `0`;
855	if (!UseImm) {
856	SrcReg2 = getRegForValue(V: SrcValue2);
857	if (SrcReg2 == `0`)
858	return false;
859	}
860
861	unsigned CmpOpc;
862	bool NeedsExt = false;
863
864	auto RC1 = MRI.getRegClass(Reg: SrcReg1);
865	auto RC2 = SrcReg2 != `0` ? MRI.getRegClass(Reg: SrcReg2) : nullptr;
866
867	switch (SrcVT.SimpleTy) {
868	default: return false;
869	case MVT::f32:
870	if (HasSPE) {
871	switch (Pred) {
872	default: return false;
873	case PPC::PRED_EQ:
874	CmpOpc = PPC::EFSCMPEQ;
875	break;
876	case PPC::PRED_LT:
877	CmpOpc = PPC::EFSCMPLT;
878	break;
879	case PPC::PRED_GT:
880	CmpOpc = PPC::EFSCMPGT;
881	break;
882	}
883	} else {
884	CmpOpc = PPC::FCMPUS;
885	if (isVSSRCRegClass(RC1))
886	SrcReg1 = copyRegToRegClass(&PPC::F4RCRegClass, SrcReg1);
887	if (RC2 && isVSSRCRegClass(RC2))
888	SrcReg2 = copyRegToRegClass(&PPC::F4RCRegClass, SrcReg2);
889	}
890	break;
891	case MVT::f64:
892	if (HasSPE) {
893	switch (Pred) {
894	default: return false;
895	case PPC::PRED_EQ:
896	CmpOpc = PPC::EFDCMPEQ;
897	break;
898	case PPC::PRED_LT:
899	CmpOpc = PPC::EFDCMPLT;
900	break;
901	case PPC::PRED_GT:
902	CmpOpc = PPC::EFDCMPGT;
903	break;
904	}
905	} else if (isVSFRCRegClass(RC: RC1) \|\| (RC2 && isVSFRCRegClass(RC: RC2))) {
906	CmpOpc = PPC::XSCMPUDP;
907	} else {
908	CmpOpc = PPC::FCMPUD;
909	}
910	break;
911	case MVT::i1:
912	case MVT::i8:
913	case MVT::i16:
914	NeedsExt = true;
915	[[fallthrough]];
916	case MVT::i32:
917	if (!UseImm)
918	CmpOpc = IsZExt ? PPC::CMPLW : PPC::CMPW;
919	else
920	CmpOpc = IsZExt ? PPC::CMPLWI : PPC::CMPWI;
921	break;
922	case MVT::i64:
923	if (!UseImm)
924	CmpOpc = IsZExt ? PPC::CMPLD : PPC::CMPD;
925	else
926	CmpOpc = IsZExt ? PPC::CMPLDI : PPC::CMPDI;
927	break;
928	}
929
930	if (NeedsExt) {
931	Register ExtReg = createResultReg(&PPC::GPRCRegClass);
932	if (!PPCEmitIntExt(SrcVT, SrcReg1, MVT::i32, ExtReg, IsZExt))
933	return false;
934	SrcReg1 = ExtReg;
935
936	if (!UseImm) {
937	Register ExtReg = createResultReg(&PPC::GPRCRegClass);
938	if (!PPCEmitIntExt(SrcVT, SrcReg2, MVT::i32, ExtReg, IsZExt))
939	return false;
940	SrcReg2 = ExtReg;
941	}
942	}
943
944	if (!UseImm)
945	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: CmpOpc), DestReg)
946	.addReg(RegNo: SrcReg1).addReg(RegNo: SrcReg2);
947	else
948	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: CmpOpc), DestReg)
949	.addReg(RegNo: SrcReg1).addImm(Val: Imm);
950
951	return true;
952	}
953
954	// Attempt to fast-select a floating-point extend instruction.
955	bool PPCFastISel::SelectFPExt(const Instruction *I) {
956	Value *Src = I->getOperand(i: `0`);
957	EVT SrcVT = TLI.getValueType(DL, Ty: Src->getType(), AllowUnknown: true);
958	EVT DestVT = TLI.getValueType(DL, Ty: I->getType(), AllowUnknown: true);
959
960	if (SrcVT != MVT::f32 \|\| DestVT != MVT::f64)
961	return false;
962
963	Register SrcReg = getRegForValue(V: Src);
964	if (!SrcReg)
965	return false;
966
967	// No code is generated for a FP extend.
968	updateValueMap(I, Reg: SrcReg);
969	return true;
970	}
971
972	// Attempt to fast-select a floating-point truncate instruction.
973	bool PPCFastISel::SelectFPTrunc(const Instruction *I) {
974	Value *Src = I->getOperand(i: `0`);
975	EVT SrcVT = TLI.getValueType(DL, Ty: Src->getType(), AllowUnknown: true);
976	EVT DestVT = TLI.getValueType(DL, Ty: I->getType(), AllowUnknown: true);
977
978	if (SrcVT != MVT::f64 \|\| DestVT != MVT::f32)
979	return false;
980
981	Register SrcReg = getRegForValue(V: Src);
982	if (!SrcReg)
983	return false;
984
985	// Round the result to single precision.
986	unsigned DestReg;
987	auto RC = MRI.getRegClass(Reg: SrcReg);
988	if (Subtarget->hasSPE()) {
989	DestReg = createResultReg(&PPC::GPRCRegClass);
990	BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::EFSCFD),
991	DestReg)
992	.addReg(SrcReg);
993	} else if (Subtarget->hasP8Vector() && isVSFRCRegClass(RC)) {
994	DestReg = createResultReg(&PPC::VSSRCRegClass);
995	BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::XSRSP),
996	DestReg)
997	.addReg(SrcReg);
998	} else {
999	SrcReg = copyRegToRegClass(&PPC::F8RCRegClass, SrcReg);
1000	DestReg = createResultReg(&PPC::F4RCRegClass);
1001	BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1002	TII.get(PPC::FRSP), DestReg)
1003	.addReg(SrcReg);
1004	}
1005
1006	updateValueMap(I, Reg: DestReg);
1007	return true;
1008	}
1009
1010	// Move an i32 or i64 value in a GPR to an f64 value in an FPR.
1011	// FIXME: When direct register moves are implemented (see PowerISA 2.07),
1012	// those should be used instead of moving via a stack slot when the
1013	// subtarget permits.
1014	// FIXME: The code here is sloppy for the 4-byte case. Can use a 4-byte
1015	// stack slot and 4-byte store/load sequence. Or just sext the 4-byte
1016	// case to 8 bytes which produces tighter code but wastes stack space.
1017	unsigned PPCFastISel::PPCMoveToFPReg(MVT SrcVT, unsigned SrcReg,
1018	bool IsSigned) {
1019
1020	// If necessary, extend 32-bit int to 64-bit.
1021	if (SrcVT == MVT::i32) {
1022	Register TmpReg = createResultReg(&PPC::G8RCRegClass);
1023	if (!PPCEmitIntExt(MVT::i32, SrcReg, MVT::i64, TmpReg, !IsSigned))
1024	return `0`;
1025	SrcReg = TmpReg;
1026	}
1027
1028	// Get a stack slot 8 bytes wide, aligned on an 8-byte boundary.
1029	Address Addr;
1030	Addr.BaseType = Address::FrameIndexBase;
1031	Addr.Base.FI = MFI.CreateStackObject(Size: `8`, Alignment: Align (`8`), isSpillSlot: false);
1032
1033	// Store the value from the GPR.
1034	if (!PPCEmitStore(MVT::i64, SrcReg, Addr))
1035	return `0`;
1036
1037	// Load the integer value into an FPR. The kind of load used depends
1038	// on a number of conditions.
1039	unsigned LoadOpc = PPC::LFD;
1040
1041	if (SrcVT == MVT::i32) {
1042	if (!IsSigned) {
1043	LoadOpc = PPC::LFIWZX;
1044	Addr.Offset = (Subtarget->isLittleEndian()) ? `0` : `4`;
1045	} else if (Subtarget->hasLFIWAX()) {
1046	LoadOpc = PPC::LFIWAX;
1047	Addr.Offset = (Subtarget->isLittleEndian()) ? `0` : `4`;
1048	}
1049	}
1050
1051	const TargetRegisterClass *RC = &PPC::F8RCRegClass;
1052	Register ResultReg = `0`;
1053	if (!PPCEmitLoad(MVT::f64, ResultReg, Addr, RC, !IsSigned, LoadOpc))
1054	return `0`;
1055
1056	return ResultReg;
1057	}
1058
1059	// Attempt to fast-select an integer-to-floating-point conversion.
1060	// FIXME: Once fast-isel has better support for VSX, conversions using
1061	// direct moves should be implemented.
1062	bool PPCFastISel::SelectIToFP(const Instruction I, bool* IsSigned) {
1063	MVT DstVT;
1064	Type *DstTy = I->getType();
1065	if (!isTypeLegal(Ty: DstTy, VT&: DstVT))
1066	return false;
1067
1068	if (DstVT != MVT::f32 && DstVT != MVT::f64)
1069	return false;
1070
1071	Value *Src = I->getOperand(i: `0`);
1072	EVT SrcEVT = TLI.getValueType(DL, Ty: Src->getType(), AllowUnknown: true);
1073	if (!SrcEVT.isSimple())
1074	return false;
1075
1076	MVT SrcVT = SrcEVT.getSimpleVT();
1077
1078	if (SrcVT != MVT::i8 && SrcVT != MVT::i16 &&
1079	SrcVT != MVT::i32 && SrcVT != MVT::i64)
1080	return false;
1081
1082	Register SrcReg = getRegForValue(V: Src);
1083	if (SrcReg == `0`)
1084	return false;
1085
1086	// Shortcut for SPE. Doesn't need to store/load, since it's all in the GPRs
1087	if (Subtarget->hasSPE()) {
1088	unsigned Opc;
1089	if (DstVT == MVT::f32)
1090	Opc = IsSigned ? PPC::EFSCFSI : PPC::EFSCFUI;
1091	else
1092	Opc = IsSigned ? PPC::EFDCFSI : PPC::EFDCFUI;
1093
1094	Register DestReg = createResultReg(&PPC::SPERCRegClass);
1095	// Generate the convert.
1096	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc), DestReg)
1097	.addReg(RegNo: SrcReg);
1098	updateValueMap(I, Reg: DestReg);
1099	return true;
1100	}
1101
1102	// We can only lower an unsigned convert if we have the newer
1103	// floating-point conversion operations.
1104	if (!IsSigned && !Subtarget->hasFPCVT())
1105	return false;
1106
1107	// FIXME: For now we require the newer floating-point conversion operations
1108	// (which are present only on P7 and A2 server models) when converting
1109	// to single-precision float. Otherwise we have to generate a lot of
1110	// fiddly code to avoid double rounding. If necessary, the fiddly code
1111	// can be found in PPCTargetLowering::LowerINT_TO_FP().
1112	if (DstVT == MVT::f32 && !Subtarget->hasFPCVT())
1113	return false;
1114
1115	// Extend the input if necessary.
1116	if (SrcVT == MVT::i8 \|\| SrcVT == MVT::i16) {
1117	Register TmpReg = createResultReg(&PPC::G8RCRegClass);
1118	if (!PPCEmitIntExt(SrcVT, SrcReg, MVT::i64, TmpReg, !IsSigned))
1119	return false;
1120	SrcVT = MVT::i64;
1121	SrcReg = TmpReg;
1122	}
1123
1124	// Move the integer value to an FPR.
1125	unsigned FPReg = PPCMoveToFPReg(SrcVT, SrcReg, IsSigned);
1126	if (FPReg == `0`)
1127	return false;
1128
1129	// Determine the opcode for the conversion.
1130	const TargetRegisterClass *RC = &PPC::F8RCRegClass;
1131	Register DestReg = createResultReg(RC);
1132	unsigned Opc;
1133
1134	if (DstVT == MVT::f32)
1135	Opc = IsSigned ? PPC::FCFIDS : PPC::FCFIDUS;
1136	else
1137	Opc = IsSigned ? PPC::FCFID : PPC::FCFIDU;
1138
1139	// Generate the convert.
1140	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc), DestReg)
1141	.addReg(RegNo: FPReg);
1142
1143	updateValueMap(I, Reg: DestReg);
1144	return true;
1145	}
1146
1147	// Move the floating-point value in SrcReg into an integer destination
1148	// register, and return the register (or zero if we can't handle it).
1149	// FIXME: When direct register moves are implemented (see PowerISA 2.07),
1150	// those should be used instead of moving via a stack slot when the
1151	// subtarget permits.
1152	unsigned PPCFastISel::PPCMoveToIntReg(const Instruction *I, MVT VT,
1153	unsigned SrcReg, bool IsSigned) {
1154	// Get a stack slot 8 bytes wide, aligned on an 8-byte boundary.
1155	// Note that if have STFIWX available, we could use a 4-byte stack
1156	// slot for i32, but this being fast-isel we'll just go with the
1157	// easiest code gen possible.
1158	Address Addr;
1159	Addr.BaseType = Address::FrameIndexBase;
1160	Addr.Base.FI = MFI.CreateStackObject(Size: `8`, Alignment: Align (`8`), isSpillSlot: false);
1161
1162	// Store the value from the FPR.
1163	if (!PPCEmitStore(MVT::f64, SrcReg, Addr))
1164	return `0`;
1165
1166	// Reload it into a GPR. If we want an i32 on big endian, modify the
1167	// address to have a 4-byte offset so we load from the right place.
1168	if (VT == MVT::i32)
1169	Addr.Offset = (Subtarget->isLittleEndian()) ? `0` : `4`;
1170
1171	// Look at the currently assigned register for this instruction
1172	// to determine the required register class.
1173	Register AssignedReg = FuncInfo.ValueMap [I];
1174	const TargetRegisterClass *RC =
1175	AssignedReg ? MRI.getRegClass(Reg: AssignedReg) : nullptr;
1176
1177	Register ResultReg = `0`;
1178	if (!PPCEmitLoad(VT, ResultReg, Addr, RC, IsZExt: !IsSigned))
1179	return `0`;
1180
1181	return ResultReg;
1182	}
1183
1184	// Attempt to fast-select a floating-point-to-integer conversion.
1185	// FIXME: Once fast-isel has better support for VSX, conversions using
1186	// direct moves should be implemented.
1187	bool PPCFastISel::SelectFPToI(const Instruction I, bool* IsSigned) {
1188	MVT DstVT, SrcVT;
1189	Type *DstTy = I->getType();
1190	if (!isTypeLegal(Ty: DstTy, VT&: DstVT))
1191	return false;
1192
1193	if (DstVT != MVT::i32 && DstVT != MVT::i64)
1194	return false;
1195
1196	// If we don't have FCTIDUZ, or SPE, and we need it, punt to SelectionDAG.
1197	if (DstVT == MVT::i64 && !IsSigned && !Subtarget->hasFPCVT() &&
1198	!Subtarget->hasSPE())
1199	return false;
1200
1201	Value *Src = I->getOperand(i: `0`);
1202	Type *SrcTy = Src->getType();
1203	if (!isTypeLegal(Ty: SrcTy, VT&: SrcVT))
1204	return false;
1205
1206	if (SrcVT != MVT::f32 && SrcVT != MVT::f64)
1207	return false;
1208
1209	Register SrcReg = getRegForValue(V: Src);
1210	if (SrcReg == `0`)
1211	return false;
1212
1213	// Convert f32 to f64 or convert VSSRC to VSFRC if necessary. This is just a
1214	// meaningless copy to get the register class right.
1215	const TargetRegisterClass *InRC = MRI.getRegClass(Reg: SrcReg);
1216	if (InRC == &PPC::F4RCRegClass)
1217	SrcReg = copyRegToRegClass(&PPC::F8RCRegClass, SrcReg);
1218	else if (InRC == &PPC::VSSRCRegClass)
1219	SrcReg = copyRegToRegClass(&PPC::VSFRCRegClass, SrcReg);
1220
1221	// Determine the opcode for the conversion, which takes place
1222	// entirely within FPRs or VSRs.
1223	unsigned DestReg;
1224	unsigned Opc;
1225	auto RC = MRI.getRegClass(Reg: SrcReg);
1226
1227	if (Subtarget->hasSPE()) {
1228	DestReg = createResultReg(&PPC::GPRCRegClass);
1229	if (IsSigned)
1230	Opc = InRC == &PPC::GPRCRegClass ? PPC::EFSCTSIZ : PPC::EFDCTSIZ;
1231	else
1232	Opc = InRC == &PPC::GPRCRegClass ? PPC::EFSCTUIZ : PPC::EFDCTUIZ;
1233	} else if (isVSFRCRegClass(RC)) {
1234	DestReg = createResultReg(&PPC::VSFRCRegClass);
1235	if (DstVT == MVT::i32)
1236	Opc = IsSigned ? PPC::XSCVDPSXWS : PPC::XSCVDPUXWS;
1237	else
1238	Opc = IsSigned ? PPC::XSCVDPSXDS : PPC::XSCVDPUXDS;
1239	} else {
1240	DestReg = createResultReg(&PPC::F8RCRegClass);
1241	if (DstVT == MVT::i32)
1242	if (IsSigned)
1243	Opc = PPC::FCTIWZ;
1244	else
1245	Opc = Subtarget->hasFPCVT() ? PPC::FCTIWUZ : PPC::FCTIDZ;
1246	else
1247	Opc = IsSigned ? PPC::FCTIDZ : PPC::FCTIDUZ;
1248	}
1249
1250	// Generate the convert.
1251	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc), DestReg)
1252	.addReg(RegNo: SrcReg);
1253
1254	// Now move the integer value from a float register to an integer register.
1255	unsigned IntReg = Subtarget->hasSPE()
1256	? DestReg
1257	: PPCMoveToIntReg(I, VT: DstVT, SrcReg: DestReg, IsSigned);
1258
1259	if (IntReg == `0`)
1260	return false;
1261
1262	updateValueMap(I, Reg: IntReg);
1263	return true;
1264	}
1265
1266	// Attempt to fast-select a binary integer operation that isn't already
1267	// handled automatically.
1268	bool PPCFastISel::SelectBinaryIntOp(const Instruction I, unsigned* ISDOpcode) {
1269	EVT DestVT = TLI.getValueType(DL, Ty: I->getType(), AllowUnknown: true);
1270
1271	// We can get here in the case when we have a binary operation on a non-legal
1272	// type and the target independent selector doesn't know how to handle it.
1273	if (DestVT != MVT::i16 && DestVT != MVT::i8)
1274	return false;
1275
1276	// Look at the currently assigned register for this instruction
1277	// to determine the required register class. If there is no register,
1278	// make a conservative choice (don't assign R0).
1279	Register AssignedReg = FuncInfo.ValueMap [I];
1280	const TargetRegisterClass *RC =
1281	(AssignedReg ? MRI.getRegClass(AssignedReg) :
1282	&PPC::GPRC_and_GPRC_NOR0RegClass);
1283	bool IsGPRC = RC->hasSuperClassEq(&PPC::GPRCRegClass);
1284
1285	unsigned Opc;
1286	switch (ISDOpcode) {
1287	default: return false;
1288	case ISD::ADD:
1289	Opc = IsGPRC ? PPC::ADD4 : PPC::ADD8;
1290	break;
1291	case ISD::OR:
1292	Opc = IsGPRC ? PPC::OR : PPC::OR8;
1293	break;
1294	case ISD::SUB:
1295	Opc = IsGPRC ? PPC::SUBF : PPC::SUBF8;
1296	break;
1297	}
1298
1299	Register ResultReg = createResultReg(RC ? RC : &PPC::G8RCRegClass);
1300	Register SrcReg1 = getRegForValue(V: I->getOperand(i: `0`));
1301	if (SrcReg1 == `0`) return false;
1302
1303	// Handle case of small immediate operand.
1304	if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(Val: I->getOperand(i: `1`))) {
1305	const APInt &CIVal = ConstInt->getValue();
1306	int Imm = (int)CIVal.getSExtValue();
1307	bool UseImm = true;
1308	if (isInt<`16`>(x: Imm)) {
1309	switch (Opc) {
1310	default:
1311	llvm_unreachable("Missing case!");
1312	case PPC::ADD4:
1313	Opc = PPC::ADDI;
1314	MRI.setRegClass(SrcReg1, &PPC::GPRC_and_GPRC_NOR0RegClass);
1315	break;
1316	case PPC::ADD8:
1317	Opc = PPC::ADDI8;
1318	MRI.setRegClass(SrcReg1, &PPC::G8RC_and_G8RC_NOX0RegClass);
1319	break;
1320	case PPC::OR:
1321	Opc = PPC::ORI;
1322	break;
1323	case PPC::OR8:
1324	Opc = PPC::ORI8;
1325	break;
1326	case PPC::SUBF:
1327	if (Imm == -`32768`)
1328	UseImm = false;
1329	else {
1330	Opc = PPC::ADDI;
1331	MRI.setRegClass(SrcReg1, &PPC::GPRC_and_GPRC_NOR0RegClass);
1332	Imm = -Imm;
1333	}
1334	break;
1335	case PPC::SUBF8:
1336	if (Imm == -`32768`)
1337	UseImm = false;
1338	else {
1339	Opc = PPC::ADDI8;
1340	MRI.setRegClass(SrcReg1, &PPC::G8RC_and_G8RC_NOX0RegClass);
1341	Imm = -Imm;
1342	}
1343	break;
1344	}
1345
1346	if (UseImm) {
1347	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc),
1348	DestReg: ResultReg)
1349	.addReg(RegNo: SrcReg1)
1350	.addImm(Val: Imm);
1351	updateValueMap(I, Reg: ResultReg);
1352	return true;
1353	}
1354	}
1355	}
1356
1357	// Reg-reg case.
1358	Register SrcReg2 = getRegForValue(V: I->getOperand(i: `1`));
1359	if (SrcReg2 == `0`) return false;
1360
1361	// Reverse operands for subtract-from.
1362	if (ISDOpcode == ISD::SUB)
1363	std::swap(a&: SrcReg1, b&: SrcReg2);
1364
1365	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc), DestReg: ResultReg)
1366	.addReg(RegNo: SrcReg1).addReg(RegNo: SrcReg2);
1367	updateValueMap(I, Reg: ResultReg);
1368	return true;
1369	}
1370
1371	// Handle arguments to a call that we're attempting to fast-select.
1372	// Return false if the arguments are too complex for us at the moment.
1373	bool PPCFastISel::processCallArgs(SmallVectorImpl<Value*> &Args,
1374	SmallVectorImpl<unsigned> &ArgRegs,
1375	SmallVectorImpl<MVT> &ArgVTs,
1376	SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
1377	SmallVectorImpl<unsigned> &RegArgs,
1378	CallingConv::ID CC,
1379	unsigned &NumBytes,
1380	bool IsVarArg) {
1381	SmallVector<CCValAssign, `16`> ArgLocs;
1382	CCState CCInfo(CC, IsVarArg, FuncInfo.MF, ArgLocs, Context);
1383
1384	// Reserve space for the linkage area on the stack.
1385	unsigned LinkageSize = Subtarget->getFrameLowering()->getLinkageSize();
1386	CCInfo.AllocateStack(Size: LinkageSize, Alignment: Align (`8`));
1387
1388	CCInfo.AnalyzeCallOperands(ArgVTs, Flags&: ArgFlags, Fn: CC_PPC64_ELF_FIS);
1389
1390	// Bail out if we can't handle any of the arguments.
1391	for (unsigned I = `0`, E = ArgLocs.size(); I != E; ++I) {
1392	CCValAssign &VA = ArgLocs [I];
1393	MVT ArgVT = ArgVTs [VA.getValNo()];
1394
1395	// Skip vector arguments for now, as well as long double and
1396	// uint128_t, and anything that isn't passed in a register.
1397	if (ArgVT.isVector() \|\| ArgVT.getSizeInBits() > `64` \|\| ArgVT == MVT::i1 \|\|
1398	!VA.isRegLoc() \|\| VA.needsCustom())
1399	return false;
1400
1401	// Skip bit-converted arguments for now.
1402	if (VA.getLocInfo() == CCValAssign::BCvt)
1403	return false;
1404	}
1405
1406	// Get a count of how many bytes are to be pushed onto the stack.
1407	NumBytes = CCInfo.getStackSize();
1408
1409	// The prolog code of the callee may store up to 8 GPR argument registers to
1410	// the stack, allowing va_start to index over them in memory if its varargs.
1411	// Because we cannot tell if this is needed on the caller side, we have to
1412	// conservatively assume that it is needed. As such, make sure we have at
1413	// least enough stack space for the caller to store the 8 GPRs.
1414	// FIXME: On ELFv2, it may be unnecessary to allocate the parameter area.
1415	NumBytes = std::max(a: NumBytes, b: LinkageSize + `64`);
1416
1417	// Issue CALLSEQ_START.
1418	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
1419	MCID: TII.get(Opcode: TII.getCallFrameSetupOpcode()))
1420	.addImm(Val: NumBytes).addImm(Val: `0`);
1421
1422	// Prepare to assign register arguments. Every argument uses up a
1423	// GPR protocol register even if it's passed in a floating-point
1424	// register (unless we're using the fast calling convention).
1425	unsigned NextGPR = PPC::X3;
1426	unsigned NextFPR = PPC::F1;
1427
1428	// Process arguments.
1429	for (unsigned I = `0`, E = ArgLocs.size(); I != E; ++I) {
1430	CCValAssign &VA = ArgLocs [I];
1431	unsigned Arg = ArgRegs [VA.getValNo()];
1432	MVT ArgVT = ArgVTs [VA.getValNo()];
1433
1434	// Handle argument promotion and bitcasts.
1435	switch (VA.getLocInfo()) {
1436	default:
1437	llvm_unreachable("Unknown loc info!");
1438	case CCValAssign::Full:
1439	break;
1440	case CCValAssign::SExt: {
1441	MVT DestVT = VA.getLocVT();
1442	const TargetRegisterClass *RC =
1443	(DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
1444	Register TmpReg = createResultReg(RC);
1445	if (!PPCEmitIntExt(SrcVT: ArgVT, SrcReg: Arg, DestVT, DestReg: TmpReg, /IsZExt/false))
1446	llvm_unreachable("Failed to emit a sext!");
1447	ArgVT = DestVT;
1448	Arg = TmpReg;
1449	break;
1450	}
1451	case CCValAssign::AExt:
1452	case CCValAssign::ZExt: {
1453	MVT DestVT = VA.getLocVT();
1454	const TargetRegisterClass *RC =
1455	(DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
1456	Register TmpReg = createResultReg(RC);
1457	if (!PPCEmitIntExt(SrcVT: ArgVT, SrcReg: Arg, DestVT, DestReg: TmpReg, /IsZExt/true))
1458	llvm_unreachable("Failed to emit a zext!");
1459	ArgVT = DestVT;
1460	Arg = TmpReg;
1461	break;
1462	}
1463	case CCValAssign::BCvt: {
1464	// FIXME: Not yet handled.
1465	llvm_unreachable("Should have bailed before getting here!");
1466	break;
1467	}
1468	}
1469
1470	// Copy this argument to the appropriate register.
1471	unsigned ArgReg;
1472	if (ArgVT == MVT::f32 \|\| ArgVT == MVT::f64) {
1473	ArgReg = NextFPR++;
1474	if (CC != CallingConv::Fast)
1475	++NextGPR;
1476	} else
1477	ArgReg = NextGPR++;
1478
1479	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
1480	MCID: TII.get(Opcode: TargetOpcode::COPY), DestReg: ArgReg).addReg(RegNo: Arg);
1481	RegArgs.push_back(Elt: ArgReg);
1482	}
1483
1484	return true;
1485	}
1486
1487	// For a call that we've determined we can fast-select, finish the
1488	// call sequence and generate a copy to obtain the return value (if any).
1489	bool PPCFastISel::finishCall(MVT RetVT, CallLoweringInfo &CLI, unsigned &NumBytes) {
1490	CallingConv::ID CC = CLI.CallConv;
1491
1492	// Issue CallSEQ_END.
1493	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
1494	MCID: TII.get(Opcode: TII.getCallFrameDestroyOpcode()))
1495	.addImm(Val: NumBytes).addImm(Val: `0`);
1496
1497	// Next, generate a copy to obtain the return value.
1498	// FIXME: No multi-register return values yet, though I don't foresee
1499	// any real difficulties there.
1500	if (RetVT != MVT::isVoid) {
1501	SmallVector<CCValAssign, `16`> RVLocs;
1502	CCState CCInfo(CC, false, FuncInfo.MF, RVLocs, Context);
1503	CCInfo.AnalyzeCallResult(VT: RetVT, Fn: RetCC_PPC64_ELF_FIS);
1504	CCValAssign &VA = RVLocs [`0`];
1505	assert(RVLocs.size() == `1` && "No support for multi-reg return values!");
1506	assert(VA.isRegLoc() && "Can only return in registers!");
1507
1508	MVT DestVT = VA.getValVT();
1509	MVT CopyVT = DestVT;
1510
1511	// Ints smaller than a register still arrive in a full 64-bit
1512	// register, so make sure we recognize this.
1513	if (RetVT == MVT::i8 \|\| RetVT == MVT::i16 \|\| RetVT == MVT::i32)
1514	CopyVT = MVT::i64;
1515
1516	unsigned SourcePhysReg = VA.getLocReg();
1517	unsigned ResultReg = `0`;
1518
1519	if (RetVT == CopyVT) {
1520	const TargetRegisterClass *CpyRC = TLI.getRegClassFor(VT: CopyVT);
1521	ResultReg = copyRegToRegClass(ToRC: CpyRC, SrcReg: SourcePhysReg);
1522
1523	// If necessary, round the floating result to single precision.
1524	} else if (CopyVT == MVT::f64) {
1525	ResultReg = createResultReg(RC: TLI.getRegClassFor(VT: RetVT));
1526	BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::FRSP),
1527	ResultReg).addReg(SourcePhysReg);
1528
1529	// If only the low half of a general register is needed, generate
1530	// a GPRC copy instead of a G8RC copy. (EXTRACT_SUBREG can't be
1531	// used along the fast-isel path (not lowered), and downstream logic
1532	// also doesn't like a direct subreg copy on a physical reg.)
1533	} else if (RetVT == MVT::i8 \|\| RetVT == MVT::i16 \|\| RetVT == MVT::i32) {
1534	// Convert physical register from G8RC to GPRC.
1535	SourcePhysReg -= PPC::X0 - PPC::R0;
1536	ResultReg = copyRegToRegClass(&PPC::GPRCRegClass, SourcePhysReg);
1537	}
1538
1539	assert(ResultReg && "ResultReg unset!");
1540	CLI.InRegs.push_back(Elt: SourcePhysReg);
1541	CLI.ResultReg = ResultReg;
1542	CLI.NumResultRegs = `1`;
1543	}
1544
1545	return true;
1546	}
1547
1548	bool PPCFastISel::fastLowerCall(CallLoweringInfo &CLI) {
1549	CallingConv::ID CC = CLI.CallConv;
1550	bool IsTailCall = CLI.IsTailCall;
1551	bool IsVarArg = CLI.IsVarArg;
1552	const Value *Callee = CLI.Callee;
1553	const MCSymbol *Symbol = CLI.Symbol;
1554
1555	if (!Callee && !Symbol)
1556	return false;
1557
1558	// Allow SelectionDAG isel to handle tail calls and long calls.
1559	if (IsTailCall \|\| Subtarget->useLongCalls())
1560	return false;
1561
1562	// Let SDISel handle vararg functions.
1563	if (IsVarArg)
1564	return false;
1565
1566	// If this is a PC-Rel function, let SDISel handle the call.
1567	if (Subtarget->isUsingPCRelativeCalls())
1568	return false;
1569
1570	// Handle simple calls for now, with legal return types and
1571	// those that can be extended.
1572	Type *RetTy = CLI.RetTy;
1573	MVT RetVT;
1574	if (RetTy->isVoidTy())
1575	RetVT = MVT::isVoid;
1576	else if (!isTypeLegal(RetTy, RetVT) && RetVT != MVT::i16 &&
1577	RetVT != MVT::i8)
1578	return false;
1579	else if (RetVT == MVT::i1 && Subtarget->useCRBits())
1580	// We can't handle boolean returns when CR bits are in use.
1581	return false;
1582
1583	// FIXME: No multi-register return values yet.
1584	if (RetVT != MVT::isVoid && RetVT != MVT::i8 && RetVT != MVT::i16 &&
1585	RetVT != MVT::i32 && RetVT != MVT::i64 && RetVT != MVT::f32 &&
1586	RetVT != MVT::f64) {
1587	SmallVector<CCValAssign, `16`> RVLocs;
1588	CCState CCInfo(CC, IsVarArg, FuncInfo.MF, RVLocs, Context);
1589	CCInfo.AnalyzeCallResult(VT: RetVT, Fn: RetCC_PPC64_ELF_FIS);
1590	if (RVLocs.size() > `1`)
1591	return false;
1592	}
1593
1594	// Bail early if more than 8 arguments, as we only currently
1595	// handle arguments passed in registers.
1596	unsigned NumArgs = CLI.OutVals.size();
1597	if (NumArgs > `8`)
1598	return false;
1599
1600	// Set up the argument vectors.
1601	SmallVector<Value*, `8`> Args;
1602	SmallVector<unsigned, `8`> ArgRegs;
1603	SmallVector<MVT, `8`> ArgVTs;
1604	SmallVector<ISD::ArgFlagsTy, `8`> ArgFlags;
1605
1606	Args.reserve(N: NumArgs);
1607	ArgRegs.reserve(N: NumArgs);
1608	ArgVTs.reserve(N: NumArgs);
1609	ArgFlags.reserve(N: NumArgs);
1610
1611	for (unsigned i = `0`, ie = NumArgs; i != ie; ++i) {
1612	// Only handle easy calls for now. It would be reasonably easy
1613	// to handle <= 8-byte structures passed ByVal in registers, but we
1614	// have to ensure they are right-justified in the register.
1615	ISD::ArgFlagsTy Flags = CLI.OutFlags [i];
1616	if (Flags.isInReg() \|\| Flags.isSRet() \|\| Flags.isNest() \|\| Flags.isByVal())
1617	return false;
1618
1619	Value *ArgValue = CLI.OutVals [i];
1620	Type *ArgTy = ArgValue->getType();
1621	MVT ArgVT;
1622	if (!isTypeLegal(ArgTy, ArgVT) && ArgVT != MVT::i16 && ArgVT != MVT::i8)
1623	return false;
1624
1625	// FIXME: FastISel cannot handle non-simple types yet, including 128-bit FP
1626	// types, which is passed through vector register. Skip these types and
1627	// fallback to default SelectionDAG based selection.
1628	if (ArgVT.isVector() \|\| ArgVT == MVT::f128)
1629	return false;
1630
1631	Register Arg = getRegForValue(V: ArgValue);
1632	if (Arg == `0`)
1633	return false;
1634
1635	Args.push_back(Elt: ArgValue);
1636	ArgRegs.push_back(Elt: Arg);
1637	ArgVTs.push_back(Elt: ArgVT);
1638	ArgFlags.push_back(Elt: Flags);
1639	}
1640
1641	// Process the arguments.
1642	SmallVector<unsigned, `8`> RegArgs;
1643	unsigned NumBytes;
1644
1645	if (!processCallArgs(Args, ArgRegs, ArgVTs, ArgFlags,
1646	RegArgs, CC, NumBytes, IsVarArg))
1647	return false;
1648
1649	MachineInstrBuilder MIB;
1650	// FIXME: No handling for function pointers yet. This requires
1651	// implementing the function descriptor (OPD) setup.
1652	const GlobalValue *GV = dyn_cast<GlobalValue>(Val: Callee);
1653	if (!GV) {
1654	// patchpoints are a special case; they always dispatch to a pointer value.
1655	// However, we don't actually want to generate the indirect call sequence
1656	// here (that will be generated, as necessary, during asm printing), and
1657	// the call we generate here will be erased by FastISel::selectPatchpoint,
1658	// so don't try very hard...
1659	if (CLI.IsPatchPoint)
1660	MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::NOP));
1661	else
1662	return false;
1663	} else {
1664	// Build direct call with NOP for TOC restore.
1665	// FIXME: We can and should optimize away the NOP for local calls.
1666	MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1667	TII.get(PPC::BL8_NOP));
1668	// Add callee.
1669	MIB.addGlobalAddress(GV);
1670	}
1671
1672	// Add implicit physical register uses to the call.
1673	for (unsigned II = `0`, IE = RegArgs.size(); II != IE; ++II)
1674	MIB.addReg(RegNo: RegArgs [II], flags: RegState::Implicit);
1675
1676	// Direct calls, in both the ELF V1 and V2 ABIs, need the TOC register live
1677	// into the call.
1678	PPCFuncInfo->setUsesTOCBasePtr();
1679	MIB.addReg(PPC::X2, RegState::Implicit);
1680
1681	// Add a register mask with the call-preserved registers. Proper
1682	// defs for return values will be added by setPhysRegsDeadExcept().
1683	MIB.addRegMask(Mask: TRI.getCallPreservedMask(MF: *FuncInfo.MF, CC));
1684
1685	CLI.Call = MIB;
1686
1687	// Finish off the call including any return values.
1688	return finishCall(RetVT, CLI, NumBytes);
1689	}
1690
1691	// Attempt to fast-select a return instruction.
1692	bool PPCFastISel::SelectRet(const Instruction *I) {
1693
1694	if (!FuncInfo.CanLowerReturn)
1695	return false;
1696
1697	const ReturnInst *Ret = cast<ReturnInst>(Val: I);
1698	const Function &F = *I->getParent()->getParent();
1699
1700	// Build a list of return value registers.
1701	SmallVector<unsigned, `4`> RetRegs;
1702	CallingConv::ID CC = F.getCallingConv();
1703
1704	if (Ret->getNumOperands() > `0`) {
1705	SmallVector<ISD::OutputArg, `4`> Outs;
1706	GetReturnInfo(CC, ReturnType: F.getReturnType(), attr: F.getAttributes(), Outs, TLI, DL);
1707
1708	// Analyze operands of the call, assigning locations to each operand.
1709	SmallVector<CCValAssign, `16`> ValLocs;
1710	CCState CCInfo(CC, F.isVarArg(), FuncInfo.MF, ValLocs, Context);
1711	CCInfo.AnalyzeReturn(Outs, Fn: RetCC_PPC64_ELF_FIS);
1712	const Value *RV = Ret->getOperand(i_nocapture: `0`);
1713
1714	// FIXME: Only one output register for now.
1715	if (ValLocs.size() > `1`)
1716	return false;
1717
1718	// Special case for returning a constant integer of any size - materialize
1719	// the constant as an i64 and copy it to the return register.
1720	if (const ConstantInt *CI = dyn_cast<ConstantInt>(Val: RV)) {
1721	CCValAssign &VA = ValLocs [`0`];
1722
1723	Register RetReg = VA.getLocReg();
1724	// We still need to worry about properly extending the sign. For example,
1725	// we could have only a single bit or a constant that needs zero
1726	// extension rather than sign extension. Make sure we pass the return
1727	// value extension property to integer materialization.
1728	unsigned SrcReg =
1729	PPCMaterializeInt(CI, MVT::i64, VA.getLocInfo() != CCValAssign::ZExt);
1730
1731	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
1732	MCID: TII.get(Opcode: TargetOpcode::COPY), DestReg: RetReg).addReg(RegNo: SrcReg);
1733
1734	RetRegs.push_back(Elt: RetReg);
1735
1736	} else {
1737	Register Reg = getRegForValue(V: RV);
1738
1739	if (Reg == `0`)
1740	return false;
1741
1742	// Copy the result values into the output registers.
1743	for (unsigned i = `0`; i < ValLocs.size(); ++i) {
1744
1745	CCValAssign &VA = ValLocs [i];
1746	assert(VA.isRegLoc() && "Can only return in registers!");
1747	RetRegs.push_back(Elt: VA.getLocReg());
1748	unsigned SrcReg = Reg + VA.getValNo();
1749
1750	EVT RVEVT = TLI.getValueType(DL, Ty: RV->getType());
1751	if (!RVEVT.isSimple())
1752	return false;
1753	MVT RVVT = RVEVT.getSimpleVT();
1754	MVT DestVT = VA.getLocVT();
1755
1756	if (RVVT != DestVT && RVVT != MVT::i8 &&
1757	RVVT != MVT::i16 && RVVT != MVT::i32)
1758	return false;
1759
1760	if (RVVT != DestVT) {
1761	switch (VA.getLocInfo()) {
1762	default:
1763	llvm_unreachable("Unknown loc info!");
1764	case CCValAssign::Full:
1765	llvm_unreachable("Full value assign but types don't match?");
1766	case CCValAssign::AExt:
1767	case CCValAssign::ZExt: {
1768	const TargetRegisterClass *RC =
1769	(DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
1770	Register TmpReg = createResultReg(RC);
1771	if (!PPCEmitIntExt(SrcVT: RVVT, SrcReg, DestVT, DestReg: TmpReg, IsZExt: true))
1772	return false;
1773	SrcReg = TmpReg;
1774	break;
1775	}
1776	case CCValAssign::SExt: {
1777	const TargetRegisterClass *RC =
1778	(DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
1779	Register TmpReg = createResultReg(RC);
1780	if (!PPCEmitIntExt(SrcVT: RVVT, SrcReg, DestVT, DestReg: TmpReg, IsZExt: false))
1781	return false;
1782	SrcReg = TmpReg;
1783	break;
1784	}
1785	}
1786	}
1787
1788	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
1789	MCID: TII.get(Opcode: TargetOpcode::COPY), DestReg: RetRegs [i])
1790	.addReg(RegNo: SrcReg);
1791	}
1792	}
1793	}
1794
1795	MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1796	TII.get(PPC::BLR8));
1797
1798	for (unsigned i = `0`, e = RetRegs.size(); i != e; ++i)
1799	MIB.addReg(RegNo: RetRegs [i], flags: RegState::Implicit);
1800
1801	return true;
1802	}
1803
1804	// Attempt to emit an integer extend of SrcReg into DestReg. Both
1805	// signed and zero extensions are supported. Return false if we
1806	// can't handle it.
1807	bool PPCFastISel::PPCEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
1808	unsigned DestReg, bool IsZExt) {
1809	if (DestVT != MVT::i32 && DestVT != MVT::i64)
1810	return false;
1811	if (SrcVT != MVT::i8 && SrcVT != MVT::i16 && SrcVT != MVT::i32)
1812	return false;
1813
1814	// Signed extensions use EXTSB, EXTSH, EXTSW.
1815	if (!IsZExt) {
1816	unsigned Opc;
1817	if (SrcVT == MVT::i8)
1818	Opc = (DestVT == MVT::i32) ? PPC::EXTSB : PPC::EXTSB8_32_64;
1819	else if (SrcVT == MVT::i16)
1820	Opc = (DestVT == MVT::i32) ? PPC::EXTSH : PPC::EXTSH8_32_64;
1821	else {
1822	assert(DestVT == MVT::i64 && "Signed extend from i32 to i32??");
1823	Opc = PPC::EXTSW_32_64;
1824	}
1825	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc), DestReg)
1826	.addReg(RegNo: SrcReg);
1827
1828	// Unsigned 32-bit extensions use RLWINM.
1829	} else if (DestVT == MVT::i32) {
1830	unsigned MB;
1831	if (SrcVT == MVT::i8)
1832	MB = `24`;
1833	else {
1834	assert(SrcVT == MVT::i16 && "Unsigned extend from i32 to i32??");
1835	MB = `16`;
1836	}
1837	BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::RLWINM),
1838	DestReg)
1839	.addReg(SrcReg).addImm(/SH=/`0`).addImm(MB).addImm(/ME=/`31`);
1840
1841	// Unsigned 64-bit extensions use RLDICL (with a 32-bit source).
1842	} else {
1843	unsigned MB;
1844	if (SrcVT == MVT::i8)
1845	MB = `56`;
1846	else if (SrcVT == MVT::i16)
1847	MB = `48`;
1848	else
1849	MB = `32`;
1850	BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1851	TII.get(PPC::RLDICL_32_64), DestReg)
1852	.addReg(SrcReg).addImm(/SH=/`0`).addImm(MB);
1853	}
1854
1855	return true;
1856	}
1857
1858	// Attempt to fast-select an indirect branch instruction.
1859	bool PPCFastISel::SelectIndirectBr(const Instruction *I) {
1860	Register AddrReg = getRegForValue(V: I->getOperand(i: `0`));
1861	if (AddrReg == `0`)
1862	return false;
1863
1864	BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::MTCTR8))
1865	.addReg(AddrReg);
1866	BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::BCTR8));
1867
1868	const IndirectBrInst *IB = cast<IndirectBrInst>(Val: I);
1869	for (const BasicBlock *SuccBB : IB->successors())
1870	FuncInfo.MBB->addSuccessor(Succ: FuncInfo.MBBMap [SuccBB]);
1871
1872	return true;
1873	}
1874
1875	// Attempt to fast-select an integer truncate instruction.
1876	bool PPCFastISel::SelectTrunc(const Instruction *I) {
1877	Value *Src = I->getOperand(i: `0`);
1878	EVT SrcVT = TLI.getValueType(DL, Ty: Src->getType(), AllowUnknown: true);
1879	EVT DestVT = TLI.getValueType(DL, Ty: I->getType(), AllowUnknown: true);
1880
1881	if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16)
1882	return false;
1883
1884	if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8)
1885	return false;
1886
1887	Register SrcReg = getRegForValue(V: Src);
1888	if (!SrcReg)
1889	return false;
1890
1891	// The only interesting case is when we need to switch register classes.
1892	if (SrcVT == MVT::i64)
1893	SrcReg = copyRegToRegClass(&PPC::GPRCRegClass, SrcReg, `0`, PPC::sub_32);
1894
1895	updateValueMap(I, Reg: SrcReg);
1896	return true;
1897	}
1898
1899	// Attempt to fast-select an integer extend instruction.
1900	bool PPCFastISel::SelectIntExt(const Instruction *I) {
1901	Type *DestTy = I->getType();
1902	Value *Src = I->getOperand(i: `0`);
1903	Type *SrcTy = Src->getType();
1904
1905	bool IsZExt = isa<ZExtInst>(Val: I);
1906	Register SrcReg = getRegForValue(V: Src);
1907	if (!SrcReg) return false;
1908
1909	EVT SrcEVT, DestEVT;
1910	SrcEVT = TLI.getValueType(DL, Ty: SrcTy, AllowUnknown: true);
1911	DestEVT = TLI.getValueType(DL, Ty: DestTy, AllowUnknown: true);
1912	if (!SrcEVT.isSimple())
1913	return false;
1914	if (!DestEVT.isSimple())
1915	return false;
1916
1917	MVT SrcVT = SrcEVT.getSimpleVT();
1918	MVT DestVT = DestEVT.getSimpleVT();
1919
1920	// If we know the register class needed for the result of this
1921	// instruction, use it. Otherwise pick the register class of the
1922	// correct size that does not contain X0/R0, since we don't know
1923	// whether downstream uses permit that assignment.
1924	Register AssignedReg = FuncInfo.ValueMap [I];
1925	const TargetRegisterClass *RC =
1926	(AssignedReg ? MRI.getRegClass(AssignedReg) :
1927	(DestVT == MVT::i64 ? &PPC::G8RC_and_G8RC_NOX0RegClass :
1928	&PPC::GPRC_and_GPRC_NOR0RegClass));
1929	Register ResultReg = createResultReg(RC);
1930
1931	if (!PPCEmitIntExt(SrcVT, SrcReg, DestVT, DestReg: ResultReg, IsZExt))
1932	return false;
1933
1934	updateValueMap(I, Reg: ResultReg);
1935	return true;
1936	}
1937
1938	// Attempt to fast-select an instruction that wasn't handled by
1939	// the table-generated machinery.
1940	bool PPCFastISel::fastSelectInstruction(const Instruction *I) {
1941
1942	switch (I->getOpcode()) {
1943	case Instruction::Load:
1944	return SelectLoad(I);
1945	case Instruction::Store:
1946	return SelectStore(I);
1947	case Instruction::Br:
1948	return SelectBranch(I);
1949	case Instruction::IndirectBr:
1950	return SelectIndirectBr(I);
1951	case Instruction::FPExt:
1952	return SelectFPExt(I);
1953	case Instruction::FPTrunc:
1954	return SelectFPTrunc(I);
1955	case Instruction::SIToFP:
1956	return SelectIToFP(I, /IsSigned/ true);
1957	case Instruction::UIToFP:
1958	return SelectIToFP(I, /IsSigned/ false);
1959	case Instruction::FPToSI:
1960	return SelectFPToI(I, /IsSigned/ true);
1961	case Instruction::FPToUI:
1962	return SelectFPToI(I, /IsSigned/ false);
1963	case Instruction::Add:
1964	return SelectBinaryIntOp(I, ISDOpcode: ISD::ADD);
1965	case Instruction::Or:
1966	return SelectBinaryIntOp(I, ISDOpcode: ISD::OR);
1967	case Instruction::Sub:
1968	return SelectBinaryIntOp(I, ISDOpcode: ISD::SUB);
1969	case Instruction::Ret:
1970	return SelectRet(I);
1971	case Instruction::Trunc:
1972	return SelectTrunc(I);
1973	case Instruction::ZExt:
1974	case Instruction::SExt:
1975	return SelectIntExt(I);
1976	// Here add other flavors of Instruction::XXX that automated
1977	// cases don't catch. For example, switches are terminators
1978	// that aren't yet handled.
1979	default:
1980	break;
1981	}
1982	return false;
1983	}
1984
1985	// Materialize a floating-point constant into a register, and return
1986	// the register number (or zero if we failed to handle it).
1987	unsigned PPCFastISel::PPCMaterializeFP(const ConstantFP *CFP, MVT VT) {
1988	// If this is a PC-Rel function, let SDISel handle constant pool.
1989	if (Subtarget->isUsingPCRelativeCalls())
1990	return false;
1991
1992	// No plans to handle long double here.
1993	if (VT != MVT::f32 && VT != MVT::f64)
1994	return `0`;
1995
1996	// All FP constants are loaded from the constant pool.
1997	Align Alignment = DL.getPrefTypeAlign(Ty: CFP->getType());
1998	unsigned Idx = MCP.getConstantPoolIndex(C: cast<Constant>(Val: CFP), Alignment);
1999	const bool HasSPE = Subtarget->hasSPE();
2000	const TargetRegisterClass *RC;
2001	if (HasSPE)
2002	RC = ((VT == MVT::f32) ? &PPC::GPRCRegClass : &PPC::SPERCRegClass);
2003	else
2004	RC = ((VT == MVT::f32) ? &PPC::F4RCRegClass : &PPC::F8RCRegClass);
2005
2006	Register DestReg = createResultReg(RC);
2007	CodeModel::Model CModel = TM.getCodeModel();
2008
2009	MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
2010	MachinePointerInfo::getConstantPool(*FuncInfo.MF),
2011	MachineMemOperand::MOLoad, (VT == MVT::f32) ? `4` : `8`, Alignment);
2012
2013	unsigned Opc;
2014
2015	if (HasSPE)
2016	Opc = ((VT == MVT::f32) ? PPC::SPELWZ : PPC::EVLDD);
2017	else
2018	Opc = ((VT == MVT::f32) ? PPC::LFS : PPC::LFD);
2019
2020	Register TmpReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
2021
2022	PPCFuncInfo->setUsesTOCBasePtr();
2023	// For small code model, generate a LF[SD](0, LDtocCPT(Idx, X2)).
2024	if (CModel == CodeModel::Small) {
2025	BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::LDtocCPT),
2026	TmpReg)
2027	.addConstantPoolIndex(Idx).addReg(PPC::X2);
2028	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc), DestReg)
2029	.addImm(Val: `0`).addReg(RegNo: TmpReg).addMemOperand(MMO);
2030	} else {
2031	// Otherwise we generate LF[SD](Idx[lo], ADDIStocHA8(X2, Idx)).
2032	BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::ADDIStocHA8),
2033	TmpReg).addReg(PPC::X2).addConstantPoolIndex(Idx);
2034	// But for large code model, we must generate a LDtocL followed
2035	// by the LF[SD].
2036	if (CModel == CodeModel::Large) {
2037	Register TmpReg2 = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
2038	BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::LDtocL),
2039	TmpReg2).addConstantPoolIndex(Idx).addReg(TmpReg);
2040	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc), DestReg)
2041	.addImm(Val: `0`)
2042	.addReg(RegNo: TmpReg2);
2043	} else
2044	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc), DestReg)
2045	.addConstantPoolIndex(Idx, Offset: `0`, TargetFlags: PPCII::MO_TOC_LO)
2046	.addReg(RegNo: TmpReg)
2047	.addMemOperand(MMO);
2048	}
2049
2050	return DestReg;
2051	}
2052
2053	// Materialize the address of a global value into a register, and return
2054	// the register number (or zero if we failed to handle it).
2055	unsigned PPCFastISel::PPCMaterializeGV(const GlobalValue *GV, MVT VT) {
2056	// If this is a PC-Rel function, let SDISel handle GV materialization.
2057	if (Subtarget->isUsingPCRelativeCalls())
2058	return false;
2059
2060	assert(VT == MVT::i64 && "Non-address!");
2061	const TargetRegisterClass *RC = &PPC::G8RC_and_G8RC_NOX0RegClass;
2062	Register DestReg = createResultReg(RC);
2063
2064	// Global values may be plain old object addresses, TLS object
2065	// addresses, constant pool entries, or jump tables. How we generate
2066	// code for these may depend on small, medium, or large code model.
2067	CodeModel::Model CModel = TM.getCodeModel();
2068
2069	// FIXME: Jump tables are not yet required because fast-isel doesn't
2070	// handle switches; if that changes, we need them as well. For now,
2071	// what follows assumes everything's a generic (or TLS) global address.
2072
2073	// FIXME: We don't yet handle the complexity of TLS.
2074	if (GV->isThreadLocal())
2075	return `0`;
2076
2077	// If the global has the toc-data attribute then fallback to DAG-ISEL.
2078	if (TM.getTargetTriple().isOSAIX())
2079	if (const GlobalVariable *Var = dyn_cast_or_null<GlobalVariable>(Val: GV))
2080	if (Var->hasAttribute(Kind: "toc-data"))
2081	return false;
2082
2083	PPCFuncInfo->setUsesTOCBasePtr();
2084	// For small code model, generate a simple TOC load.
2085	if (CModel == CodeModel::Small)
2086	BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::LDtoc),
2087	DestReg)
2088	.addGlobalAddress(GV)
2089	.addReg(PPC::X2);
2090	else {
2091	// If the address is an externally defined symbol, a symbol with common
2092	// or externally available linkage, a non-local function address, or a
2093	// jump table address (not yet needed), or if we are generating code
2094	// for large code model, we generate:
2095	// LDtocL(GV, ADDIStocHA8(%x2, GV))
2096	// Otherwise we generate:
2097	// ADDItocL8(ADDIStocHA8(%x2, GV), GV)
2098	// Either way, start with the ADDIStocHA8:
2099	Register HighPartReg = createResultReg(RC);
2100	BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::ADDIStocHA8),
2101	HighPartReg).addReg(PPC::X2).addGlobalAddress(GV);
2102
2103	if (Subtarget->isGVIndirectSymbol(GV)) {
2104	BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::LDtocL),
2105	DestReg).addGlobalAddress(GV).addReg(HighPartReg);
2106	} else {
2107	// Otherwise generate the ADDItocL8.
2108	BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::ADDItocL8),
2109	DestReg)
2110	.addReg(HighPartReg)
2111	.addGlobalAddress(GV);
2112	}
2113	}
2114
2115	return DestReg;
2116	}
2117
2118	// Materialize a 32-bit integer constant into a register, and return
2119	// the register number (or zero if we failed to handle it).
2120	unsigned PPCFastISel::PPCMaterialize32BitInt(int64_t Imm,
2121	const TargetRegisterClass *RC) {
2122	unsigned Lo = Imm & `0xFFFF`;
2123	unsigned Hi = (Imm >> `16`) & `0xFFFF`;
2124
2125	Register ResultReg = createResultReg(RC);
2126	bool IsGPRC = RC->hasSuperClassEq(&PPC::GPRCRegClass);
2127
2128	if (isInt<`16`>(x: Imm))
2129	BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
2130	TII.get(IsGPRC ? PPC::LI : PPC::LI8), ResultReg)
2131	.addImm(Imm);
2132	else if (Lo) {
2133	// Both Lo and Hi have nonzero bits.
2134	Register TmpReg = createResultReg(RC);
2135	BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
2136	TII.get(IsGPRC ? PPC::LIS : PPC::LIS8), TmpReg)
2137	.addImm(Hi);
2138	BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
2139	TII.get(IsGPRC ? PPC::ORI : PPC::ORI8), ResultReg)
2140	.addReg(TmpReg).addImm(Lo);
2141	} else
2142	// Just Hi bits.
2143	BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
2144	TII.get(IsGPRC ? PPC::LIS : PPC::LIS8), ResultReg)
2145	.addImm(Hi);
2146
2147	return ResultReg;
2148	}
2149
2150	// Materialize a 64-bit integer constant into a register, and return
2151	// the register number (or zero if we failed to handle it).
2152	unsigned PPCFastISel::PPCMaterialize64BitInt(int64_t Imm,
2153	const TargetRegisterClass *RC) {
2154	unsigned Remainder = `0`;
2155	unsigned Shift = `0`;
2156
2157	// If the value doesn't fit in 32 bits, see if we can shift it
2158	// so that it fits in 32 bits.
2159	if (!isInt<`32`>(x: Imm)) {
2160	Shift = llvm::countr_zero<uint64_t>(Val: Imm);
2161	int64_t ImmSh = static_cast<uint64_t>(Imm) >> Shift;
2162
2163	if (isInt<`32`>(x: ImmSh))
2164	Imm = ImmSh;
2165	else {
2166	Remainder = Imm;
2167	Shift = `32`;
2168	Imm >>= `32`;
2169	}
2170	}
2171
2172	// Handle the high-order 32 bits (if shifted) or the whole 32 bits
2173	// (if not shifted).
2174	unsigned TmpReg1 = PPCMaterialize32BitInt(Imm, RC);
2175	if (!Shift)
2176	return TmpReg1;
2177
2178	// If upper 32 bits were not zero, we've built them and need to shift
2179	// them into place.
2180	unsigned TmpReg2;
2181	if (Imm) {
2182	TmpReg2 = createResultReg(RC);
2183	BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::RLDICR),
2184	TmpReg2).addReg(TmpReg1).addImm(Shift).addImm(`63` - Shift);
2185	} else
2186	TmpReg2 = TmpReg1;
2187
2188	unsigned TmpReg3, Hi, Lo;
2189	if ((Hi = (Remainder >> `16`) & `0xFFFF`)) {
2190	TmpReg3 = createResultReg(RC);
2191	BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::ORIS8),
2192	TmpReg3).addReg(TmpReg2).addImm(Hi);
2193	} else
2194	TmpReg3 = TmpReg2;
2195
2196	if ((Lo = Remainder & `0xFFFF`)) {
2197	Register ResultReg = createResultReg(RC);
2198	BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::ORI8),
2199	ResultReg).addReg(TmpReg3).addImm(Lo);
2200	return ResultReg;
2201	}
2202
2203	return TmpReg3;
2204	}
2205
2206	// Materialize an integer constant into a register, and return
2207	// the register number (or zero if we failed to handle it).
2208	unsigned PPCFastISel::PPCMaterializeInt(const ConstantInt *CI, MVT VT,
2209	bool UseSExt) {
2210	// If we're using CR bit registers for i1 values, handle that as a special
2211	// case first.
2212	if (VT == MVT::i1 && Subtarget->useCRBits()) {
2213	Register ImmReg = createResultReg(&PPC::CRBITRCRegClass);
2214	BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
2215	TII.get(CI->isZero() ? PPC::CRUNSET : PPC::CRSET), ImmReg);
2216	return ImmReg;
2217	}
2218
2219	if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8 &&
2220	VT != MVT::i1)
2221	return `0`;
2222
2223	const TargetRegisterClass *RC =
2224	((VT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass);
2225	int64_t Imm = UseSExt ? CI->getSExtValue() : CI->getZExtValue();
2226
2227	// If the constant is in range, use a load-immediate.
2228	// Since LI will sign extend the constant we need to make sure that for
2229	// our zeroext constants that the sign extended constant fits into 16-bits -
2230	// a range of 0..0x7fff.
2231	if (isInt<`16`>(x: Imm)) {
2232	unsigned Opc = (VT == MVT::i64) ? PPC::LI8 : PPC::LI;
2233	Register ImmReg = createResultReg(RC);
2234	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc), DestReg: ImmReg)
2235	.addImm(Val: Imm);
2236	return ImmReg;
2237	}
2238
2239	// Construct the constant piecewise.
2240	if (VT == MVT::i64)
2241	return PPCMaterialize64BitInt(Imm, RC);
2242	else if (VT == MVT::i32)
2243	return PPCMaterialize32BitInt(Imm, RC);
2244
2245	return `0`;
2246	}
2247
2248	// Materialize a constant into a register, and return the register
2249	// number (or zero if we failed to handle it).
2250	unsigned PPCFastISel::fastMaterializeConstant(const Constant *C) {
2251	EVT CEVT = TLI.getValueType(DL, Ty: C->getType(), AllowUnknown: true);
2252
2253	// Only handle simple types.
2254	if (!CEVT.isSimple()) return `0`;
2255	MVT VT = CEVT.getSimpleVT();
2256
2257	if (const ConstantFP *CFP = dyn_cast<ConstantFP>(Val: C))
2258	return PPCMaterializeFP(CFP, VT);
2259	else if (const GlobalValue *GV = dyn_cast<GlobalValue>(Val: C))
2260	return PPCMaterializeGV(GV, VT);
2261	else if (const ConstantInt *CI = dyn_cast<ConstantInt>(Val: C))
2262	// Note that the code in FunctionLoweringInfo::ComputePHILiveOutRegInfo
2263	// assumes that constant PHI operands will be zero extended, and failure to
2264	// match that assumption will cause problems if we sign extend here but
2265	// some user of a PHI is in a block for which we fall back to full SDAG
2266	// instruction selection.
2267	return PPCMaterializeInt(CI, VT, UseSExt: false);
2268
2269	return `0`;
2270	}
2271
2272	// Materialize the address created by an alloca into a register, and
2273	// return the register number (or zero if we failed to handle it).
2274	unsigned PPCFastISel::fastMaterializeAlloca(const AllocaInst *AI) {
2275	// Don't handle dynamic allocas.
2276	if (!FuncInfo.StaticAllocaMap.count(Val: AI)) return `0`;
2277
2278	MVT VT;
2279	if (!isLoadTypeLegal(Ty: AI->getType(), VT)) return `0`;
2280
2281	DenseMap<const AllocaInst, int*>::iterator SI =
2282	FuncInfo.StaticAllocaMap.find(Val: AI);
2283
2284	if (SI != FuncInfo.StaticAllocaMap.end()) {
2285	Register ResultReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
2286	BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(PPC::ADDI8),
2287	ResultReg).addFrameIndex(SI->second).addImm(`0`);
2288	return ResultReg;
2289	}
2290
2291	return `0`;
2292	}
2293
2294	// Fold loads into extends when possible.
2295	// FIXME: We can have multiple redundant extend/trunc instructions
2296	// following a load. The folding only picks up one. Extend this
2297	// to check subsequent instructions for the same pattern and remove
2298	// them. Thus ResultReg should be the def reg for the last redundant
2299	// instruction in a chain, and all intervening instructions can be
2300	// removed from parent. Change test/CodeGen/PowerPC/fast-isel-fold.ll
2301	// to add ELF64-NOT: rldicl to the appropriate tests when this works.
2302	bool PPCFastISel::tryToFoldLoadIntoMI(MachineInstr MI, unsigned* OpNo,
2303	const LoadInst *LI) {
2304	// Verify we have a legal type before going any further.
2305	MVT VT;
2306	if (!isLoadTypeLegal(Ty: LI->getType(), VT))
2307	return false;
2308
2309	// Combine load followed by zero- or sign-extend.
2310	bool IsZExt = false;
2311	switch(MI->getOpcode()) {
2312	default:
2313	return false;
2314
2315	case PPC::RLDICL:
2316	case PPC::RLDICL_32_64: {
2317	IsZExt = true;
2318	unsigned MB = MI->getOperand(i: `3`).getImm();
2319	if ((VT == MVT::i8 && MB <= `56`) \|\|
2320	(VT == MVT::i16 && MB <= `48`) \|\|
2321	(VT == MVT::i32 && MB <= `32`))
2322	break;
2323	return false;
2324	}
2325
2326	case PPC::RLWINM:
2327	case PPC::RLWINM8: {
2328	IsZExt = true;
2329	unsigned MB = MI->getOperand(i: `3`).getImm();
2330	if ((VT == MVT::i8 && MB <= `24`) \|\|
2331	(VT == MVT::i16 && MB <= `16`))
2332	break;
2333	return false;
2334	}
2335
2336	case PPC::EXTSB:
2337	case PPC::EXTSB8:
2338	case PPC::EXTSB8_32_64:
2339	/ There is no sign-extending load-byte instruction. /
2340	return false;
2341
2342	case PPC::EXTSH:
2343	case PPC::EXTSH8:
2344	case PPC::EXTSH8_32_64: {
2345	if (VT != MVT::i16 && VT != MVT::i8)
2346	return false;
2347	break;
2348	}
2349
2350	case PPC::EXTSW:
2351	case PPC::EXTSW_32:
2352	case PPC::EXTSW_32_64: {
2353	if (VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8)
2354	return false;
2355	break;
2356	}
2357	}
2358
2359	// See if we can handle this address.
2360	Address Addr;
2361	if (!PPCComputeAddress(Obj: LI->getOperand(i_nocapture: `0`), Addr))
2362	return false;
2363
2364	Register ResultReg = MI->getOperand(i: `0`).getReg();
2365
2366	if (!PPCEmitLoad(VT, ResultReg, Addr, nullptr, IsZExt,
2367	Subtarget->hasSPE() ? PPC::EVLDD : PPC::LFD))
2368	return false;
2369
2370	MachineBasicBlock::iterator I(MI);
2371	removeDeadCode(I, E: std::next(x: I));
2372	return true;
2373	}
2374
2375	// Attempt to lower call arguments in a faster way than done by
2376	// the selection DAG code.
2377	bool PPCFastISel::fastLowerArguments() {
2378	// Defer to normal argument lowering for now. It's reasonably
2379	// efficient. Consider doing something like ARM to handle the
2380	// case where all args fit in registers, no varargs, no float
2381	// or vector args.
2382	return false;
2383	}
2384
2385	// Handle materializing integer constants into a register. This is not
2386	// automatically generated for PowerPC, so must be explicitly created here.
2387	unsigned PPCFastISel::fastEmit_i(MVT Ty, MVT VT, unsigned Opc, uint64_t Imm) {
2388
2389	if (Opc != ISD::Constant)
2390	return `0`;
2391
2392	// If we're using CR bit registers for i1 values, handle that as a special
2393	// case first.
2394	if (VT == MVT::i1 && Subtarget->useCRBits()) {
2395	Register ImmReg = createResultReg(&PPC::CRBITRCRegClass);
2396	BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
2397	TII.get(Imm == `0` ? PPC::CRUNSET : PPC::CRSET), ImmReg);
2398	return ImmReg;
2399	}
2400
2401	if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8 &&
2402	VT != MVT::i1)
2403	return `0`;
2404
2405	const TargetRegisterClass *RC = ((VT == MVT::i64) ? &PPC::G8RCRegClass :
2406	&PPC::GPRCRegClass);
2407	if (VT == MVT::i64)
2408	return PPCMaterialize64BitInt(Imm, RC);
2409	else
2410	return PPCMaterialize32BitInt(Imm, RC);
2411	}
2412
2413	// Override for ADDI and ADDI8 to set the correct register class
2414	// on RHS operand 0. The automatic infrastructure naively assumes
2415	// GPRC for i32 and G8RC for i64; the concept of "no R0" is lost
2416	// for these cases. At the moment, none of the other automatically
2417	// generated RI instructions require special treatment. However, once
2418	// SelectSelect is implemented, "isel" requires similar handling.
2419	//
2420	// Also be conservative about the output register class. Avoid
2421	// assigning R0 or X0 to the output register for GPRC and G8RC
2422	// register classes, as any such result could be used in ADDI, etc.,
2423	// where those regs have another meaning.
2424	unsigned PPCFastISel::fastEmitInst_ri(unsigned MachineInstOpcode,
2425	const TargetRegisterClass *RC,
2426	unsigned Op0,
2427	uint64_t Imm) {
2428	if (MachineInstOpcode == PPC::ADDI)
2429	MRI.setRegClass(Op0, &PPC::GPRC_and_GPRC_NOR0RegClass);
2430	else if (MachineInstOpcode == PPC::ADDI8)
2431	MRI.setRegClass(Op0, &PPC::G8RC_and_G8RC_NOX0RegClass);
2432
2433	const TargetRegisterClass *UseRC =
2434	(RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass :
2435	(RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC));
2436
2437	return FastISel::fastEmitInst_ri(MachineInstOpcode, RC: UseRC, Op0, Imm);
2438	}
2439
2440	// Override for instructions with one register operand to avoid use of
2441	// R0/X0. The automatic infrastructure isn't aware of the context so
2442	// we must be conservative.
2443	unsigned PPCFastISel::fastEmitInst_r(unsigned MachineInstOpcode,
2444	const TargetRegisterClass* RC,
2445	unsigned Op0) {
2446	const TargetRegisterClass *UseRC =
2447	(RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass :
2448	(RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC));
2449
2450	return FastISel::fastEmitInst_r(MachineInstOpcode, RC: UseRC, Op0);
2451	}
2452
2453	// Override for instructions with two register operands to avoid use
2454	// of R0/X0. The automatic infrastructure isn't aware of the context
2455	// so we must be conservative.
2456	unsigned PPCFastISel::fastEmitInst_rr(unsigned MachineInstOpcode,
2457	const TargetRegisterClass* RC,
2458	unsigned Op0, unsigned Op1) {
2459	const TargetRegisterClass *UseRC =
2460	(RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass :
2461	(RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC));
2462
2463	return FastISel::fastEmitInst_rr(MachineInstOpcode, RC: UseRC, Op0, Op1);
2464	}
2465
2466	namespace llvm {
2467	// Create the fast instruction selector for PowerPC64 ELF.
2468	FastISel *PPC::createFastISel(FunctionLoweringInfo &FuncInfo,
2469	const TargetLibraryInfo *LibInfo) {
2470	// Only available on 64-bit for now.
2471	const PPCSubtarget &Subtarget = FuncInfo.MF->getSubtarget<PPCSubtarget>();
2472	if (Subtarget.isPPC64())
2473	return new PPCFastISel (FuncInfo, LibInfo);
2474	return nullptr;
2475	}
2476	}
2477

source code of llvm/lib/Target/PowerPC/PPCFastISel.cpp