VEISelLowering.cpp source code [llvm/lib/Target/VE/VEISelLowering.cpp]

1	//===-- VEISelLowering.cpp - VE DAG Lowering Implementation ---------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// This file implements the interfaces that VE uses to lower LLVM code into a
10	// selection DAG.
11	//
12	//===----------------------------------------------------------------------===//
13
14	#include "VEISelLowering.h"
15	#include "MCTargetDesc/VEMCExpr.h"
16	#include "VECustomDAG.h"
17	#include "VEInstrBuilder.h"
18	#include "VEMachineFunctionInfo.h"
19	#include "VERegisterInfo.h"
20	#include "VETargetMachine.h"
21	#include "llvm/ADT/StringSwitch.h"
22	#include "llvm/CodeGen/CallingConvLower.h"
23	#include "llvm/CodeGen/MachineFrameInfo.h"
24	#include "llvm/CodeGen/MachineFunction.h"
25	#include "llvm/CodeGen/MachineInstrBuilder.h"
26	#include "llvm/CodeGen/MachineJumpTableInfo.h"
27	#include "llvm/CodeGen/MachineModuleInfo.h"
28	#include "llvm/CodeGen/MachineRegisterInfo.h"
29	#include "llvm/CodeGen/SelectionDAG.h"
30	#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
31	#include "llvm/IR/DerivedTypes.h"
32	#include "llvm/IR/Function.h"
33	#include "llvm/IR/IRBuilder.h"
34	#include "llvm/IR/Module.h"
35	#include "llvm/Support/ErrorHandling.h"
36	#include "llvm/Support/KnownBits.h"
37	using namespace llvm;
38
39	#define DEBUG_TYPE "ve-lower"
40
41	//===----------------------------------------------------------------------===//
42	// Calling Convention Implementation
43	//===----------------------------------------------------------------------===//
44
45	#include "VEGenCallingConv.inc"
46
47	CCAssignFn *getReturnCC(CallingConv::ID CallConv) {
48	switch (CallConv) {
49	default:
50	return RetCC_VE_C;
51	case CallingConv::Fast:
52	return RetCC_VE_Fast;
53	}
54	}
55
56	CCAssignFn getParamCC(CallingConv::ID CallConv, bool* IsVarArg) {
57	if (IsVarArg)
58	return CC_VE2;
59	switch (CallConv) {
60	default:
61	return CC_VE_C;
62	case CallingConv::Fast:
63	return CC_VE_Fast;
64	}
65	}
66
67	bool VETargetLowering::CanLowerReturn(
68	CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
69	const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
70	CCAssignFn *RetCC = getReturnCC(CallConv);
71	SmallVector<CCValAssign, `16`> RVLocs;
72	CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
73	return CCInfo.CheckReturn(Outs, Fn: RetCC);
74	}
75
76	static const MVT AllVectorVTs[] = {MVT::v256i32, MVT::v512i32, MVT::v256i64,
77	MVT::v256f32, MVT::v512f32, MVT::v256f64};
78
79	static const MVT AllMaskVTs[] = {MVT::v256i1, MVT::v512i1};
80
81	static const MVT AllPackedVTs[] = {MVT::v512i32, MVT::v512f32};
82
83	void VETargetLowering::initRegisterClasses() {
84	// Set up the register classes.
85	addRegisterClass(MVT::VT: i32, RC: &VE::I32RegClass);
86	addRegisterClass(MVT::VT: i64, RC: &VE::I64RegClass);
87	addRegisterClass(MVT::VT: f32, RC: &VE::F32RegClass);
88	addRegisterClass(MVT::VT: f64, RC: &VE::I64RegClass);
89	addRegisterClass(MVT::VT: f128, RC: &VE::F128RegClass);
90
91	if (Subtarget->enableVPU()) {
92	for (MVT VecVT : AllVectorVTs)
93	addRegisterClass(VecVT, &VE::V64RegClass);
94	addRegisterClass(MVT::VT: v256i1, RC: &VE::VMRegClass);
95	addRegisterClass(MVT::VT: v512i1, RC: &VE::VM512RegClass);
96	}
97	}
98
99	void VETargetLowering::initSPUActions() {
100	const auto &TM = getTargetMachine();
101	/// Load & Store {
102
103	// VE doesn't have i1 sign extending load.
104	for (MVT VT : MVT::integer_valuetypes()) {
105	setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
106	setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
107	setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
108	setTruncStoreAction(VT, MVT::i1, Expand);
109	}
110
111	// VE doesn't have floating point extload/truncstore, so expand them.
112	for (MVT FPVT : MVT::fp_valuetypes()) {
113	for (MVT OtherFPVT : MVT::fp_valuetypes()) {
114	setLoadExtAction(ISD::EXTLOAD, FPVT, OtherFPVT, Expand);
115	setTruncStoreAction(FPVT, OtherFPVT, Expand);
116	}
117	}
118
119	// VE doesn't have fp128 load/store, so expand them in custom lower.
120	setOperationAction(ISD::LOAD, MVT::f128, Custom);
121	setOperationAction(ISD::STORE, MVT::f128, Custom);
122
123	/// } Load & Store
124
125	// Custom legalize address nodes into LO/HI parts.
126	MVT PtrVT = MVT::getIntegerVT(BitWidth: TM.getPointerSizeInBits(AS: `0`));
127	setOperationAction(Op: ISD::BlockAddress, VT: PtrVT, Action: Custom);
128	setOperationAction(Op: ISD::GlobalAddress, VT: PtrVT, Action: Custom);
129	setOperationAction(Op: ISD::GlobalTLSAddress, VT: PtrVT, Action: Custom);
130	setOperationAction(Op: ISD::ConstantPool, VT: PtrVT, Action: Custom);
131	setOperationAction(Op: ISD::JumpTable, VT: PtrVT, Action: Custom);
132
133	/// VAARG handling {
134	setOperationAction(ISD::VASTART, MVT::Other, Custom);
135	// VAARG needs to be lowered to access with 8 bytes alignment.
136	setOperationAction(ISD::VAARG, MVT::Other, Custom);
137	// Use the default implementation.
138	setOperationAction(ISD::VACOPY, MVT::Other, Expand);
139	setOperationAction(ISD::VAEND, MVT::Other, Expand);
140	/// } VAARG handling
141
142	/// Stack {
143	setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
144	setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);
145
146	// Use the default implementation.
147	setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
148	setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
149	/// } Stack
150
151	/// Branch {
152
153	// VE doesn't have BRCOND
154	setOperationAction(ISD::BRCOND, MVT::Other, Expand);
155
156	// BR_JT is not implemented yet.
157	setOperationAction(ISD::BR_JT, MVT::Other, Expand);
158
159	/// } Branch
160
161	/// Int Ops {
162	for (MVT IntVT : {MVT::i32, MVT::i64}) {
163	// VE has no REM or DIVREM operations.
164	setOperationAction(ISD::UREM, IntVT, Expand);
165	setOperationAction(ISD::SREM, IntVT, Expand);
166	setOperationAction(ISD::SDIVREM, IntVT, Expand);
167	setOperationAction(ISD::UDIVREM, IntVT, Expand);
168
169	// VE has no SHL_PARTS/SRA_PARTS/SRL_PARTS operations.
170	setOperationAction(ISD::SHL_PARTS, IntVT, Expand);
171	setOperationAction(ISD::SRA_PARTS, IntVT, Expand);
172	setOperationAction(ISD::SRL_PARTS, IntVT, Expand);
173
174	// VE has no MULHU/S or U/SMUL_LOHI operations.
175	// TODO: Use MPD instruction to implement SMUL_LOHI for i32 type.
176	setOperationAction(ISD::MULHU, IntVT, Expand);
177	setOperationAction(ISD::MULHS, IntVT, Expand);
178	setOperationAction(ISD::UMUL_LOHI, IntVT, Expand);
179	setOperationAction(ISD::SMUL_LOHI, IntVT, Expand);
180
181	// VE has no CTTZ, ROTL, ROTR operations.
182	setOperationAction(ISD::CTTZ, IntVT, Expand);
183	setOperationAction(ISD::ROTL, IntVT, Expand);
184	setOperationAction(ISD::ROTR, IntVT, Expand);
185
186	// VE has 64 bits instruction which works as i64 BSWAP operation. This
187	// instruction works fine as i32 BSWAP operation with an additional
188	// parameter. Use isel patterns to lower BSWAP.
189	setOperationAction(ISD::BSWAP, IntVT, Legal);
190
191	// VE has only 64 bits instructions which work as i64 BITREVERSE/CTLZ/CTPOP
192	// operations. Use isel patterns for i64, promote for i32.
193	LegalizeAction Act = (IntVT == MVT::i32) ? Promote : Legal;
194	setOperationAction(ISD::BITREVERSE, IntVT, Act);
195	setOperationAction(ISD::CTLZ, IntVT, Act);
196	setOperationAction(ISD::CTLZ_ZERO_UNDEF, IntVT, Act);
197	setOperationAction(ISD::CTPOP, IntVT, Act);
198
199	// VE has only 64 bits instructions which work as i64 AND/OR/XOR operations.
200	// Use isel patterns for i64, promote for i32.
201	setOperationAction(ISD::AND, IntVT, Act);
202	setOperationAction(ISD::OR, IntVT, Act);
203	setOperationAction(ISD::XOR, IntVT, Act);
204
205	// Legal smax and smin
206	setOperationAction(ISD::SMAX, IntVT, Legal);
207	setOperationAction(ISD::SMIN, IntVT, Legal);
208	}
209	/// } Int Ops
210
211	/// Conversion {
212	// VE doesn't have instructions for fp<->uint, so expand them by llvm
213	setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote); // use i64
214	setOperationAction(ISD::UINT_TO_FP, MVT::i32, Promote); // use i64
215	setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
216	setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
217
218	// fp16 not supported
219	for (MVT FPVT : MVT::fp_valuetypes()) {
220	setOperationAction(ISD::FP16_TO_FP, FPVT, Expand);
221	setOperationAction(ISD::FP_TO_FP16, FPVT, Expand);
222	}
223	/// } Conversion
224
225	/// Floating-point Ops {
226	/// Note: Floating-point operations are fneg, fadd, fsub, fmul, fdiv, frem,
227	/// and fcmp.
228
229	// VE doesn't have following floating point operations.
230	for (MVT VT : MVT::fp_valuetypes()) {
231	setOperationAction(ISD::FNEG, VT, Expand);
232	setOperationAction(ISD::FREM, VT, Expand);
233	}
234
235	// VE doesn't have fdiv of f128.
236	setOperationAction(ISD::FDIV, MVT::f128, Expand);
237
238	for (MVT FPVT : {MVT::f32, MVT::f64}) {
239	// f32 and f64 uses ConstantFP. f128 uses ConstantPool.
240	setOperationAction(ISD::ConstantFP, FPVT, Legal);
241	}
242	/// } Floating-point Ops
243
244	/// Floating-point math functions {
245
246	// VE doesn't have following floating point math functions.
247	for (MVT VT : MVT::fp_valuetypes()) {
248	setOperationAction(ISD::FABS, VT, Expand);
249	setOperationAction(ISD::FCOPYSIGN, VT, Expand);
250	setOperationAction(ISD::FCOS, VT, Expand);
251	setOperationAction(ISD::FMA, VT, Expand);
252	setOperationAction(ISD::FPOW, VT, Expand);
253	setOperationAction(ISD::FSIN, VT, Expand);
254	setOperationAction(ISD::FSQRT, VT, Expand);
255	}
256
257	// VE has single and double FMINNUM and FMAXNUM
258	for (MVT VT : {MVT::f32, MVT::f64}) {
259	setOperationAction({ISD::FMAXNUM, ISD::FMINNUM}, VT, Legal);
260	}
261
262	/// } Floating-point math functions
263
264	/// Atomic instructions {
265
266	setMaxAtomicSizeInBitsSupported(`64`);
267	setMinCmpXchgSizeInBits(`32`);
268	setSupportsUnalignedAtomics(false);
269
270	// Use custom inserter for ATOMIC_FENCE.
271	setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
272
273	// Other atomic instructions.
274	for (MVT VT : MVT::integer_valuetypes()) {
275	// Support i8/i16 atomic swap.
276	setOperationAction(ISD::ATOMIC_SWAP, VT, Custom);
277
278	// FIXME: Support "atmam" instructions.
279	setOperationAction(ISD::ATOMIC_LOAD_ADD, VT, Expand);
280	setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Expand);
281	setOperationAction(ISD::ATOMIC_LOAD_AND, VT, Expand);
282	setOperationAction(ISD::ATOMIC_LOAD_OR, VT, Expand);
283
284	// VE doesn't have follwing instructions.
285	setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Expand);
286	setOperationAction(ISD::ATOMIC_LOAD_CLR, VT, Expand);
287	setOperationAction(ISD::ATOMIC_LOAD_XOR, VT, Expand);
288	setOperationAction(ISD::ATOMIC_LOAD_NAND, VT, Expand);
289	setOperationAction(ISD::ATOMIC_LOAD_MIN, VT, Expand);
290	setOperationAction(ISD::ATOMIC_LOAD_MAX, VT, Expand);
291	setOperationAction(ISD::ATOMIC_LOAD_UMIN, VT, Expand);
292	setOperationAction(ISD::ATOMIC_LOAD_UMAX, VT, Expand);
293	}
294
295	/// } Atomic instructions
296
297	/// SJLJ instructions {
298	setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
299	setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
300	setOperationAction(ISD::EH_SJLJ_SETUP_DISPATCH, MVT::Other, Custom);
301	if (TM.Options.ExceptionModel == ExceptionHandling::SjLj)
302	setLibcallName(Call: RTLIB::UNWIND_RESUME, Name: "_Unwind_SjLj_Resume");
303	/// } SJLJ instructions
304
305	// Intrinsic instructions
306	setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
307	}
308
309	void VETargetLowering::initVPUActions() {
310	for (MVT LegalMaskVT : AllMaskVTs)
311	setOperationAction(ISD::BUILD_VECTOR, LegalMaskVT, Custom);
312
313	for (unsigned Opc : {ISD::AND, ISD::OR, ISD::XOR})
314	setOperationAction(Opc, MVT::v512i1, Custom);
315
316	for (MVT LegalVecVT : AllVectorVTs) {
317	setOperationAction(ISD::BUILD_VECTOR, LegalVecVT, Custom);
318	setOperationAction(ISD::INSERT_VECTOR_ELT, LegalVecVT, Legal);
319	setOperationAction(ISD::EXTRACT_VECTOR_ELT, LegalVecVT, Legal);
320	// Translate all vector instructions with legal element types to VVP_*
321	// nodes.
322	// TODO We will custom-widen into VVP_ nodes in the future. While we are*
323	// buildling the infrastructure for this, we only do this for legal vector
324	// VTs.
325	#define HANDLE_VP_TO_VVP(VP_OPC, VVP_NAME) \
326	setOperationAction(ISD::VP_OPC, LegalVecVT, Custom);
327	#define ADD_VVP_OP(VVP_NAME, ISD_NAME) \
328	setOperationAction(ISD::ISD_NAME, LegalVecVT, Custom);
329	setOperationAction(ISD::EXPERIMENTAL_VP_STRIDED_LOAD, LegalVecVT, Custom);
330	setOperationAction(ISD::EXPERIMENTAL_VP_STRIDED_STORE, LegalVecVT, Custom);
331	#include "VVPNodes.def"
332	}
333
334	for (MVT LegalPackedVT : AllPackedVTs) {
335	setOperationAction(ISD::INSERT_VECTOR_ELT, LegalPackedVT, Custom);
336	setOperationAction(ISD::EXTRACT_VECTOR_ELT, LegalPackedVT, Custom);
337	}
338
339	// vNt32, vNt64 ops (legal element types)
340	for (MVT VT : MVT::vector_valuetypes()) {
341	MVT ElemVT = VT.getVectorElementType();
342	unsigned ElemBits = ElemVT.getScalarSizeInBits();
343	if (ElemBits != `32` && ElemBits != `64`)
344	continue;
345
346	for (unsigned MemOpc : {ISD::MLOAD, ISD::MSTORE, ISD::LOAD, ISD::STORE})
347	setOperationAction(MemOpc, VT, Custom);
348
349	const ISD::NodeType IntReductionOCs[] = {
350	ISD::VECREDUCE_ADD, ISD::VECREDUCE_MUL, ISD::VECREDUCE_AND,
351	ISD::VECREDUCE_OR, ISD::VECREDUCE_XOR, ISD::VECREDUCE_SMIN,
352	ISD::VECREDUCE_SMAX, ISD::VECREDUCE_UMIN, ISD::VECREDUCE_UMAX};
353
354	for (unsigned IntRedOpc : IntReductionOCs)
355	setOperationAction(IntRedOpc, VT, Custom);
356	}
357
358	// v256i1 and v512i1 ops
359	for (MVT MaskVT : AllMaskVTs) {
360	// Custom lower mask ops
361	setOperationAction(ISD::STORE, MaskVT, Custom);
362	setOperationAction(ISD::LOAD, MaskVT, Custom);
363	}
364	}
365
366	SDValue
367	VETargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
368	bool IsVarArg,
369	const SmallVectorImpl<ISD::OutputArg> &Outs,
370	const SmallVectorImpl<SDValue> &OutVals,
371	const SDLoc &DL, SelectionDAG &DAG) const {
372	// CCValAssign - represent the assignment of the return value to locations.
373	SmallVector<CCValAssign, `16`> RVLocs;
374
375	// CCState - Info about the registers and stack slot.
376	CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
377	*DAG.getContext());
378
379	// Analyze return values.
380	CCInfo.AnalyzeReturn(Outs, Fn: getReturnCC(CallConv));
381
382	SDValue Glue;
383	SmallVector<SDValue, `4`> RetOps(`1`, Chain);
384
385	// Copy the result values into the output registers.
386	for (unsigned i = `0`; i != RVLocs.size(); ++i) {
387	CCValAssign &VA = RVLocs [i];
388	assert(VA.isRegLoc() && "Can only return in registers!");
389	assert(!VA.needsCustom() && "Unexpected custom lowering");
390	SDValue OutVal = OutVals [i];
391
392	// Integer return values must be sign or zero extended by the callee.
393	switch (VA.getLocInfo()) {
394	case CCValAssign::Full:
395	break;
396	case CCValAssign::SExt:
397	OutVal = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL, VT: VA.getLocVT(), Operand: OutVal);
398	break;
399	case CCValAssign::ZExt:
400	OutVal = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: VA.getLocVT(), Operand: OutVal);
401	break;
402	case CCValAssign::AExt:
403	OutVal = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: VA.getLocVT(), Operand: OutVal);
404	break;
405	case CCValAssign::BCvt: {
406	// Convert a float return value to i64 with padding.
407	// 63 31 0
408	// +------+------+
409	// \| float\| 0 \|
410	// +------+------+
411	assert(VA.getLocVT() == MVT::i64);
412	assert(VA.getValVT() == MVT::f32);
413	SDValue Undef = SDValue(
414	DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::i64), `0`);
415	SDValue Sub_f32 = DAG.getTargetConstant(VE::sub_f32, DL, MVT::i32);
416	OutVal = SDValue(DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL,
417	MVT::i64, Undef, OutVal, Sub_f32),
418	`0`);
419	break;
420	}
421	default:
422	llvm_unreachable("Unknown loc info!");
423	}
424
425	Chain = DAG.getCopyToReg(Chain, dl: DL, Reg: VA.getLocReg(), N: OutVal, Glue);
426
427	// Guarantee that all emitted copies are stuck together with flags.
428	Glue = Chain.getValue(R: `1`);
429	RetOps.push_back(Elt: DAG.getRegister(Reg: VA.getLocReg(), VT: VA.getLocVT()));
430	}
431
432	RetOps [`0`] = Chain; // Update chain.
433
434	// Add the glue if we have it.
435	if (Glue.getNode())
436	RetOps.push_back(Elt: Glue);
437
438	return DAG.getNode(VEISD::RET_GLUE, DL, MVT::Other, RetOps);
439	}
440
441	SDValue VETargetLowering::LowerFormalArguments(
442	SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
443	const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
444	SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
445	MachineFunction &MF = DAG.getMachineFunction();
446
447	// Get the base offset of the incoming arguments stack space.
448	unsigned ArgsBaseOffset = Subtarget->getRsaSize();
449	// Get the size of the preserved arguments area
450	unsigned ArgsPreserved = `64`;
451
452	// Analyze arguments according to CC_VE.
453	SmallVector<CCValAssign, `16`> ArgLocs;
454	CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), ArgLocs,
455	*DAG.getContext());
456	// Allocate the preserved area first.
457	CCInfo.AllocateStack(Size: ArgsPreserved, Alignment: Align (`8`));
458	// We already allocated the preserved area, so the stack offset computed
459	// by CC_VE would be correct now.
460	CCInfo.AnalyzeFormalArguments(Ins, Fn: getParamCC(CallConv, IsVarArg: false));
461
462	for (unsigned i = `0`, e = ArgLocs.size(); i != e; ++i) {
463	CCValAssign &VA = ArgLocs [i];
464	assert(!VA.needsCustom() && "Unexpected custom lowering");
465	if (VA.isRegLoc()) {
466	// This argument is passed in a register.
467	// All integer register arguments are promoted by the caller to i64.
468
469	// Create a virtual register for the promoted live-in value.
470	Register VReg =
471	MF.addLiveIn(PReg: VA.getLocReg(), RC: getRegClassFor(VT: VA.getLocVT()));
472	SDValue Arg = DAG.getCopyFromReg(Chain, dl: DL, Reg: VReg, VT: VA.getLocVT());
473
474	// The caller promoted the argument, so insert an Assert?ext SDNode so we
475	// won't promote the value again in this function.
476	switch (VA.getLocInfo()) {
477	case CCValAssign::SExt:
478	Arg = DAG.getNode(Opcode: ISD::AssertSext, DL, VT: VA.getLocVT(), N1: Arg,
479	N2: DAG.getValueType(VA.getValVT()));
480	break;
481	case CCValAssign::ZExt:
482	Arg = DAG.getNode(Opcode: ISD::AssertZext, DL, VT: VA.getLocVT(), N1: Arg,
483	N2: DAG.getValueType(VA.getValVT()));
484	break;
485	case CCValAssign::BCvt: {
486	// Extract a float argument from i64 with padding.
487	// 63 31 0
488	// +------+------+
489	// \| float\| 0 \|
490	// +------+------+
491	assert(VA.getLocVT() == MVT::i64);
492	assert(VA.getValVT() == MVT::f32);
493	SDValue Sub_f32 = DAG.getTargetConstant(VE::sub_f32, DL, MVT::i32);
494	Arg = SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL,
495	MVT::f32, Arg, Sub_f32),
496	`0`);
497	break;
498	}
499	default:
500	break;
501	}
502
503	// Truncate the register down to the argument type.
504	if (VA.isExtInLoc())
505	Arg = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: VA.getValVT(), Operand: Arg);
506
507	InVals.push_back(Elt: Arg);
508	continue;
509	}
510
511	// The registers are exhausted. This argument was passed on the stack.
512	assert(VA.isMemLoc());
513	// The CC_VE_Full/Half functions compute stack offsets relative to the
514	// beginning of the arguments area at %fp + the size of reserved area.
515	unsigned Offset = VA.getLocMemOffset() + ArgsBaseOffset;
516	unsigned ValSize = VA.getValVT().getSizeInBits() / `8`;
517
518	// Adjust offset for a float argument by adding 4 since the argument is
519	// stored in 8 bytes buffer with offset like below. LLVM generates
520	// 4 bytes load instruction, so need to adjust offset here. This
521	// adjustment is required in only LowerFormalArguments. In LowerCall,
522	// a float argument is converted to i64 first, and stored as 8 bytes
523	// data, which is required by ABI, so no need for adjustment.
524	// 0 4
525	// +------+------+
526	// \| empty\| float\|
527	// +------+------+
528	if (VA.getValVT() == MVT::f32)
529	Offset += `4`;
530
531	int FI = MF.getFrameInfo().CreateFixedObject(Size: ValSize, SPOffset: Offset, IsImmutable: true);
532	InVals.push_back(
533	Elt: DAG.getLoad(VT: VA.getValVT(), dl: DL, Chain,
534	Ptr: DAG.getFrameIndex(FI, VT: getPointerTy(DL: MF.getDataLayout())),
535	PtrInfo: MachinePointerInfo::getFixedStack(MF, FI)));
536	}
537
538	if (!IsVarArg)
539	return Chain;
540
541	// This function takes variable arguments, some of which may have been passed
542	// in registers %s0-%s8.
543	//
544	// The va_start intrinsic needs to know the offset to the first variable
545	// argument.
546	// TODO: need to calculate offset correctly once we support f128.
547	unsigned ArgOffset = ArgLocs.size() * `8`;
548	VEMachineFunctionInfo *FuncInfo = MF.getInfo<VEMachineFunctionInfo>();
549	// Skip the reserved area at the top of stack.
550	FuncInfo->setVarArgsFrameOffset(ArgOffset + ArgsBaseOffset);
551
552	return Chain;
553	}
554
555	// FIXME? Maybe this could be a TableGen attribute on some registers and
556	// this table could be generated automatically from RegInfo.
557	Register VETargetLowering::getRegisterByName(const char *RegName, LLT VT,
558	const MachineFunction &MF) const {
559	Register Reg = StringSwitch<Register>(RegName)
560	.Case("sp", VE::SX11) // Stack pointer
561	.Case("fp", VE::SX9) // Frame pointer
562	.Case("sl", VE::SX8) // Stack limit
563	.Case("lr", VE::SX10) // Link register
564	.Case("tp", VE::SX14) // Thread pointer
565	.Case("outer", VE::SX12) // Outer regiser
566	.Case("info", VE::SX17) // Info area register
567	.Case("got", VE::SX15) // Global offset table register
568	.Case("plt", VE::SX16) // Procedure linkage table register
569	.Default(`0`);
570
571	if (Reg)
572	return Reg;
573
574	report_fatal_error(reason: "Invalid register name global variable");
575	}
576
577	//===----------------------------------------------------------------------===//
578	// TargetLowering Implementation
579	//===----------------------------------------------------------------------===//
580
581	SDValue VETargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
582	SmallVectorImpl<SDValue> &InVals) const {
583	SelectionDAG &DAG = CLI.DAG;
584	SDLoc DL = CLI.DL;
585	SDValue Chain = CLI.Chain;
586	auto PtrVT = getPointerTy(DL: DAG.getDataLayout());
587
588	// VE target does not yet support tail call optimization.
589	CLI.IsTailCall = false;
590
591	// Get the base offset of the outgoing arguments stack space.
592	unsigned ArgsBaseOffset = Subtarget->getRsaSize();
593	// Get the size of the preserved arguments area
594	unsigned ArgsPreserved = `8` * `8u`;
595
596	// Analyze operands of the call, assigning locations to each operand.
597	SmallVector<CCValAssign, `16`> ArgLocs;
598	CCState CCInfo(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(), ArgLocs,
599	*DAG.getContext());
600	// Allocate the preserved area first.
601	CCInfo.AllocateStack(Size: ArgsPreserved, Alignment: Align (`8`));
602	// We already allocated the preserved area, so the stack offset computed
603	// by CC_VE would be correct now.
604	CCInfo.AnalyzeCallOperands(Outs: CLI.Outs, Fn: getParamCC(CallConv: CLI.CallConv, IsVarArg: false));
605
606	// VE requires to use both register and stack for varargs or no-prototyped
607	// functions.
608	bool UseBoth = CLI.IsVarArg;
609
610	// Analyze operands again if it is required to store BOTH.
611	SmallVector<CCValAssign, `16`> ArgLocs2;
612	CCState CCInfo2(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(),
613	ArgLocs2, *DAG.getContext());
614	if (UseBoth)
615	CCInfo2.AnalyzeCallOperands(Outs: CLI.Outs, Fn: getParamCC(CallConv: CLI.CallConv, IsVarArg: true));
616
617	// Get the size of the outgoing arguments stack space requirement.
618	unsigned ArgsSize = CCInfo.getStackSize();
619
620	// Keep stack frames 16-byte aligned.
621	ArgsSize = alignTo(Value: ArgsSize, Align: `16`);
622
623	// Adjust the stack pointer to make room for the arguments.
624	// FIXME: Use hasReservedCallFrame to avoid %sp adjustments around all calls
625	// with more than 6 arguments.
626	Chain = DAG.getCALLSEQ_START(Chain, InSize: ArgsSize, OutSize: `0`, DL);
627
628	// Collect the set of registers to pass to the function and their values.
629	// This will be emitted as a sequence of CopyToReg nodes glued to the call
630	// instruction.
631	SmallVector<std::pair<unsigned, SDValue>, `8`> RegsToPass;
632
633	// Collect chains from all the memory opeations that copy arguments to the
634	// stack. They must follow the stack pointer adjustment above and precede the
635	// call instruction itself.
636	SmallVector<SDValue, `8`> MemOpChains;
637
638	// VE needs to get address of callee function in a register
639	// So, prepare to copy it to SX12 here.
640
641	// If the callee is a GlobalAddress node (quite common, every direct call is)
642	// turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
643	// Likewise ExternalSymbol -> TargetExternalSymbol.
644	SDValue Callee = CLI.Callee;
645
646	bool IsPICCall = isPositionIndependent();
647
648	// PC-relative references to external symbols should go through $stub.
649	// If so, we need to prepare GlobalBaseReg first.
650	const TargetMachine &TM = DAG.getTarget();
651	const GlobalValue GV = nullptr*;
652	auto *CalleeG = dyn_cast<GlobalAddressSDNode>(Val&: Callee);
653	if (CalleeG)
654	GV = CalleeG->getGlobal();
655	bool Local = TM.shouldAssumeDSOLocal(GV);
656	bool UsePlt = !Local;
657	MachineFunction &MF = DAG.getMachineFunction();
658
659	// Turn GlobalAddress/ExternalSymbol node into a value node
660	// containing the address of them here.
661	if (CalleeG) {
662	if (IsPICCall) {
663	if (UsePlt)
664	Subtarget->getInstrInfo()->getGlobalBaseReg(MF: &MF);
665	Callee = DAG.getTargetGlobalAddress(GV, DL, VT: PtrVT, offset: `0`, TargetFlags: `0`);
666	Callee = DAG.getNode(Opcode: VEISD::GETFUNPLT, DL, VT: PtrVT, Operand: Callee);
667	} else {
668	Callee =
669	makeHiLoPair(Op: Callee, HiTF: VEMCExpr::VK_VE_HI32, LoTF: VEMCExpr::VK_VE_LO32, DAG);
670	}
671	} else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Val&: Callee)) {
672	if (IsPICCall) {
673	if (UsePlt)
674	Subtarget->getInstrInfo()->getGlobalBaseReg(MF: &MF);
675	Callee = DAG.getTargetExternalSymbol(Sym: E->getSymbol(), VT: PtrVT, TargetFlags: `0`);
676	Callee = DAG.getNode(Opcode: VEISD::GETFUNPLT, DL, VT: PtrVT, Operand: Callee);
677	} else {
678	Callee =
679	makeHiLoPair(Op: Callee, HiTF: VEMCExpr::VK_VE_HI32, LoTF: VEMCExpr::VK_VE_LO32, DAG);
680	}
681	}
682
683	RegsToPass.push_back(std::make_pair(VE::SX12, Callee));
684
685	for (unsigned i = `0`, e = ArgLocs.size(); i != e; ++i) {
686	CCValAssign &VA = ArgLocs [i];
687	SDValue Arg = CLI.OutVals [i];
688
689	// Promote the value if needed.
690	switch (VA.getLocInfo()) {
691	default:
692	llvm_unreachable("Unknown location info!");
693	case CCValAssign::Full:
694	break;
695	case CCValAssign::SExt:
696	Arg = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL, VT: VA.getLocVT(), Operand: Arg);
697	break;
698	case CCValAssign::ZExt:
699	Arg = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: VA.getLocVT(), Operand: Arg);
700	break;
701	case CCValAssign::AExt:
702	Arg = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: VA.getLocVT(), Operand: Arg);
703	break;
704	case CCValAssign::BCvt: {
705	// Convert a float argument to i64 with padding.
706	// 63 31 0
707	// +------+------+
708	// \| float\| 0 \|
709	// +------+------+
710	assert(VA.getLocVT() == MVT::i64);
711	assert(VA.getValVT() == MVT::f32);
712	SDValue Undef = SDValue(
713	DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::i64), `0`);
714	SDValue Sub_f32 = DAG.getTargetConstant(VE::sub_f32, DL, MVT::i32);
715	Arg = SDValue(DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL,
716	MVT::i64, Undef, Arg, Sub_f32),
717	`0`);
718	break;
719	}
720	}
721
722	if (VA.isRegLoc()) {
723	RegsToPass.push_back(Elt: std::make_pair(x: VA.getLocReg(), y&: Arg));
724	if (!UseBoth)
725	continue;
726	VA = ArgLocs2 [i];
727	}
728
729	assert(VA.isMemLoc());
730
731	// Create a store off the stack pointer for this argument.
732	SDValue StackPtr = DAG.getRegister(VE::SX11, PtrVT);
733	// The argument area starts at %fp/%sp + the size of reserved area.
734	SDValue PtrOff =
735	DAG.getIntPtrConstant(Val: VA.getLocMemOffset() + ArgsBaseOffset, DL);
736	PtrOff = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: StackPtr, N2: PtrOff);
737	MemOpChains.push_back(
738	Elt: DAG.getStore(Chain, dl: DL, Val: Arg, Ptr: PtrOff, PtrInfo: MachinePointerInfo ()));
739	}
740
741	// Emit all stores, make sure they occur before the call.
742	if (!MemOpChains.empty())
743	Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
744
745	// Build a sequence of CopyToReg nodes glued together with token chain and
746	// glue operands which copy the outgoing args into registers. The InGlue is
747	// necessary since all emitted instructions must be stuck together in order
748	// to pass the live physical registers.
749	SDValue InGlue;
750	for (unsigned i = `0`, e = RegsToPass.size(); i != e; ++i) {
751	Chain = DAG.getCopyToReg(Chain, dl: DL, Reg: RegsToPass [i].first,
752	N: RegsToPass [i].second, Glue: InGlue);
753	InGlue = Chain.getValue(R: `1`);
754	}
755
756	// Build the operands for the call instruction itself.
757	SmallVector<SDValue, `8`> Ops;
758	Ops.push_back(Elt: Chain);
759	for (unsigned i = `0`, e = RegsToPass.size(); i != e; ++i)
760	Ops.push_back(Elt: DAG.getRegister(Reg: RegsToPass [i].first,
761	VT: RegsToPass [i].second.getValueType()));
762
763	// Add a register mask operand representing the call-preserved registers.
764	const VERegisterInfo *TRI = Subtarget->getRegisterInfo();
765	const uint32_t *Mask =
766	TRI->getCallPreservedMask(MF: DAG.getMachineFunction(), CC: CLI.CallConv);
767	assert(Mask && "Missing call preserved mask for calling convention");
768	Ops.push_back(Elt: DAG.getRegisterMask(RegMask: Mask));
769
770	// Make sure the CopyToReg nodes are glued to the call instruction which
771	// consumes the registers.
772	if (InGlue.getNode())
773	Ops.push_back(Elt: InGlue);
774
775	// Now the call itself.
776	SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
777	Chain = DAG.getNode(Opcode: VEISD::CALL, DL, VTList: NodeTys, Ops);
778	InGlue = Chain.getValue(R: `1`);
779
780	// Revert the stack pointer immediately after the call.
781	Chain = DAG.getCALLSEQ_END(Chain, Size1: ArgsSize, Size2: `0`, Glue: InGlue, DL);
782	InGlue = Chain.getValue(R: `1`);
783
784	// Now extract the return values. This is more or less the same as
785	// LowerFormalArguments.
786
787	// Assign locations to each value returned by this call.
788	SmallVector<CCValAssign, `16`> RVLocs;
789	CCState RVInfo(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(), RVLocs,
790	*DAG.getContext());
791
792	// Set inreg flag manually for codegen generated library calls that
793	// return float.
794	if (CLI.Ins.size() == `1` && CLI.Ins[`0`].VT == MVT::f32 && !CLI.CB)
795	CLI.Ins [`0`].Flags.setInReg();
796
797	RVInfo.AnalyzeCallResult(Ins: CLI.Ins, Fn: getReturnCC(CallConv: CLI.CallConv));
798
799	// Copy all of the result registers out of their specified physreg.
800	for (unsigned i = `0`; i != RVLocs.size(); ++i) {
801	CCValAssign &VA = RVLocs [i];
802	assert(!VA.needsCustom() && "Unexpected custom lowering");
803	Register Reg = VA.getLocReg();
804
805	// When returning 'inreg {i32, i32 }', two consecutive i32 arguments can
806	// reside in the same register in the high and low bits. Reuse the
807	// CopyFromReg previous node to avoid duplicate copies.
808	SDValue RV;
809	if (RegisterSDNode *SrcReg = dyn_cast<RegisterSDNode>(Val: Chain.getOperand(i: `1`)))
810	if (SrcReg->getReg() == Reg && Chain ->getOpcode() == ISD::CopyFromReg)
811	RV = Chain.getValue(R: `0`);
812
813	// But usually we'll create a new CopyFromReg for a different register.
814	if (!RV.getNode()) {
815	RV = DAG.getCopyFromReg(Chain, dl: DL, Reg, VT: RVLocs [i].getLocVT(), Glue: InGlue);
816	Chain = RV.getValue(R: `1`);
817	InGlue = Chain.getValue(R: `2`);
818	}
819
820	// The callee promoted the return value, so insert an Assert?ext SDNode so
821	// we won't promote the value again in this function.
822	switch (VA.getLocInfo()) {
823	case CCValAssign::SExt:
824	RV = DAG.getNode(Opcode: ISD::AssertSext, DL, VT: VA.getLocVT(), N1: RV,
825	N2: DAG.getValueType(VA.getValVT()));
826	break;
827	case CCValAssign::ZExt:
828	RV = DAG.getNode(Opcode: ISD::AssertZext, DL, VT: VA.getLocVT(), N1: RV,
829	N2: DAG.getValueType(VA.getValVT()));
830	break;
831	case CCValAssign::BCvt: {
832	// Extract a float return value from i64 with padding.
833	// 63 31 0
834	// +------+------+
835	// \| float\| 0 \|
836	// +------+------+
837	assert(VA.getLocVT() == MVT::i64);
838	assert(VA.getValVT() == MVT::f32);
839	SDValue Sub_f32 = DAG.getTargetConstant(VE::sub_f32, DL, MVT::i32);
840	RV = SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL,
841	MVT::f32, RV, Sub_f32),
842	`0`);
843	break;
844	}
845	default:
846	break;
847	}
848
849	// Truncate the register down to the return value type.
850	if (VA.isExtInLoc())
851	RV = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: VA.getValVT(), Operand: RV);
852
853	InVals.push_back(Elt: RV);
854	}
855
856	return Chain;
857	}
858
859	bool VETargetLowering::isOffsetFoldingLegal(
860	const GlobalAddressSDNode GA) const* {
861	// VE uses 64 bit addressing, so we need multiple instructions to generate
862	// an address. Folding address with offset increases the number of
863	// instructions, so that we disable it here. Offsets will be folded in
864	// the DAG combine later if it worth to do so.
865	return false;
866	}
867
868	/// isFPImmLegal - Returns true if the target can instruction select the
869	/// specified FP immediate natively. If false, the legalizer will
870	/// materialize the FP immediate as a load from a constant pool.
871	bool VETargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
872	bool ForCodeSize) const {
873	return VT == MVT::f32 \|\| VT == MVT::f64;
874	}
875
876	/// Determine if the target supports unaligned memory accesses.
877	///
878	/// This function returns true if the target allows unaligned memory accesses
879	/// of the specified type in the given address space. If true, it also returns
880	/// whether the unaligned memory access is "fast" in the last argument by
881	/// reference. This is used, for example, in situations where an array
882	/// copy/move/set is converted to a sequence of store operations. Its use
883	/// helps to ensure that such replacements don't generate code that causes an
884	/// alignment error (trap) on the target machine.
885	bool VETargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
886	unsigned AddrSpace,
887	Align A,
888	MachineMemOperand::Flags,
889	unsigned Fast) const* {
890	if (Fast) {
891	// It's fast anytime on VE
892	*Fast = `1`;
893	}
894	return true;
895	}
896
897	VETargetLowering::VETargetLowering(const TargetMachine &TM,
898	const VESubtarget &STI)
899	: TargetLowering (TM), Subtarget(&STI) {
900	// Instructions which use registers as conditionals examine all the
901	// bits (as does the pseudo SELECT_CC expansion). I don't think it
902	// matters much whether it's ZeroOrOneBooleanContent, or
903	// ZeroOrNegativeOneBooleanContent, so, arbitrarily choose the
904	// former.
905	setBooleanContents(ZeroOrOneBooleanContent);
906	setBooleanVectorContents(ZeroOrOneBooleanContent);
907
908	initRegisterClasses();
909	initSPUActions();
910	initVPUActions();
911
912	setStackPointerRegisterToSaveRestore(VE::SX11);
913
914	// We have target-specific dag combine patterns for the following nodes:
915	setTargetDAGCombine(ISD::TRUNCATE);
916	setTargetDAGCombine(ISD::SELECT);
917	setTargetDAGCombine(ISD::SELECT_CC);
918
919	// Set function alignment to 16 bytes
920	setMinFunctionAlignment(Align (`16`));
921
922	// VE stores all argument by 8 bytes alignment
923	setMinStackArgumentAlignment(Align (`8`));
924
925	computeRegisterProperties(Subtarget->getRegisterInfo());
926	}
927
928	const char VETargetLowering::getTargetNodeName(unsigned* Opcode) const {
929	#define TARGET_NODE_CASE(NAME) \
930	case VEISD::NAME: \
931	return "VEISD::" #NAME;
932	switch ((VEISD::NodeType)Opcode) {
933	case VEISD::FIRST_NUMBER:
934	break;
935	TARGET_NODE_CASE(CMPI)
936	TARGET_NODE_CASE(CMPU)
937	TARGET_NODE_CASE(CMPF)
938	TARGET_NODE_CASE(CMPQ)
939	TARGET_NODE_CASE(CMOV)
940	TARGET_NODE_CASE(CALL)
941	TARGET_NODE_CASE(EH_SJLJ_LONGJMP)
942	TARGET_NODE_CASE(EH_SJLJ_SETJMP)
943	TARGET_NODE_CASE(EH_SJLJ_SETUP_DISPATCH)
944	TARGET_NODE_CASE(GETFUNPLT)
945	TARGET_NODE_CASE(GETSTACKTOP)
946	TARGET_NODE_CASE(GETTLSADDR)
947	TARGET_NODE_CASE(GLOBAL_BASE_REG)
948	TARGET_NODE_CASE(Hi)
949	TARGET_NODE_CASE(Lo)
950	TARGET_NODE_CASE(RET_GLUE)
951	TARGET_NODE_CASE(TS1AM)
952	TARGET_NODE_CASE(VEC_UNPACK_LO)
953	TARGET_NODE_CASE(VEC_UNPACK_HI)
954	TARGET_NODE_CASE(VEC_PACK)
955	TARGET_NODE_CASE(VEC_BROADCAST)
956	TARGET_NODE_CASE(REPL_I32)
957	TARGET_NODE_CASE(REPL_F32)
958
959	TARGET_NODE_CASE(LEGALAVL)
960
961	// Register the VVP_ SDNodes.*
962	#define ADD_VVP_OP(VVP_NAME, ...) TARGET_NODE_CASE(VVP_NAME)
963	#include "VVPNodes.def"
964	}
965	#undef TARGET_NODE_CASE
966	return nullptr;
967	}
968
969	EVT VETargetLowering::getSetCCResultType(const DataLayout &, LLVMContext &,
970	EVT VT) const {
971	return MVT::i32;
972	}
973
974	// Convert to a target node and set target flags.
975	SDValue VETargetLowering::withTargetFlags(SDValue Op, unsigned TF,
976	SelectionDAG &DAG) const {
977	if (const GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Val&: Op))
978	return DAG.getTargetGlobalAddress(GV: GA->getGlobal(), DL: SDLoc (GA),
979	VT: GA->getValueType(ResNo: `0`), offset: GA->getOffset(), TargetFlags: TF);
980
981	if (const BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(Val&: Op))
982	return DAG.getTargetBlockAddress(BA: BA->getBlockAddress(), VT: Op.getValueType(),
983	Offset: `0`, TargetFlags: TF);
984
985	if (const ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Val&: Op))
986	return DAG.getTargetConstantPool(C: CP->getConstVal(), VT: CP->getValueType(ResNo: `0`),
987	Align: CP->getAlign(), Offset: CP->getOffset(), TargetFlags: TF);
988
989	if (const ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Val&: Op))
990	return DAG.getTargetExternalSymbol(Sym: ES->getSymbol(), VT: ES->getValueType(ResNo: `0`),
991	TargetFlags: TF);
992
993	if (const JumpTableSDNode *JT = dyn_cast<JumpTableSDNode>(Val&: Op))
994	return DAG.getTargetJumpTable(JTI: JT->getIndex(), VT: JT->getValueType(ResNo: `0`), TargetFlags: TF);
995
996	llvm_unreachable("Unhandled address SDNode");
997	}
998
999	// Split Op into high and low parts according to HiTF and LoTF.
1000	// Return an ADD node combining the parts.
1001	SDValue VETargetLowering::makeHiLoPair(SDValue Op, unsigned HiTF, unsigned LoTF,
1002	SelectionDAG &DAG) const {
1003	SDLoc DL(Op);
1004	EVT VT = Op.getValueType();
1005	SDValue Hi = DAG.getNode(Opcode: VEISD::Hi, DL, VT, Operand: withTargetFlags(Op, TF: HiTF, DAG));
1006	SDValue Lo = DAG.getNode(Opcode: VEISD::Lo, DL, VT, Operand: withTargetFlags(Op, TF: LoTF, DAG));
1007	return DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: Hi, N2: Lo);
1008	}
1009
1010	// Build SDNodes for producing an address from a GlobalAddress, ConstantPool,
1011	// or ExternalSymbol SDNode.
1012	SDValue VETargetLowering::makeAddress(SDValue Op, SelectionDAG &DAG) const {
1013	SDLoc DL(Op);
1014	EVT PtrVT = Op.getValueType();
1015
1016	// Handle PIC mode first. VE needs a got load for every variable!
1017	if (isPositionIndependent()) {
1018	auto GlobalN = dyn_cast<GlobalAddressSDNode>(Val&: Op);
1019
1020	if (isa<ConstantPoolSDNode>(Val: Op) \|\| isa<JumpTableSDNode>(Val: Op) \|\|
1021	(GlobalN && GlobalN->getGlobal()->hasLocalLinkage())) {
1022	// Create following instructions for local linkage PIC code.
1023	// lea %reg, label@gotoff_lo
1024	// and %reg, %reg, (32)0
1025	// lea.sl %reg, label@gotoff_hi(%reg, %got)
1026	SDValue HiLo = makeHiLoPair(Op, HiTF: VEMCExpr::VK_VE_GOTOFF_HI32,
1027	LoTF: VEMCExpr::VK_VE_GOTOFF_LO32, DAG);
1028	SDValue GlobalBase = DAG.getNode(Opcode: VEISD::GLOBAL_BASE_REG, DL, VT: PtrVT);
1029	return DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: GlobalBase, N2: HiLo);
1030	}
1031	// Create following instructions for not local linkage PIC code.
1032	// lea %reg, label@got_lo
1033	// and %reg, %reg, (32)0
1034	// lea.sl %reg, label@got_hi(%reg)
1035	// ld %reg, (%reg, %got)
1036	SDValue HiLo = makeHiLoPair(Op, HiTF: VEMCExpr::VK_VE_GOT_HI32,
1037	LoTF: VEMCExpr::VK_VE_GOT_LO32, DAG);
1038	SDValue GlobalBase = DAG.getNode(Opcode: VEISD::GLOBAL_BASE_REG, DL, VT: PtrVT);
1039	SDValue AbsAddr = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: GlobalBase, N2: HiLo);
1040	return DAG.getLoad(VT: PtrVT, dl: DL, Chain: DAG.getEntryNode(), Ptr: AbsAddr,
1041	PtrInfo: MachinePointerInfo::getGOT(MF&: DAG.getMachineFunction()));
1042	}
1043
1044	// This is one of the absolute code models.
1045	switch (getTargetMachine().getCodeModel()) {
1046	default:
1047	llvm_unreachable("Unsupported absolute code model");
1048	case CodeModel::Small:
1049	case CodeModel::Medium:
1050	case CodeModel::Large:
1051	// abs64.
1052	return makeHiLoPair(Op, HiTF: VEMCExpr::VK_VE_HI32, LoTF: VEMCExpr::VK_VE_LO32, DAG);
1053	}
1054	}
1055
1056	/// Custom Lower {
1057
1058	// The mappings for emitLeading/TrailingFence for VE is designed by following
1059	// http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
1060	Instruction *VETargetLowering::emitLeadingFence(IRBuilderBase &Builder,
1061	Instruction *Inst,
1062	AtomicOrdering Ord) const {
1063	switch (Ord) {
1064	case AtomicOrdering::NotAtomic:
1065	case AtomicOrdering::Unordered:
1066	llvm_unreachable("Invalid fence: unordered/non-atomic");
1067	case AtomicOrdering::Monotonic:
1068	case AtomicOrdering::Acquire:
1069	return nullptr; // Nothing to do
1070	case AtomicOrdering::Release:
1071	case AtomicOrdering::AcquireRelease:
1072	return Builder.CreateFence(Ordering: AtomicOrdering::Release);
1073	case AtomicOrdering::SequentiallyConsistent:
1074	if (!Inst->hasAtomicStore())
1075	return nullptr; // Nothing to do
1076	return Builder.CreateFence(Ordering: AtomicOrdering::SequentiallyConsistent);
1077	}
1078	llvm_unreachable("Unknown fence ordering in emitLeadingFence");
1079	}
1080
1081	Instruction *VETargetLowering::emitTrailingFence(IRBuilderBase &Builder,
1082	Instruction *Inst,
1083	AtomicOrdering Ord) const {
1084	switch (Ord) {
1085	case AtomicOrdering::NotAtomic:
1086	case AtomicOrdering::Unordered:
1087	llvm_unreachable("Invalid fence: unordered/not-atomic");
1088	case AtomicOrdering::Monotonic:
1089	case AtomicOrdering::Release:
1090	return nullptr; // Nothing to do
1091	case AtomicOrdering::Acquire:
1092	case AtomicOrdering::AcquireRelease:
1093	return Builder.CreateFence(Ordering: AtomicOrdering::Acquire);
1094	case AtomicOrdering::SequentiallyConsistent:
1095	return Builder.CreateFence(Ordering: AtomicOrdering::SequentiallyConsistent);
1096	}
1097	llvm_unreachable("Unknown fence ordering in emitTrailingFence");
1098	}
1099
1100	SDValue VETargetLowering::lowerATOMIC_FENCE(SDValue Op,
1101	SelectionDAG &DAG) const {
1102	SDLoc DL(Op);
1103	AtomicOrdering FenceOrdering =
1104	static_cast<AtomicOrdering>(Op.getConstantOperandVal(i: `1`));
1105	SyncScope::ID FenceSSID =
1106	static_cast<SyncScope::ID>(Op.getConstantOperandVal(i: `2`));
1107
1108	// VE uses Release consistency, so need a fence instruction if it is a
1109	// cross-thread fence.
1110	if (FenceSSID == SyncScope::System) {
1111	switch (FenceOrdering) {
1112	case AtomicOrdering::NotAtomic:
1113	case AtomicOrdering::Unordered:
1114	case AtomicOrdering::Monotonic:
1115	// No need to generate fencem instruction here.
1116	break;
1117	case AtomicOrdering::Acquire:
1118	// Generate "fencem 2" as acquire fence.
1119	return SDValue(DAG.getMachineNode(VE::FENCEM, DL, MVT::Other,
1120	DAG.getTargetConstant(`2`, DL, MVT::i32),
1121	Op.getOperand(`0`)),
1122	`0`);
1123	case AtomicOrdering::Release:
1124	// Generate "fencem 1" as release fence.
1125	return SDValue(DAG.getMachineNode(VE::FENCEM, DL, MVT::Other,
1126	DAG.getTargetConstant(`1`, DL, MVT::i32),
1127	Op.getOperand(`0`)),
1128	`0`);
1129	case AtomicOrdering::AcquireRelease:
1130	case AtomicOrdering::SequentiallyConsistent:
1131	// Generate "fencem 3" as acq_rel and seq_cst fence.
1132	// FIXME: "fencem 3" doesn't wait for PCIe deveices accesses,
1133	// so seq_cst may require more instruction for them.
1134	return SDValue(DAG.getMachineNode(VE::FENCEM, DL, MVT::Other,
1135	DAG.getTargetConstant(`3`, DL, MVT::i32),
1136	Op.getOperand(`0`)),
1137	`0`);
1138	}
1139	}
1140
1141	// MEMBARRIER is a compiler barrier; it codegens to a no-op.
1142	return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(`0`));
1143	}
1144
1145	TargetLowering::AtomicExpansionKind
1146	VETargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst AI) const* {
1147	// We have TS1AM implementation for i8/i16/i32/i64, so use it.
1148	if (AI->getOperation() == AtomicRMWInst::Xchg) {
1149	return AtomicExpansionKind::None;
1150	}
1151	// FIXME: Support "ATMAM" instruction for LOAD_ADD/SUB/AND/OR.
1152
1153	// Otherwise, expand it using compare and exchange instruction to not call
1154	// __sync_fetch_and_ functions.*
1155	return AtomicExpansionKind::CmpXChg;
1156	}
1157
1158	static SDValue prepareTS1AM(SDValue Op, SelectionDAG &DAG, SDValue &Flag,
1159	SDValue &Bits) {
1160	SDLoc DL(Op);
1161	AtomicSDNode *N = cast<AtomicSDNode>(Val&: Op);
1162	SDValue Ptr = N->getOperand(Num: `1`);
1163	SDValue Val = N->getOperand(Num: `2`);
1164	EVT PtrVT = Ptr.getValueType();
1165	bool Byte = N->getMemoryVT() == MVT::i8;
1166	// Remainder = AND Ptr, 3
1167	// Flag = 1 << Remainder ; If Byte is true (1 byte swap flag)
1168	// Flag = 3 << Remainder ; If Byte is false (2 bytes swap flag)
1169	// Bits = Remainder << 3
1170	// NewVal = Val << Bits
1171	SDValue Const3 = DAG.getConstant(Val: `3`, DL, VT: PtrVT);
1172	SDValue Remainder = DAG.getNode(Opcode: ISD::AND, DL, VT: PtrVT, Ops: {Ptr, Const3});
1173	SDValue Mask = Byte ? DAG.getConstant(`1`, DL, MVT::i32)
1174	: DAG.getConstant(`3`, DL, MVT::i32);
1175	Flag = DAG.getNode(ISD::SHL, DL, MVT::i32, {Mask, Remainder});
1176	Bits = DAG.getNode(Opcode: ISD::SHL, DL, VT: PtrVT, Ops: {Remainder, Const3});
1177	return DAG.getNode(Opcode: ISD::SHL, DL, VT: Val.getValueType(), Ops: {Val, Bits});
1178	}
1179
1180	static SDValue finalizeTS1AM(SDValue Op, SelectionDAG &DAG, SDValue Data,
1181	SDValue Bits) {
1182	SDLoc DL(Op);
1183	EVT VT = Data.getValueType();
1184	bool Byte = cast<AtomicSDNode>(Op)->getMemoryVT() == MVT::i8;
1185	// NewData = Data >> Bits
1186	// Result = NewData & 0xff ; If Byte is true (1 byte)
1187	// Result = NewData & 0xffff ; If Byte is false (2 bytes)
1188
1189	SDValue NewData = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: Data, N2: Bits);
1190	return DAG.getNode(Opcode: ISD::AND, DL, VT,
1191	Ops: {NewData, DAG.getConstant(Val: Byte ? `0xff` : `0xffff`, DL, VT)});
1192	}
1193
1194	SDValue VETargetLowering::lowerATOMIC_SWAP(SDValue Op,
1195	SelectionDAG &DAG) const {
1196	SDLoc DL(Op);
1197	AtomicSDNode *N = cast<AtomicSDNode>(Val&: Op);
1198
1199	if (N->getMemoryVT() == MVT::i8) {
1200	// For i8, use "ts1am"
1201	// Input:
1202	// ATOMIC_SWAP Ptr, Val, Order
1203	//
1204	// Output:
1205	// Remainder = AND Ptr, 3
1206	// Flag = 1 << Remainder ; 1 byte swap flag for TS1AM inst.
1207	// Bits = Remainder << 3
1208	// NewVal = Val << Bits
1209	//
1210	// Aligned = AND Ptr, -4
1211	// Data = TS1AM Aligned, Flag, NewVal
1212	//
1213	// NewData = Data >> Bits
1214	// Result = NewData & 0xff ; 1 byte result
1215	SDValue Flag;
1216	SDValue Bits;
1217	SDValue NewVal = prepareTS1AM(Op, DAG, Flag, Bits);
1218
1219	SDValue Ptr = N->getOperand(Num: `1`);
1220	SDValue Aligned = DAG.getNode(ISD::AND, DL, Ptr.getValueType(),
1221	{Ptr, DAG.getConstant(-`4`, DL, MVT::i64)});
1222	SDValue TS1AM = DAG.getAtomic(Opcode: VEISD::TS1AM, dl: DL, MemVT: N->getMemoryVT(),
1223	VTList: DAG.getVTList(VT1: Op.getNode()->getValueType(ResNo: `0`),
1224	VT2: Op.getNode()->getValueType(ResNo: `1`)),
1225	Ops: {N->getChain(), Aligned, Flag, NewVal},
1226	MMO: N->getMemOperand());
1227
1228	SDValue Result = finalizeTS1AM(Op, DAG, Data: TS1AM, Bits);
1229	SDValue Chain = TS1AM.getValue(R: `1`);
1230	return DAG.getMergeValues(Ops: {Result, Chain}, dl: DL);
1231	}
1232	if (N->getMemoryVT() == MVT::i16) {
1233	// For i16, use "ts1am"
1234	SDValue Flag;
1235	SDValue Bits;
1236	SDValue NewVal = prepareTS1AM(Op, DAG, Flag, Bits);
1237
1238	SDValue Ptr = N->getOperand(Num: `1`);
1239	SDValue Aligned = DAG.getNode(ISD::AND, DL, Ptr.getValueType(),
1240	{Ptr, DAG.getConstant(-`4`, DL, MVT::i64)});
1241	SDValue TS1AM = DAG.getAtomic(Opcode: VEISD::TS1AM, dl: DL, MemVT: N->getMemoryVT(),
1242	VTList: DAG.getVTList(VT1: Op.getNode()->getValueType(ResNo: `0`),
1243	VT2: Op.getNode()->getValueType(ResNo: `1`)),
1244	Ops: {N->getChain(), Aligned, Flag, NewVal},
1245	MMO: N->getMemOperand());
1246
1247	SDValue Result = finalizeTS1AM(Op, DAG, Data: TS1AM, Bits);
1248	SDValue Chain = TS1AM.getValue(R: `1`);
1249	return DAG.getMergeValues(Ops: {Result, Chain}, dl: DL);
1250	}
1251	// Otherwise, let llvm legalize it.
1252	return Op;
1253	}
1254
1255	SDValue VETargetLowering::lowerGlobalAddress(SDValue Op,
1256	SelectionDAG &DAG) const {
1257	return makeAddress(Op, DAG);
1258	}
1259
1260	SDValue VETargetLowering::lowerBlockAddress(SDValue Op,
1261	SelectionDAG &DAG) const {
1262	return makeAddress(Op, DAG);
1263	}
1264
1265	SDValue VETargetLowering::lowerConstantPool(SDValue Op,
1266	SelectionDAG &DAG) const {
1267	return makeAddress(Op, DAG);
1268	}
1269
1270	SDValue
1271	VETargetLowering::lowerToTLSGeneralDynamicModel(SDValue Op,
1272	SelectionDAG &DAG) const {
1273	SDLoc DL(Op);
1274
1275	// Generate the following code:
1276	// t1: ch,glue = callseq_start t0, 0, 0
1277	// t2: i64,ch,glue = VEISD::GETTLSADDR t1, label, t1:1
1278	// t3: ch,glue = callseq_end t2, 0, 0, t2:2
1279	// t4: i64,ch,glue = CopyFromReg t3, Register:i64 $sx0, t3:1
1280	SDValue Label = withTargetFlags(Op, TF: `0`, DAG);
1281	EVT PtrVT = Op.getValueType();
1282
1283	// Lowering the machine isd will make sure everything is in the right
1284	// location.
1285	SDValue Chain = DAG.getEntryNode();
1286	SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
1287	const uint32_t *Mask = Subtarget->getRegisterInfo()->getCallPreservedMask(
1288	MF: DAG.getMachineFunction(), CC: CallingConv::C);
1289	Chain = DAG.getCALLSEQ_START(Chain, InSize: `64`, OutSize: `0`, DL);
1290	SDValue Args[] = {Chain, Label, DAG.getRegisterMask(RegMask: Mask), Chain.getValue(R: `1`)};
1291	Chain = DAG.getNode(Opcode: VEISD::GETTLSADDR, DL, VTList: NodeTys, Ops: Args);
1292	Chain = DAG.getCALLSEQ_END(Chain, Size1: `64`, Size2: `0`, Glue: Chain.getValue(R: `1`), DL);
1293	Chain = DAG.getCopyFromReg(Chain, DL, VE::SX0, PtrVT, Chain.getValue(`1`));
1294
1295	// GETTLSADDR will be codegen'ed as call. Inform MFI that function has calls.
1296	MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
1297	MFI.setHasCalls(true);
1298
1299	// Also generate code to prepare a GOT register if it is PIC.
1300	if (isPositionIndependent()) {
1301	MachineFunction &MF = DAG.getMachineFunction();
1302	Subtarget->getInstrInfo()->getGlobalBaseReg(MF: &MF);
1303	}
1304
1305	return Chain;
1306	}
1307
1308	SDValue VETargetLowering::lowerGlobalTLSAddress(SDValue Op,
1309	SelectionDAG &DAG) const {
1310	// The current implementation of nld (2.26) doesn't allow local exec model
1311	// code described in VE-tls_v1.1.pdf (1) as its input. Instead, we always*
1312	// generate the general dynamic model code sequence.
1313	//
1314	// 1: https://www.nec.com/en/global/prod/hpc/aurora/document/VE-tls_v1.1.pdf*
1315	return lowerToTLSGeneralDynamicModel(Op, DAG);
1316	}
1317
1318	SDValue VETargetLowering::lowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
1319	return makeAddress(Op, DAG);
1320	}
1321
1322	// Lower a f128 load into two f64 loads.
1323	static SDValue lowerLoadF128(SDValue Op, SelectionDAG &DAG) {
1324	SDLoc DL(Op);
1325	LoadSDNode *LdNode = dyn_cast<LoadSDNode>(Val: Op.getNode());
1326	assert(LdNode && LdNode->getOffset().isUndef() && "Unexpected node type");
1327	Align Alignment = LdNode->getAlign();
1328	if (Alignment > `8`)
1329	Alignment = Align (`8`);
1330
1331	SDValue Lo64 =
1332	DAG.getLoad(MVT::f64, DL, LdNode->getChain(), LdNode->getBasePtr(),
1333	LdNode->getPointerInfo(), Alignment,
1334	LdNode->isVolatile() ? MachineMemOperand::MOVolatile
1335	: MachineMemOperand::MONone);
1336	EVT AddrVT = LdNode->getBasePtr().getValueType();
1337	SDValue HiPtr = DAG.getNode(Opcode: ISD::ADD, DL, VT: AddrVT, N1: LdNode->getBasePtr(),
1338	N2: DAG.getConstant(Val: `8`, DL, VT: AddrVT));
1339	SDValue Hi64 =
1340	DAG.getLoad(MVT::f64, DL, LdNode->getChain(), HiPtr,
1341	LdNode->getPointerInfo(), Alignment,
1342	LdNode->isVolatile() ? MachineMemOperand::MOVolatile
1343	: MachineMemOperand::MONone);
1344
1345	SDValue SubRegEven = DAG.getTargetConstant(VE::sub_even, DL, MVT::i32);
1346	SDValue SubRegOdd = DAG.getTargetConstant(VE::sub_odd, DL, MVT::i32);
1347
1348	// VE stores Hi64 to 8(addr) and Lo64 to 0(addr)
1349	SDNode *InFP128 =
1350	DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f128);
1351	InFP128 = DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL, MVT::f128,
1352	SDValue(InFP128, `0`), Hi64, SubRegEven);
1353	InFP128 = DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL, MVT::f128,
1354	SDValue(InFP128, `0`), Lo64, SubRegOdd);
1355	SDValue OutChains[`2`] = {SDValue (Lo64.getNode(), `1`),
1356	SDValue (Hi64.getNode(), `1`)};
1357	SDValue OutChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
1358	SDValue Ops[`2`] = {SDValue (InFP128, `0`), OutChain};
1359	return DAG.getMergeValues(Ops, dl: DL);
1360	}
1361
1362	// Lower a vXi1 load into following instructions
1363	// LDrii %1, (,%addr)
1364	// LVMxir %vm, 0, %1
1365	// LDrii %2, 8(,%addr)
1366	// LVMxir %vm, 0, %2
1367	// ...
1368	static SDValue lowerLoadI1(SDValue Op, SelectionDAG &DAG) {
1369	SDLoc DL(Op);
1370	LoadSDNode *LdNode = dyn_cast<LoadSDNode>(Val: Op.getNode());
1371	assert(LdNode && LdNode->getOffset().isUndef() && "Unexpected node type");
1372
1373	SDValue BasePtr = LdNode->getBasePtr();
1374	Align Alignment = LdNode->getAlign();
1375	if (Alignment > `8`)
1376	Alignment = Align (`8`);
1377
1378	EVT AddrVT = BasePtr.getValueType();
1379	EVT MemVT = LdNode->getMemoryVT();
1380	if (MemVT == MVT::v256i1 \|\| MemVT == MVT::v4i64) {
1381	SDValue OutChains[`4`];
1382	SDNode *VM = DAG.getMachineNode(Opcode: TargetOpcode::IMPLICIT_DEF, dl: DL, VT: MemVT);
1383	for (int i = `0`; i < `4`; ++i) {
1384	// Generate load dag and prepare chains.
1385	SDValue Addr = DAG.getNode(Opcode: ISD::ADD, DL, VT: AddrVT, N1: BasePtr,
1386	N2: DAG.getConstant(Val: `8` * i, DL, VT: AddrVT));
1387	SDValue Val =
1388	DAG.getLoad(MVT::i64, DL, LdNode->getChain(), Addr,
1389	LdNode->getPointerInfo(), Alignment,
1390	LdNode->isVolatile() ? MachineMemOperand::MOVolatile
1391	: MachineMemOperand::MONone);
1392	OutChains[i] = SDValue (Val.getNode(), `1`);
1393
1394	VM = DAG.getMachineNode(VE::LVMir_m, DL, MVT::i64,
1395	DAG.getTargetConstant(i, DL, MVT::i64), Val,
1396	SDValue(VM, `0`));
1397	}
1398	SDValue OutChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
1399	SDValue Ops[`2`] = {SDValue (VM, `0`), OutChain};
1400	return DAG.getMergeValues(Ops, dl: DL);
1401	} else if (MemVT == MVT::v512i1 \|\| MemVT == MVT::v8i64) {
1402	SDValue OutChains[`8`];
1403	SDNode *VM = DAG.getMachineNode(Opcode: TargetOpcode::IMPLICIT_DEF, dl: DL, VT: MemVT);
1404	for (int i = `0`; i < `8`; ++i) {
1405	// Generate load dag and prepare chains.
1406	SDValue Addr = DAG.getNode(Opcode: ISD::ADD, DL, VT: AddrVT, N1: BasePtr,
1407	N2: DAG.getConstant(Val: `8` * i, DL, VT: AddrVT));
1408	SDValue Val =
1409	DAG.getLoad(MVT::i64, DL, LdNode->getChain(), Addr,
1410	LdNode->getPointerInfo(), Alignment,
1411	LdNode->isVolatile() ? MachineMemOperand::MOVolatile
1412	: MachineMemOperand::MONone);
1413	OutChains[i] = SDValue (Val.getNode(), `1`);
1414
1415	VM = DAG.getMachineNode(VE::LVMyir_y, DL, MVT::i64,
1416	DAG.getTargetConstant(i, DL, MVT::i64), Val,
1417	SDValue(VM, `0`));
1418	}
1419	SDValue OutChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
1420	SDValue Ops[`2`] = {SDValue (VM, `0`), OutChain};
1421	return DAG.getMergeValues(Ops, dl: DL);
1422	} else {
1423	// Otherwise, ask llvm to expand it.
1424	return SDValue ();
1425	}
1426	}
1427
1428	SDValue VETargetLowering::lowerLOAD(SDValue Op, SelectionDAG &DAG) const {
1429	LoadSDNode *LdNode = cast<LoadSDNode>(Val: Op.getNode());
1430	EVT MemVT = LdNode->getMemoryVT();
1431
1432	// If VPU is enabled, always expand non-mask vector loads to VVP
1433	if (Subtarget->enableVPU() && MemVT.isVector() && !isMaskType(SomeVT: MemVT))
1434	return lowerToVVP(Op, DAG);
1435
1436	SDValue BasePtr = LdNode->getBasePtr();
1437	if (isa<FrameIndexSDNode>(Val: BasePtr.getNode())) {
1438	// Do not expand store instruction with frame index here because of
1439	// dependency problems. We expand it later in eliminateFrameIndex().
1440	return Op;
1441	}
1442
1443	if (MemVT == MVT::f128)
1444	return lowerLoadF128(Op, DAG);
1445	if (isMaskType(SomeVT: MemVT))
1446	return lowerLoadI1(Op, DAG);
1447
1448	return Op;
1449	}
1450
1451	// Lower a f128 store into two f64 stores.
1452	static SDValue lowerStoreF128(SDValue Op, SelectionDAG &DAG) {
1453	SDLoc DL(Op);
1454	StoreSDNode *StNode = dyn_cast<StoreSDNode>(Val: Op.getNode());
1455	assert(StNode && StNode->getOffset().isUndef() && "Unexpected node type");
1456
1457	SDValue SubRegEven = DAG.getTargetConstant(VE::sub_even, DL, MVT::i32);
1458	SDValue SubRegOdd = DAG.getTargetConstant(VE::sub_odd, DL, MVT::i32);
1459
1460	SDNode *Hi64 = DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::i64,
1461	StNode->getValue(), SubRegEven);
1462	SDNode *Lo64 = DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::i64,
1463	StNode->getValue(), SubRegOdd);
1464
1465	Align Alignment = StNode->getAlign();
1466	if (Alignment > `8`)
1467	Alignment = Align (`8`);
1468
1469	// VE stores Hi64 to 8(addr) and Lo64 to 0(addr)
1470	SDValue OutChains[`2`];
1471	OutChains[`0`] =
1472	DAG.getStore(Chain: StNode->getChain(), dl: DL, Val: SDValue (Lo64, `0`),
1473	Ptr: StNode->getBasePtr(), PtrInfo: MachinePointerInfo (), Alignment,
1474	MMOFlags: StNode->isVolatile() ? MachineMemOperand::MOVolatile
1475	: MachineMemOperand::MONone);
1476	EVT AddrVT = StNode->getBasePtr().getValueType();
1477	SDValue HiPtr = DAG.getNode(Opcode: ISD::ADD, DL, VT: AddrVT, N1: StNode->getBasePtr(),
1478	N2: DAG.getConstant(Val: `8`, DL, VT: AddrVT));
1479	OutChains[`1`] =
1480	DAG.getStore(Chain: StNode->getChain(), dl: DL, Val: SDValue (Hi64, `0`), Ptr: HiPtr,
1481	PtrInfo: MachinePointerInfo (), Alignment,
1482	MMOFlags: StNode->isVolatile() ? MachineMemOperand::MOVolatile
1483	: MachineMemOperand::MONone);
1484	return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
1485	}
1486
1487	// Lower a vXi1 store into following instructions
1488	// SVMi %1, %vm, 0
1489	// STrii %1, (,%addr)
1490	// SVMi %2, %vm, 1
1491	// STrii %2, 8(,%addr)
1492	// ...
1493	static SDValue lowerStoreI1(SDValue Op, SelectionDAG &DAG) {
1494	SDLoc DL(Op);
1495	StoreSDNode *StNode = dyn_cast<StoreSDNode>(Val: Op.getNode());
1496	assert(StNode && StNode->getOffset().isUndef() && "Unexpected node type");
1497
1498	SDValue BasePtr = StNode->getBasePtr();
1499	Align Alignment = StNode->getAlign();
1500	if (Alignment > `8`)
1501	Alignment = Align (`8`);
1502	EVT AddrVT = BasePtr.getValueType();
1503	EVT MemVT = StNode->getMemoryVT();
1504	if (MemVT == MVT::v256i1 \|\| MemVT == MVT::v4i64) {
1505	SDValue OutChains[`4`];
1506	for (int i = `0`; i < `4`; ++i) {
1507	SDNode *V =
1508	DAG.getMachineNode(VE::SVMmi, DL, MVT::i64, StNode->getValue(),
1509	DAG.getTargetConstant(i, DL, MVT::i64));
1510	SDValue Addr = DAG.getNode(Opcode: ISD::ADD, DL, VT: AddrVT, N1: BasePtr,
1511	N2: DAG.getConstant(Val: `8` * i, DL, VT: AddrVT));
1512	OutChains[i] =
1513	DAG.getStore(Chain: StNode->getChain(), dl: DL, Val: SDValue (V, `0`), Ptr: Addr,
1514	PtrInfo: MachinePointerInfo (), Alignment,
1515	MMOFlags: StNode->isVolatile() ? MachineMemOperand::MOVolatile
1516	: MachineMemOperand::MONone);
1517	}
1518	return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
1519	} else if (MemVT == MVT::v512i1 \|\| MemVT == MVT::v8i64) {
1520	SDValue OutChains[`8`];
1521	for (int i = `0`; i < `8`; ++i) {
1522	SDNode *V =
1523	DAG.getMachineNode(VE::SVMyi, DL, MVT::i64, StNode->getValue(),
1524	DAG.getTargetConstant(i, DL, MVT::i64));
1525	SDValue Addr = DAG.getNode(Opcode: ISD::ADD, DL, VT: AddrVT, N1: BasePtr,
1526	N2: DAG.getConstant(Val: `8` * i, DL, VT: AddrVT));
1527	OutChains[i] =
1528	DAG.getStore(Chain: StNode->getChain(), dl: DL, Val: SDValue (V, `0`), Ptr: Addr,
1529	PtrInfo: MachinePointerInfo (), Alignment,
1530	MMOFlags: StNode->isVolatile() ? MachineMemOperand::MOVolatile
1531	: MachineMemOperand::MONone);
1532	}
1533	return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
1534	} else {
1535	// Otherwise, ask llvm to expand it.
1536	return SDValue ();
1537	}
1538	}
1539
1540	SDValue VETargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const {
1541	StoreSDNode *StNode = cast<StoreSDNode>(Val: Op.getNode());
1542	assert(StNode && StNode->getOffset().isUndef() && "Unexpected node type");
1543	EVT MemVT = StNode->getMemoryVT();
1544
1545	// If VPU is enabled, always expand non-mask vector stores to VVP
1546	if (Subtarget->enableVPU() && MemVT.isVector() && !isMaskType(SomeVT: MemVT))
1547	return lowerToVVP(Op, DAG);
1548
1549	SDValue BasePtr = StNode->getBasePtr();
1550	if (isa<FrameIndexSDNode>(Val: BasePtr.getNode())) {
1551	// Do not expand store instruction with frame index here because of
1552	// dependency problems. We expand it later in eliminateFrameIndex().
1553	return Op;
1554	}
1555
1556	if (MemVT == MVT::f128)
1557	return lowerStoreF128(Op, DAG);
1558	if (isMaskType(SomeVT: MemVT))
1559	return lowerStoreI1(Op, DAG);
1560
1561	// Otherwise, ask llvm to expand it.
1562	return SDValue ();
1563	}
1564
1565	SDValue VETargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
1566	MachineFunction &MF = DAG.getMachineFunction();
1567	VEMachineFunctionInfo *FuncInfo = MF.getInfo<VEMachineFunctionInfo>();
1568	auto PtrVT = getPointerTy(DL: DAG.getDataLayout());
1569
1570	// Need frame address to find the address of VarArgsFrameIndex.
1571	MF.getFrameInfo().setFrameAddressIsTaken(true);
1572
1573	// vastart just stores the address of the VarArgsFrameIndex slot into the
1574	// memory location argument.
1575	SDLoc DL(Op);
1576	SDValue Offset =
1577	DAG.getNode(ISD::ADD, DL, PtrVT, DAG.getRegister(VE::SX9, PtrVT),
1578	DAG.getIntPtrConstant(FuncInfo->getVarArgsFrameOffset(), DL));
1579	const Value *SV = cast<SrcValueSDNode>(Val: Op.getOperand(i: `2`))->getValue();
1580	return DAG.getStore(Chain: Op.getOperand(i: `0`), dl: DL, Val: Offset, Ptr: Op.getOperand(i: `1`),
1581	PtrInfo: MachinePointerInfo (SV));
1582	}
1583
1584	SDValue VETargetLowering::lowerVAARG(SDValue Op, SelectionDAG &DAG) const {
1585	SDNode *Node = Op.getNode();
1586	EVT VT = Node->getValueType(ResNo: `0`);
1587	SDValue InChain = Node->getOperand(Num: `0`);
1588	SDValue VAListPtr = Node->getOperand(Num: `1`);
1589	EVT PtrVT = VAListPtr.getValueType();
1590	const Value *SV = cast<SrcValueSDNode>(Val: Node->getOperand(Num: `2`))->getValue();
1591	SDLoc DL(Node);
1592	SDValue VAList =
1593	DAG.getLoad(VT: PtrVT, dl: DL, Chain: InChain, Ptr: VAListPtr, PtrInfo: MachinePointerInfo (SV));
1594	SDValue Chain = VAList.getValue(R: `1`);
1595	SDValue NextPtr;
1596
1597	if (VT == MVT::f128) {
1598	// VE f128 values must be stored with 16 bytes alignment. We don't
1599	// know the actual alignment of VAList, so we take alignment of it
1600	// dynamically.
1601	int Align = `16`;
1602	VAList = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: VAList,
1603	N2: DAG.getConstant(Val: Align - `1`, DL, VT: PtrVT));
1604	VAList = DAG.getNode(Opcode: ISD::AND, DL, VT: PtrVT, N1: VAList,
1605	N2: DAG.getConstant(Val: -Align, DL, VT: PtrVT));
1606	// Increment the pointer, VAList, by 16 to the next vaarg.
1607	NextPtr =
1608	DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: VAList, N2: DAG.getIntPtrConstant(Val: `16`, DL));
1609	} else if (VT == MVT::f32) {
1610	// float --> need special handling like below.
1611	// 0 4
1612	// +------+------+
1613	// \| empty\| float\|
1614	// +------+------+
1615	// Increment the pointer, VAList, by 8 to the next vaarg.
1616	NextPtr =
1617	DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: VAList, N2: DAG.getIntPtrConstant(Val: `8`, DL));
1618	// Then, adjust VAList.
1619	unsigned InternalOffset = `4`;
1620	VAList = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: VAList,
1621	N2: DAG.getConstant(Val: InternalOffset, DL, VT: PtrVT));
1622	} else {
1623	// Increment the pointer, VAList, by 8 to the next vaarg.
1624	NextPtr =
1625	DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: VAList, N2: DAG.getIntPtrConstant(Val: `8`, DL));
1626	}
1627
1628	// Store the incremented VAList to the legalized pointer.
1629	InChain = DAG.getStore(Chain, dl: DL, Val: NextPtr, Ptr: VAListPtr, PtrInfo: MachinePointerInfo (SV));
1630
1631	// Load the actual argument out of the pointer VAList.
1632	// We can't count on greater alignment than the word size.
1633	return DAG.getLoad(
1634	VT, dl: DL, Chain: InChain, Ptr: VAList, PtrInfo: MachinePointerInfo (),
1635	Alignment: Align (std::min(a: PtrVT.getSizeInBits(), b: VT.getSizeInBits()) / `8`));
1636	}
1637
1638	SDValue VETargetLowering::lowerDYNAMIC_STACKALLOC(SDValue Op,
1639	SelectionDAG &DAG) const {
1640	// Generate following code.
1641	// (void)__llvm_grow_stack(size);
1642	// ret = GETSTACKTOP; // pseudo instruction
1643	SDLoc DL(Op);
1644
1645	// Get the inputs.
1646	SDNode *Node = Op.getNode();
1647	SDValue Chain = Op.getOperand(i: `0`);
1648	SDValue Size = Op.getOperand(i: `1`);
1649	MaybeAlign Alignment(Op.getConstantOperandVal(i: `2`));
1650	EVT VT = Node->getValueType(ResNo: `0`);
1651
1652	// Chain the dynamic stack allocation so that it doesn't modify the stack
1653	// pointer when other instructions are using the stack.
1654	Chain = DAG.getCALLSEQ_START(Chain, InSize: `0`, OutSize: `0`, DL);
1655
1656	const TargetFrameLowering &TFI = *Subtarget->getFrameLowering();
1657	Align StackAlign = TFI.getStackAlign();
1658	bool NeedsAlign = Alignment.valueOrOne() > StackAlign;
1659
1660	// Prepare arguments
1661	TargetLowering::ArgListTy Args;
1662	TargetLowering::ArgListEntry Entry;
1663	Entry.Node = Size;
1664	Entry.Ty = Entry.Node.getValueType().getTypeForEVT(Context&: *DAG.getContext());
1665	Args.push_back(x: Entry);
1666	if (NeedsAlign) {
1667	Entry.Node = DAG.getConstant(Val: ~(Alignment ->value() - `1ULL`), DL, VT);
1668	Entry.Ty = Entry.Node.getValueType().getTypeForEVT(Context&: *DAG.getContext());
1669	Args.push_back(x: Entry);
1670	}
1671	Type RetTy = Type::getVoidTy(C&: DAG.getContext());
1672
1673	EVT PtrVT = Op.getValueType();
1674	SDValue Callee;
1675	if (NeedsAlign) {
1676	Callee = DAG.getTargetExternalSymbol(Sym: "__ve_grow_stack_align", VT: PtrVT, TargetFlags: `0`);
1677	} else {
1678	Callee = DAG.getTargetExternalSymbol(Sym: "__ve_grow_stack", VT: PtrVT, TargetFlags: `0`);
1679	}
1680
1681	TargetLowering::CallLoweringInfo CLI(DAG);
1682	CLI.setDebugLoc(DL)
1683	.setChain(Chain)
1684	.setCallee(CC: CallingConv::PreserveAll, ResultType: RetTy, Target: Callee, ArgsList: std::move(Args))
1685	.setDiscardResult(true);
1686	std::pair<SDValue, SDValue> pair = LowerCallTo(CLI);
1687	Chain = pair.second;
1688	SDValue Result = DAG.getNode(Opcode: VEISD::GETSTACKTOP, DL, VT, Operand: Chain);
1689	if (NeedsAlign) {
1690	Result = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: Result,
1691	N2: DAG.getConstant(Val: (Alignment ->value() - `1ULL`), DL, VT));
1692	Result = DAG.getNode(Opcode: ISD::AND, DL, VT, N1: Result,
1693	N2: DAG.getConstant(Val: ~(Alignment ->value() - `1ULL`), DL, VT));
1694	}
1695	// Chain = Result.getValue(1);
1696	Chain = DAG.getCALLSEQ_END(Chain, Size1: `0`, Size2: `0`, Glue: SDValue (), DL);
1697
1698	SDValue Ops[`2`] = {Result, Chain};
1699	return DAG.getMergeValues(Ops, dl: DL);
1700	}
1701
1702	SDValue VETargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
1703	SelectionDAG &DAG) const {
1704	SDLoc DL(Op);
1705	return DAG.getNode(VEISD::EH_SJLJ_LONGJMP, DL, MVT::Other, Op.getOperand(`0`),
1706	Op.getOperand(`1`));
1707	}
1708
1709	SDValue VETargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
1710	SelectionDAG &DAG) const {
1711	SDLoc DL(Op);
1712	return DAG.getNode(VEISD::EH_SJLJ_SETJMP, DL,
1713	DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(`0`),
1714	Op.getOperand(`1`));
1715	}
1716
1717	SDValue VETargetLowering::lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op,
1718	SelectionDAG &DAG) const {
1719	SDLoc DL(Op);
1720	return DAG.getNode(VEISD::EH_SJLJ_SETUP_DISPATCH, DL, MVT::Other,
1721	Op.getOperand(`0`));
1722	}
1723
1724	static SDValue lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG,
1725	const VETargetLowering &TLI,
1726	const VESubtarget *Subtarget) {
1727	SDLoc DL(Op);
1728	MachineFunction &MF = DAG.getMachineFunction();
1729	EVT PtrVT = TLI.getPointerTy(DL: MF.getDataLayout());
1730
1731	MachineFrameInfo &MFI = MF.getFrameInfo();
1732	MFI.setFrameAddressIsTaken(true);
1733
1734	unsigned Depth = Op.getConstantOperandVal(i: `0`);
1735	const VERegisterInfo *RegInfo = Subtarget->getRegisterInfo();
1736	Register FrameReg = RegInfo->getFrameRegister(MF);
1737	SDValue FrameAddr =
1738	DAG.getCopyFromReg(Chain: DAG.getEntryNode(), dl: DL, Reg: FrameReg, VT: PtrVT);
1739	while (Depth--)
1740	FrameAddr = DAG.getLoad(VT: Op.getValueType(), dl: DL, Chain: DAG.getEntryNode(),
1741	Ptr: FrameAddr, PtrInfo: MachinePointerInfo ());
1742	return FrameAddr;
1743	}
1744
1745	static SDValue lowerRETURNADDR(SDValue Op, SelectionDAG &DAG,
1746	const VETargetLowering &TLI,
1747	const VESubtarget *Subtarget) {
1748	MachineFunction &MF = DAG.getMachineFunction();
1749	MachineFrameInfo &MFI = MF.getFrameInfo();
1750	MFI.setReturnAddressIsTaken(true);
1751
1752	if (TLI.verifyReturnAddressArgumentIsConstant(Op, DAG))
1753	return SDValue ();
1754
1755	SDValue FrameAddr = lowerFRAMEADDR(Op, DAG, TLI, Subtarget);
1756
1757	SDLoc DL(Op);
1758	EVT VT = Op.getValueType();
1759	SDValue Offset = DAG.getConstant(Val: `8`, DL, VT);
1760	return DAG.getLoad(VT, dl: DL, Chain: DAG.getEntryNode(),
1761	Ptr: DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: FrameAddr, N2: Offset),
1762	PtrInfo: MachinePointerInfo ());
1763	}
1764
1765	SDValue VETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
1766	SelectionDAG &DAG) const {
1767	SDLoc DL(Op);
1768	unsigned IntNo = Op.getConstantOperandVal(i: `0`);
1769	switch (IntNo) {
1770	default: // Don't custom lower most intrinsics.
1771	return SDValue ();
1772	case Intrinsic::eh_sjlj_lsda: {
1773	MachineFunction &MF = DAG.getMachineFunction();
1774	MVT VT = Op.getSimpleValueType();
1775	const VETargetMachine *TM =
1776	static_cast<const VETargetMachine *>(&DAG.getTarget());
1777
1778	// Create GCC_except_tableXX string. The real symbol for that will be
1779	// generated in EHStreamer::emitExceptionTable() later. So, we just
1780	// borrow it's name here.
1781	TM->getStrList()->push_back(x: std::string(
1782	(Twine ("GCC_except_table") + Twine (MF.getFunctionNumber())).str()));
1783	SDValue Addr =
1784	DAG.getTargetExternalSymbol(Sym: TM->getStrList()->back().c_str(), VT, TargetFlags: `0`);
1785	if (isPositionIndependent()) {
1786	Addr = makeHiLoPair(Op: Addr, HiTF: VEMCExpr::VK_VE_GOTOFF_HI32,
1787	LoTF: VEMCExpr::VK_VE_GOTOFF_LO32, DAG);
1788	SDValue GlobalBase = DAG.getNode(Opcode: VEISD::GLOBAL_BASE_REG, DL, VT);
1789	return DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: GlobalBase, N2: Addr);
1790	}
1791	return makeHiLoPair(Op: Addr, HiTF: VEMCExpr::VK_VE_HI32, LoTF: VEMCExpr::VK_VE_LO32, DAG);
1792	}
1793	}
1794	}
1795
1796	static bool getUniqueInsertion(SDNode N, unsigned* &UniqueIdx) {
1797	if (!isa<BuildVectorSDNode>(Val: N))
1798	return false;
1799	const auto *BVN = cast<BuildVectorSDNode>(Val: N);
1800
1801	// Find first non-undef insertion.
1802	unsigned Idx;
1803	for (Idx = `0`; Idx < BVN->getNumOperands(); ++Idx) {
1804	auto ElemV = BVN->getOperand(Num: Idx);
1805	if (!ElemV ->isUndef())
1806	break;
1807	}
1808	// Catch the (hypothetical) all-undef case.
1809	if (Idx == BVN->getNumOperands())
1810	return false;
1811	// Remember insertion.
1812	UniqueIdx = Idx++;
1813	// Verify that all other insertions are undef.
1814	for (; Idx < BVN->getNumOperands(); ++Idx) {
1815	auto ElemV = BVN->getOperand(Num: Idx);
1816	if (!ElemV ->isUndef())
1817	return false;
1818	}
1819	return true;
1820	}
1821
1822	static SDValue getSplatValue(SDNode *N) {
1823	if (auto *BuildVec = dyn_cast<BuildVectorSDNode>(Val: N)) {
1824	return BuildVec->getSplatValue();
1825	}
1826	return SDValue ();
1827	}
1828
1829	SDValue VETargetLowering::lowerBUILD_VECTOR(SDValue Op,
1830	SelectionDAG &DAG) const {
1831	VECustomDAG CDAG(DAG, Op);
1832	MVT ResultVT = Op.getSimpleValueType();
1833
1834	// If there is just one element, expand to INSERT_VECTOR_ELT.
1835	unsigned UniqueIdx;
1836	if (getUniqueInsertion(N: Op.getNode(), UniqueIdx)) {
1837	SDValue AccuV = CDAG.getUNDEF(VT: Op.getValueType());
1838	auto ElemV = Op ->getOperand(Num: UniqueIdx);
1839	SDValue IdxV = CDAG.getConstant(UniqueIdx, MVT::i64);
1840	return CDAG.getNode(OC: ISD::INSERT_VECTOR_ELT, ResVT: ResultVT, OpV: {AccuV, ElemV, IdxV});
1841	}
1842
1843	// Else emit a broadcast.
1844	if (SDValue ScalarV = getSplatValue(N: Op.getNode())) {
1845	unsigned NumEls = ResultVT.getVectorNumElements();
1846	auto AVL = CDAG.getConstant(NumEls, MVT::i32);
1847	return CDAG.getBroadcast(ResultVT, Scalar: ScalarV, AVL: AVL);
1848	}
1849
1850	// Expand
1851	return SDValue ();
1852	}
1853
1854	TargetLowering::LegalizeAction
1855	VETargetLowering::getCustomOperationAction(SDNode &Op) const {
1856	// Custom legalization on VVP_ and VEC_* opcodes is required to pack-legalize*
1857	// these operations (transform nodes such that their AVL parameter refers to
1858	// packs of 64bit, instead of number of elements.
1859
1860	// Packing opcodes are created with a pack-legal AVL (LEGALAVL). No need to
1861	// re-visit them.
1862	if (isPackingSupportOpcode(Opc: Op.getOpcode()))
1863	return Legal;
1864
1865	// Custom lower to legalize AVL for packed mode.
1866	if (isVVPOrVEC(Op.getOpcode()))
1867	return Custom;
1868	return Legal;
1869	}
1870
1871	SDValue VETargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
1872	LLVM_DEBUG(dbgs() << "::LowerOperation "; Op.dump(&DAG));
1873	unsigned Opcode = Op.getOpcode();
1874
1875	/// Scalar isel.
1876	switch (Opcode) {
1877	case ISD::ATOMIC_FENCE:
1878	return lowerATOMIC_FENCE(Op, DAG);
1879	case ISD::ATOMIC_SWAP:
1880	return lowerATOMIC_SWAP(Op, DAG);
1881	case ISD::BlockAddress:
1882	return lowerBlockAddress(Op, DAG);
1883	case ISD::ConstantPool:
1884	return lowerConstantPool(Op, DAG);
1885	case ISD::DYNAMIC_STACKALLOC:
1886	return lowerDYNAMIC_STACKALLOC(Op, DAG);
1887	case ISD::EH_SJLJ_LONGJMP:
1888	return lowerEH_SJLJ_LONGJMP(Op, DAG);
1889	case ISD::EH_SJLJ_SETJMP:
1890	return lowerEH_SJLJ_SETJMP(Op, DAG);
1891	case ISD::EH_SJLJ_SETUP_DISPATCH:
1892	return lowerEH_SJLJ_SETUP_DISPATCH(Op, DAG);
1893	case ISD::FRAMEADDR:
1894	return lowerFRAMEADDR(Op, DAG, TLI: *this, Subtarget);
1895	case ISD::GlobalAddress:
1896	return lowerGlobalAddress(Op, DAG);
1897	case ISD::GlobalTLSAddress:
1898	return lowerGlobalTLSAddress(Op, DAG);
1899	case ISD::INTRINSIC_WO_CHAIN:
1900	return lowerINTRINSIC_WO_CHAIN(Op, DAG);
1901	case ISD::JumpTable:
1902	return lowerJumpTable(Op, DAG);
1903	case ISD::LOAD:
1904	return lowerLOAD(Op, DAG);
1905	case ISD::RETURNADDR:
1906	return lowerRETURNADDR(Op, DAG, TLI: *this, Subtarget);
1907	case ISD::BUILD_VECTOR:
1908	return lowerBUILD_VECTOR(Op, DAG);
1909	case ISD::STORE:
1910	return lowerSTORE(Op, DAG);
1911	case ISD::VASTART:
1912	return lowerVASTART(Op, DAG);
1913	case ISD::VAARG:
1914	return lowerVAARG(Op, DAG);
1915
1916	case ISD::INSERT_VECTOR_ELT:
1917	return lowerINSERT_VECTOR_ELT(Op, DAG);
1918	case ISD::EXTRACT_VECTOR_ELT:
1919	return lowerEXTRACT_VECTOR_ELT(Op, DAG);
1920	}
1921
1922	/// Vector isel.
1923	if (ISD::isVPOpcode(Opcode))
1924	return lowerToVVP(Op, DAG);
1925
1926	switch (Opcode) {
1927	default:
1928	llvm_unreachable("Should not custom lower this!");
1929
1930	// Legalize the AVL of this internal node.
1931	case VEISD::VEC_BROADCAST:
1932	#define ADD_VVP_OP(VVP_NAME, ...) case VEISD::VVP_NAME:
1933	#include "VVPNodes.def"
1934	// AVL already legalized.
1935	if (getAnnotatedNodeAVL(Op).second)
1936	return Op;
1937	return legalizeInternalVectorOp(Op, DAG);
1938
1939	// Translate into a VEC_/VVP_* layer operation.*
1940	case ISD::MLOAD:
1941	case ISD::MSTORE:
1942	#define ADD_VVP_OP(VVP_NAME, ISD_NAME) case ISD::ISD_NAME:
1943	#include "VVPNodes.def"
1944	if (isMaskArithmetic(Op) && isPackedVectorType(SomeVT: Op.getValueType()))
1945	return splitMaskArithmetic(Op, DAG);
1946	return lowerToVVP(Op, DAG);
1947	}
1948	}
1949	/// } Custom Lower
1950
1951	void VETargetLowering::ReplaceNodeResults(SDNode *N,
1952	SmallVectorImpl<SDValue> &Results,
1953	SelectionDAG &DAG) const {
1954	switch (N->getOpcode()) {
1955	case ISD::ATOMIC_SWAP:
1956	// Let LLVM expand atomic swap instruction through LowerOperation.
1957	return;
1958	default:
1959	LLVM_DEBUG(N->dumpr(&DAG));
1960	llvm_unreachable("Do not know how to custom type legalize this operation!");
1961	}
1962	}
1963
1964	/// JumpTable for VE.
1965	///
1966	/// VE cannot generate relocatable symbol in jump table. VE cannot
1967	/// generate expressions using symbols in both text segment and data
1968	/// segment like below.
1969	/// .4byte .LBB0_2-.LJTI0_0
1970	/// So, we generate offset from the top of function like below as
1971	/// a custom label.
1972	/// .4byte .LBB0_2-<function name>
1973
1974	unsigned VETargetLowering::getJumpTableEncoding() const {
1975	// Use custom label for PIC.
1976	if (isPositionIndependent())
1977	return MachineJumpTableInfo::EK_Custom32;
1978
1979	// Otherwise, use the normal jump table encoding heuristics.
1980	return TargetLowering::getJumpTableEncoding();
1981	}
1982
1983	const MCExpr *VETargetLowering::LowerCustomJumpTableEntry(
1984	const MachineJumpTableInfo MJTI, const* MachineBasicBlock *MBB,
1985	unsigned Uid, MCContext &Ctx) const {
1986	assert(isPositionIndependent());
1987
1988	// Generate custom label for PIC like below.
1989	// .4bytes .LBB0_2-<function name>
1990	const auto *Value = MCSymbolRefExpr::create(Symbol: MBB->getSymbol(), Ctx);
1991	MCSymbol *Sym = Ctx.getOrCreateSymbol(Name: MBB->getParent()->getName().data());
1992	const auto *Base = MCSymbolRefExpr::create(Symbol: Sym, Ctx);
1993	return MCBinaryExpr::createSub(LHS: Value, RHS: Base, Ctx);
1994	}
1995
1996	SDValue VETargetLowering::getPICJumpTableRelocBase(SDValue Table,
1997	SelectionDAG &DAG) const {
1998	assert(isPositionIndependent());
1999	SDLoc DL(Table);
2000	Function *Function = &DAG.getMachineFunction().getFunction();
2001	assert(Function != nullptr);
2002	auto PtrTy = getPointerTy(DL: DAG.getDataLayout(), AS: Function->getAddressSpace());
2003
2004	// In the jump table, we have following values in PIC mode.
2005	// .4bytes .LBB0_2-<function name>
2006	// We need to add this value and the address of this function to generate
2007	// .LBB0_2 label correctly under PIC mode. So, we want to generate following
2008	// instructions:
2009	// lea %reg, fun@gotoff_lo
2010	// and %reg, %reg, (32)0
2011	// lea.sl %reg, fun@gotoff_hi(%reg, %got)
2012	// In order to do so, we need to genarate correctly marked DAG node using
2013	// makeHiLoPair.
2014	SDValue Op = DAG.getGlobalAddress(GV: Function, DL, VT: PtrTy);
2015	SDValue HiLo = makeHiLoPair(Op, HiTF: VEMCExpr::VK_VE_GOTOFF_HI32,
2016	LoTF: VEMCExpr::VK_VE_GOTOFF_LO32, DAG);
2017	SDValue GlobalBase = DAG.getNode(Opcode: VEISD::GLOBAL_BASE_REG, DL, VT: PtrTy);
2018	return DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrTy, N1: GlobalBase, N2: HiLo);
2019	}
2020
2021	Register VETargetLowering::prepareMBB(MachineBasicBlock &MBB,
2022	MachineBasicBlock::iterator I,
2023	MachineBasicBlock *TargetBB,
2024	const DebugLoc &DL) const {
2025	MachineFunction *MF = MBB.getParent();
2026	MachineRegisterInfo &MRI = MF->getRegInfo();
2027	const VEInstrInfo *TII = Subtarget->getInstrInfo();
2028
2029	const TargetRegisterClass *RC = &VE::I64RegClass;
2030	Register Tmp1 = MRI.createVirtualRegister(RegClass: RC);
2031	Register Tmp2 = MRI.createVirtualRegister(RegClass: RC);
2032	Register Result = MRI.createVirtualRegister(RegClass: RC);
2033
2034	if (isPositionIndependent()) {
2035	// Create following instructions for local linkage PIC code.
2036	// lea %Tmp1, TargetBB@gotoff_lo
2037	// and %Tmp2, %Tmp1, (32)0
2038	// lea.sl %Result, TargetBB@gotoff_hi(%Tmp2, %s15) ; %s15 is GOT
2039	BuildMI(MBB, I, DL, TII->get(VE::LEAzii), Tmp1)
2040	.addImm(`0`)
2041	.addImm(`0`)
2042	.addMBB(TargetBB, VEMCExpr::VK_VE_GOTOFF_LO32);
2043	BuildMI(MBB, I, DL, TII->get(VE::ANDrm), Tmp2)
2044	.addReg(Tmp1, getKillRegState(true))
2045	.addImm(M0(`32`));
2046	BuildMI(MBB, I, DL, TII->get(VE::LEASLrri), Result)
2047	.addReg(VE::SX15)
2048	.addReg(Tmp2, getKillRegState(true))
2049	.addMBB(TargetBB, VEMCExpr::VK_VE_GOTOFF_HI32);
2050	} else {
2051	// Create following instructions for non-PIC code.
2052	// lea %Tmp1, TargetBB@lo
2053	// and %Tmp2, %Tmp1, (32)0
2054	// lea.sl %Result, TargetBB@hi(%Tmp2)
2055	BuildMI(MBB, I, DL, TII->get(VE::LEAzii), Tmp1)
2056	.addImm(`0`)
2057	.addImm(`0`)
2058	.addMBB(TargetBB, VEMCExpr::VK_VE_LO32);
2059	BuildMI(MBB, I, DL, TII->get(VE::ANDrm), Tmp2)
2060	.addReg(Tmp1, getKillRegState(true))
2061	.addImm(M0(`32`));
2062	BuildMI(MBB, I, DL, TII->get(VE::LEASLrii), Result)
2063	.addReg(Tmp2, getKillRegState(true))
2064	.addImm(`0`)
2065	.addMBB(TargetBB, VEMCExpr::VK_VE_HI32);
2066	}
2067	return Result;
2068	}
2069
2070	Register VETargetLowering::prepareSymbol(MachineBasicBlock &MBB,
2071	MachineBasicBlock::iterator I,
2072	StringRef Symbol, const DebugLoc &DL,
2073	bool IsLocal = false,
2074	bool IsCall = false) const {
2075	MachineFunction *MF = MBB.getParent();
2076	MachineRegisterInfo &MRI = MF->getRegInfo();
2077	const VEInstrInfo *TII = Subtarget->getInstrInfo();
2078
2079	const TargetRegisterClass *RC = &VE::I64RegClass;
2080	Register Result = MRI.createVirtualRegister(RegClass: RC);
2081
2082	if (isPositionIndependent()) {
2083	if (IsCall && !IsLocal) {
2084	// Create following instructions for non-local linkage PIC code function
2085	// calls. These instructions uses IC and magic number -24, so we expand
2086	// them in VEAsmPrinter.cpp from GETFUNPLT pseudo instruction.
2087	// lea %Reg, Symbol@plt_lo(-24)
2088	// and %Reg, %Reg, (32)0
2089	// sic %s16
2090	// lea.sl %Result, Symbol@plt_hi(%Reg, %s16) ; %s16 is PLT
2091	BuildMI(MBB, I, DL, TII->get(VE::GETFUNPLT), Result)
2092	.addExternalSymbol("abort");
2093	} else if (IsLocal) {
2094	Register Tmp1 = MRI.createVirtualRegister(RegClass: RC);
2095	Register Tmp2 = MRI.createVirtualRegister(RegClass: RC);
2096	// Create following instructions for local linkage PIC code.
2097	// lea %Tmp1, Symbol@gotoff_lo
2098	// and %Tmp2, %Tmp1, (32)0
2099	// lea.sl %Result, Symbol@gotoff_hi(%Tmp2, %s15) ; %s15 is GOT
2100	BuildMI(MBB, I, DL, TII->get(VE::LEAzii), Tmp1)
2101	.addImm(`0`)
2102	.addImm(`0`)
2103	.addExternalSymbol(Symbol.data(), VEMCExpr::VK_VE_GOTOFF_LO32);
2104	BuildMI(MBB, I, DL, TII->get(VE::ANDrm), Tmp2)
2105	.addReg(Tmp1, getKillRegState(true))
2106	.addImm(M0(`32`));
2107	BuildMI(MBB, I, DL, TII->get(VE::LEASLrri), Result)
2108	.addReg(VE::SX15)
2109	.addReg(Tmp2, getKillRegState(true))
2110	.addExternalSymbol(Symbol.data(), VEMCExpr::VK_VE_GOTOFF_HI32);
2111	} else {
2112	Register Tmp1 = MRI.createVirtualRegister(RegClass: RC);
2113	Register Tmp2 = MRI.createVirtualRegister(RegClass: RC);
2114	// Create following instructions for not local linkage PIC code.
2115	// lea %Tmp1, Symbol@got_lo
2116	// and %Tmp2, %Tmp1, (32)0
2117	// lea.sl %Tmp3, Symbol@gotoff_hi(%Tmp2, %s15) ; %s15 is GOT
2118	// ld %Result, 0(%Tmp3)
2119	Register Tmp3 = MRI.createVirtualRegister(RegClass: RC);
2120	BuildMI(MBB, I, DL, TII->get(VE::LEAzii), Tmp1)
2121	.addImm(`0`)
2122	.addImm(`0`)
2123	.addExternalSymbol(Symbol.data(), VEMCExpr::VK_VE_GOT_LO32);
2124	BuildMI(MBB, I, DL, TII->get(VE::ANDrm), Tmp2)
2125	.addReg(Tmp1, getKillRegState(true))
2126	.addImm(M0(`32`));
2127	BuildMI(MBB, I, DL, TII->get(VE::LEASLrri), Tmp3)
2128	.addReg(VE::SX15)
2129	.addReg(Tmp2, getKillRegState(true))
2130	.addExternalSymbol(Symbol.data(), VEMCExpr::VK_VE_GOT_HI32);
2131	BuildMI(MBB, I, DL, TII->get(VE::LDrii), Result)
2132	.addReg(Tmp3, getKillRegState(true))
2133	.addImm(`0`)
2134	.addImm(`0`);
2135	}
2136	} else {
2137	Register Tmp1 = MRI.createVirtualRegister(RegClass: RC);
2138	Register Tmp2 = MRI.createVirtualRegister(RegClass: RC);
2139	// Create following instructions for non-PIC code.
2140	// lea %Tmp1, Symbol@lo
2141	// and %Tmp2, %Tmp1, (32)0
2142	// lea.sl %Result, Symbol@hi(%Tmp2)
2143	BuildMI(MBB, I, DL, TII->get(VE::LEAzii), Tmp1)
2144	.addImm(`0`)
2145	.addImm(`0`)
2146	.addExternalSymbol(Symbol.data(), VEMCExpr::VK_VE_LO32);
2147	BuildMI(MBB, I, DL, TII->get(VE::ANDrm), Tmp2)
2148	.addReg(Tmp1, getKillRegState(true))
2149	.addImm(M0(`32`));
2150	BuildMI(MBB, I, DL, TII->get(VE::LEASLrii), Result)
2151	.addReg(Tmp2, getKillRegState(true))
2152	.addImm(`0`)
2153	.addExternalSymbol(Symbol.data(), VEMCExpr::VK_VE_HI32);
2154	}
2155	return Result;
2156	}
2157
2158	void VETargetLowering::setupEntryBlockForSjLj(MachineInstr &MI,
2159	MachineBasicBlock *MBB,
2160	MachineBasicBlock *DispatchBB,
2161	int FI, int Offset) const {
2162	DebugLoc DL = MI.getDebugLoc();
2163	const VEInstrInfo *TII = Subtarget->getInstrInfo();
2164
2165	Register LabelReg =
2166	prepareMBB(MBB&: *MBB, I: MachineBasicBlock::iterator (MI), TargetBB: DispatchBB, DL);
2167
2168	// Store an address of DispatchBB to a given jmpbuf[1] where has next IC
2169	// referenced by longjmp (throw) later.
2170	MachineInstrBuilder MIB = BuildMI(*MBB, MI, DL, TII->get(VE::STrii));
2171	addFrameReference(MIB, FI, Offset); // jmpbuf[1]
2172	MIB.addReg(RegNo: LabelReg, flags: getKillRegState(B: true));
2173	}
2174
2175	MachineBasicBlock *
2176	VETargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
2177	MachineBasicBlock MBB) const* {
2178	DebugLoc DL = MI.getDebugLoc();
2179	MachineFunction *MF = MBB->getParent();
2180	const TargetInstrInfo *TII = Subtarget->getInstrInfo();
2181	const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
2182	MachineRegisterInfo &MRI = MF->getRegInfo();
2183
2184	const BasicBlock *BB = MBB->getBasicBlock();
2185	MachineFunction::iterator I = ++MBB->getIterator();
2186
2187	// Memory Reference.
2188	SmallVector<MachineMemOperand *, `2`> MMOs(MI.memoperands_begin(),
2189	MI.memoperands_end());
2190	Register BufReg = MI.getOperand(i: `1`).getReg();
2191
2192	Register DstReg;
2193
2194	DstReg = MI.getOperand(i: `0`).getReg();
2195	const TargetRegisterClass *RC = MRI.getRegClass(Reg: DstReg);
2196	assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
2197	(void)TRI;
2198	Register MainDestReg = MRI.createVirtualRegister(RegClass: RC);
2199	Register RestoreDestReg = MRI.createVirtualRegister(RegClass: RC);
2200
2201	// For `v = call @llvm.eh.sjlj.setjmp(buf)`, we generate following
2202	// instructions. SP/FP must be saved in jmpbuf before `llvm.eh.sjlj.setjmp`.
2203	//
2204	// ThisMBB:
2205	// buf[3] = %s17 iff %s17 is used as BP
2206	// buf[1] = RestoreMBB as IC after longjmp
2207	// # SjLjSetup RestoreMBB
2208	//
2209	// MainMBB:
2210	// v_main = 0
2211	//
2212	// SinkMBB:
2213	// v = phi(v_main, MainMBB, v_restore, RestoreMBB)
2214	// ...
2215	//
2216	// RestoreMBB:
2217	// %s17 = buf[3] = iff %s17 is used as BP
2218	// v_restore = 1
2219	// goto SinkMBB
2220
2221	MachineBasicBlock *ThisMBB = MBB;
2222	MachineBasicBlock *MainMBB = MF->CreateMachineBasicBlock(BB);
2223	MachineBasicBlock *SinkMBB = MF->CreateMachineBasicBlock(BB);
2224	MachineBasicBlock *RestoreMBB = MF->CreateMachineBasicBlock(BB);
2225	MF->insert(MBBI: I, MBB: MainMBB);
2226	MF->insert(MBBI: I, MBB: SinkMBB);
2227	MF->push_back(MBB: RestoreMBB);
2228	RestoreMBB->setMachineBlockAddressTaken();
2229
2230	// Transfer the remainder of BB and its successor edges to SinkMBB.
2231	SinkMBB->splice(Where: SinkMBB->begin(), Other: MBB,
2232	From: std::next(x: MachineBasicBlock::iterator (MI)), To: MBB->end());
2233	SinkMBB->transferSuccessorsAndUpdatePHIs(FromMBB: MBB);
2234
2235	// ThisMBB:
2236	Register LabelReg =
2237	prepareMBB(MBB&: *MBB, I: MachineBasicBlock::iterator (MI), TargetBB: RestoreMBB, DL);
2238
2239	// Store BP in buf[3] iff this function is using BP.
2240	const VEFrameLowering *TFI = Subtarget->getFrameLowering();
2241	if (TFI->hasBP(MF: *MF)) {
2242	MachineInstrBuilder MIB = BuildMI(*MBB, MI, DL, TII->get(VE::STrii));
2243	MIB.addReg(RegNo: BufReg);
2244	MIB.addImm(Val: `0`);
2245	MIB.addImm(Val: `24`);
2246	MIB.addReg(VE::SX17);
2247	MIB.setMemRefs(MMOs);
2248	}
2249
2250	// Store IP in buf[1].
2251	MachineInstrBuilder MIB = BuildMI(*MBB, MI, DL, TII->get(VE::STrii));
2252	MIB.add(MO: MI.getOperand(i: `1`)); // we can preserve the kill flags here.
2253	MIB.addImm(Val: `0`);
2254	MIB.addImm(Val: `8`);
2255	MIB.addReg(RegNo: LabelReg, flags: getKillRegState(B: true));
2256	MIB.setMemRefs(MMOs);
2257
2258	// SP/FP are already stored in jmpbuf before `llvm.eh.sjlj.setjmp`.
2259
2260	// Insert setup.
2261	MIB =
2262	BuildMI(*ThisMBB, MI, DL, TII->get(VE::EH_SjLj_Setup)).addMBB(RestoreMBB);
2263
2264	const VERegisterInfo *RegInfo = Subtarget->getRegisterInfo();
2265	MIB.addRegMask(Mask: RegInfo->getNoPreservedMask());
2266	ThisMBB->addSuccessor(Succ: MainMBB);
2267	ThisMBB->addSuccessor(Succ: RestoreMBB);
2268
2269	// MainMBB:
2270	BuildMI(MainMBB, DL, TII->get(VE::LEAzii), MainDestReg)
2271	.addImm(`0`)
2272	.addImm(`0`)
2273	.addImm(`0`);
2274	MainMBB->addSuccessor(Succ: SinkMBB);
2275
2276	// SinkMBB:
2277	BuildMI(*SinkMBB, SinkMBB->begin(), DL, TII->get(VE::PHI), DstReg)
2278	.addReg(MainDestReg)
2279	.addMBB(MainMBB)
2280	.addReg(RestoreDestReg)
2281	.addMBB(RestoreMBB);
2282
2283	// RestoreMBB:
2284	// Restore BP from buf[3] iff this function is using BP. The address of
2285	// buf is in SX10.
2286	// FIXME: Better to not use SX10 here
2287	if (TFI->hasBP(MF: *MF)) {
2288	MachineInstrBuilder MIB =
2289	BuildMI(RestoreMBB, DL, TII->get(VE::LDrii), VE::SX17);
2290	MIB.addReg(VE::SX10);
2291	MIB.addImm(Val: `0`);
2292	MIB.addImm(Val: `24`);
2293	MIB.setMemRefs(MMOs);
2294	}
2295	BuildMI(RestoreMBB, DL, TII->get(VE::LEAzii), RestoreDestReg)
2296	.addImm(`0`)
2297	.addImm(`0`)
2298	.addImm(`1`);
2299	BuildMI(RestoreMBB, DL, TII->get(VE::BRCFLa_t)).addMBB(SinkMBB);
2300	RestoreMBB->addSuccessor(Succ: SinkMBB);
2301
2302	MI.eraseFromParent();
2303	return SinkMBB;
2304	}
2305
2306	MachineBasicBlock *
2307	VETargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
2308	MachineBasicBlock MBB) const* {
2309	DebugLoc DL = MI.getDebugLoc();
2310	MachineFunction *MF = MBB->getParent();
2311	const TargetInstrInfo *TII = Subtarget->getInstrInfo();
2312	MachineRegisterInfo &MRI = MF->getRegInfo();
2313
2314	// Memory Reference.
2315	SmallVector<MachineMemOperand *, `2`> MMOs(MI.memoperands_begin(),
2316	MI.memoperands_end());
2317	Register BufReg = MI.getOperand(i: `0`).getReg();
2318
2319	Register Tmp = MRI.createVirtualRegister(&VE::I64RegClass);
2320	// Since FP is only updated here but NOT referenced, it's treated as GPR.
2321	Register FP = VE::SX9;
2322	Register SP = VE::SX11;
2323
2324	MachineInstrBuilder MIB;
2325
2326	MachineBasicBlock *ThisMBB = MBB;
2327
2328	// For `call @llvm.eh.sjlj.longjmp(buf)`, we generate following instructions.
2329	//
2330	// ThisMBB:
2331	// %fp = load buf[0]
2332	// %jmp = load buf[1]
2333	// %s10 = buf ; Store an address of buf to SX10 for RestoreMBB
2334	// %sp = load buf[2] ; generated by llvm.eh.sjlj.setjmp.
2335	// jmp %jmp
2336
2337	// Reload FP.
2338	MIB = BuildMI(*ThisMBB, MI, DL, TII->get(VE::LDrii), FP);
2339	MIB.addReg(RegNo: BufReg);
2340	MIB.addImm(Val: `0`);
2341	MIB.addImm(Val: `0`);
2342	MIB.setMemRefs(MMOs);
2343
2344	// Reload IP.
2345	MIB = BuildMI(*ThisMBB, MI, DL, TII->get(VE::LDrii), Tmp);
2346	MIB.addReg(RegNo: BufReg);
2347	MIB.addImm(Val: `0`);
2348	MIB.addImm(Val: `8`);
2349	MIB.setMemRefs(MMOs);
2350
2351	// Copy BufReg to SX10 for later use in setjmp.
2352	// FIXME: Better to not use SX10 here
2353	BuildMI(*ThisMBB, MI, DL, TII->get(VE::ORri), VE::SX10)
2354	.addReg(BufReg)
2355	.addImm(`0`);
2356
2357	// Reload SP.
2358	MIB = BuildMI(*ThisMBB, MI, DL, TII->get(VE::LDrii), SP);
2359	MIB.add(MO: MI.getOperand(i: `0`)); // we can preserve the kill flags here.
2360	MIB.addImm(Val: `0`);
2361	MIB.addImm(Val: `16`);
2362	MIB.setMemRefs(MMOs);
2363
2364	// Jump.
2365	BuildMI(*ThisMBB, MI, DL, TII->get(VE::BCFLari_t))
2366	.addReg(Tmp, getKillRegState(true))
2367	.addImm(`0`);
2368
2369	MI.eraseFromParent();
2370	return ThisMBB;
2371	}
2372
2373	MachineBasicBlock *
2374	VETargetLowering::emitSjLjDispatchBlock(MachineInstr &MI,
2375	MachineBasicBlock BB) const* {
2376	DebugLoc DL = MI.getDebugLoc();
2377	MachineFunction *MF = BB->getParent();
2378	MachineFrameInfo &MFI = MF->getFrameInfo();
2379	MachineRegisterInfo &MRI = MF->getRegInfo();
2380	const VEInstrInfo *TII = Subtarget->getInstrInfo();
2381	int FI = MFI.getFunctionContextIndex();
2382
2383	// Get a mapping of the call site numbers to all of the landing pads they're
2384	// associated with.
2385	DenseMap<unsigned, SmallVector<MachineBasicBlock *, `2`>> CallSiteNumToLPad;
2386	unsigned MaxCSNum = `0`;
2387	for (auto &MBB : *MF) {
2388	if (!MBB.isEHPad())
2389	continue;
2390
2391	MCSymbol Sym = nullptr*;
2392	for (const auto &MI : MBB) {
2393	if (MI.isDebugInstr())
2394	continue;
2395
2396	assert(MI.isEHLabel() && "expected EH_LABEL");
2397	Sym = MI.getOperand(i: `0`).getMCSymbol();
2398	break;
2399	}
2400
2401	if (!MF->hasCallSiteLandingPad(Sym))
2402	continue;
2403
2404	for (unsigned CSI : MF->getCallSiteLandingPad(Sym)) {
2405	CallSiteNumToLPad [CSI].push_back(Elt: &MBB);
2406	MaxCSNum = std::max(a: MaxCSNum, b: CSI);
2407	}
2408	}
2409
2410	// Get an ordered list of the machine basic blocks for the jump table.
2411	std::vector<MachineBasicBlock *> LPadList;
2412	SmallPtrSet<MachineBasicBlock *, `32`> InvokeBBs;
2413	LPadList.reserve(n: CallSiteNumToLPad.size());
2414
2415	for (unsigned CSI = `1`; CSI <= MaxCSNum; ++CSI) {
2416	for (auto &LP : CallSiteNumToLPad [CSI]) {
2417	LPadList.push_back(x: LP);
2418	InvokeBBs.insert(I: LP->pred_begin(), E: LP->pred_end());
2419	}
2420	}
2421
2422	assert(!LPadList.empty() &&
2423	"No landing pad destinations for the dispatch jump table!");
2424
2425	// The %fn_context is allocated like below (from --print-after=sjljehprepare):
2426	// %fn_context = alloca { i8, i64, [4 x i64], i8, i8, [5 x i8] }
2427	//
2428	// This `[5 x i8]` is jmpbuf, so jmpbuf[1] is FI+72.*
2429	// First `i64` is callsite, so callsite is FI+8.
2430	static const int OffsetIC = `72`;
2431	static const int OffsetCS = `8`;
2432
2433	// Create the MBBs for the dispatch code like following:
2434	//
2435	// ThisMBB:
2436	// Prepare DispatchBB address and store it to buf[1].
2437	// ...
2438	//
2439	// DispatchBB:
2440	// %s15 = GETGOT iff isPositionIndependent
2441	// %callsite = load callsite
2442	// brgt.l.t #size of callsites, %callsite, DispContBB
2443	//
2444	// TrapBB:
2445	// Call abort.
2446	//
2447	// DispContBB:
2448	// %breg = address of jump table
2449	// %pc = load and calculate next pc from %breg and %callsite
2450	// jmp %pc
2451
2452	// Shove the dispatch's address into the return slot in the function context.
2453	MachineBasicBlock *DispatchBB = MF->CreateMachineBasicBlock();
2454	DispatchBB->setIsEHPad(true);
2455
2456	// Trap BB will causes trap like `assert(0)`.
2457	MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
2458	DispatchBB->addSuccessor(Succ: TrapBB);
2459
2460	MachineBasicBlock *DispContBB = MF->CreateMachineBasicBlock();
2461	DispatchBB->addSuccessor(Succ: DispContBB);
2462
2463	// Insert MBBs.
2464	MF->push_back(MBB: DispatchBB);
2465	MF->push_back(MBB: DispContBB);
2466	MF->push_back(MBB: TrapBB);
2467
2468	// Insert code to call abort in the TrapBB.
2469	Register Abort = prepareSymbol(MBB&: *TrapBB, I: TrapBB->end(), Symbol: "abort", DL,
2470	/ Local / IsLocal: false, / Call / IsCall: true);
2471	BuildMI(TrapBB, DL, TII->get(VE::BSICrii), VE::SX10)
2472	.addReg(Abort, getKillRegState(true))
2473	.addImm(`0`)
2474	.addImm(`0`);
2475
2476	// Insert code into the entry block that creates and registers the function
2477	// context.
2478	setupEntryBlockForSjLj(MI, MBB: BB, DispatchBB, FI, Offset: OffsetIC);
2479
2480	// Create the jump table and associated information
2481	unsigned JTE = getJumpTableEncoding();
2482	MachineJumpTableInfo *JTI = MF->getOrCreateJumpTableInfo(JTEntryKind: JTE);
2483	unsigned MJTI = JTI->createJumpTableIndex(DestBBs: LPadList);
2484
2485	const VERegisterInfo &RI = TII->getRegisterInfo();
2486	// Add a register mask with no preserved registers. This results in all
2487	// registers being marked as clobbered.
2488	BuildMI(DispatchBB, DL, TII->get(VE::NOP))
2489	.addRegMask(RI.getNoPreservedMask());
2490
2491	if (isPositionIndependent()) {
2492	// Force to generate GETGOT, since current implementation doesn't store GOT
2493	// register.
2494	BuildMI(DispatchBB, DL, TII->get(VE::GETGOT), VE::SX15);
2495	}
2496
2497	// IReg is used as an index in a memory operand and therefore can't be SP
2498	const TargetRegisterClass *RC = &VE::I64RegClass;
2499	Register IReg = MRI.createVirtualRegister(RegClass: RC);
2500	addFrameReference(BuildMI(DispatchBB, DL, TII->get(VE::LDLZXrii), IReg), FI,
2501	OffsetCS);
2502	if (LPadList.size() < `64`) {
2503	BuildMI(DispatchBB, DL, TII->get(VE::BRCFLir_t))
2504	.addImm(VECC::CC_ILE)
2505	.addImm(LPadList.size())
2506	.addReg(IReg)
2507	.addMBB(TrapBB);
2508	} else {
2509	assert(LPadList.size() <= `0x7FFFFFFF` && "Too large Landing Pad!");
2510	Register TmpReg = MRI.createVirtualRegister(RegClass: RC);
2511	BuildMI(DispatchBB, DL, TII->get(VE::LEAzii), TmpReg)
2512	.addImm(`0`)
2513	.addImm(`0`)
2514	.addImm(LPadList.size());
2515	BuildMI(DispatchBB, DL, TII->get(VE::BRCFLrr_t))
2516	.addImm(VECC::CC_ILE)
2517	.addReg(TmpReg, getKillRegState(true))
2518	.addReg(IReg)
2519	.addMBB(TrapBB);
2520	}
2521
2522	Register BReg = MRI.createVirtualRegister(RegClass: RC);
2523	Register Tmp1 = MRI.createVirtualRegister(RegClass: RC);
2524	Register Tmp2 = MRI.createVirtualRegister(RegClass: RC);
2525
2526	if (isPositionIndependent()) {
2527	// Create following instructions for local linkage PIC code.
2528	// lea %Tmp1, .LJTI0_0@gotoff_lo
2529	// and %Tmp2, %Tmp1, (32)0
2530	// lea.sl %BReg, .LJTI0_0@gotoff_hi(%Tmp2, %s15) ; %s15 is GOT
2531	BuildMI(DispContBB, DL, TII->get(VE::LEAzii), Tmp1)
2532	.addImm(`0`)
2533	.addImm(`0`)
2534	.addJumpTableIndex(MJTI, VEMCExpr::VK_VE_GOTOFF_LO32);
2535	BuildMI(DispContBB, DL, TII->get(VE::ANDrm), Tmp2)
2536	.addReg(Tmp1, getKillRegState(true))
2537	.addImm(M0(`32`));
2538	BuildMI(DispContBB, DL, TII->get(VE::LEASLrri), BReg)
2539	.addReg(VE::SX15)
2540	.addReg(Tmp2, getKillRegState(true))
2541	.addJumpTableIndex(MJTI, VEMCExpr::VK_VE_GOTOFF_HI32);
2542	} else {
2543	// Create following instructions for non-PIC code.
2544	// lea %Tmp1, .LJTI0_0@lo
2545	// and %Tmp2, %Tmp1, (32)0
2546	// lea.sl %BReg, .LJTI0_0@hi(%Tmp2)
2547	BuildMI(DispContBB, DL, TII->get(VE::LEAzii), Tmp1)
2548	.addImm(`0`)
2549	.addImm(`0`)
2550	.addJumpTableIndex(MJTI, VEMCExpr::VK_VE_LO32);
2551	BuildMI(DispContBB, DL, TII->get(VE::ANDrm), Tmp2)
2552	.addReg(Tmp1, getKillRegState(true))
2553	.addImm(M0(`32`));
2554	BuildMI(DispContBB, DL, TII->get(VE::LEASLrii), BReg)
2555	.addReg(Tmp2, getKillRegState(true))
2556	.addImm(`0`)
2557	.addJumpTableIndex(MJTI, VEMCExpr::VK_VE_HI32);
2558	}
2559
2560	switch (JTE) {
2561	case MachineJumpTableInfo::EK_BlockAddress: {
2562	// Generate simple block address code for no-PIC model.
2563	// sll %Tmp1, %IReg, 3
2564	// lds %TReg, 0(%Tmp1, %BReg)
2565	// bcfla %TReg
2566
2567	Register TReg = MRI.createVirtualRegister(RegClass: RC);
2568	Register Tmp1 = MRI.createVirtualRegister(RegClass: RC);
2569
2570	BuildMI(DispContBB, DL, TII->get(VE::SLLri), Tmp1)
2571	.addReg(IReg, getKillRegState(true))
2572	.addImm(`3`);
2573	BuildMI(DispContBB, DL, TII->get(VE::LDrri), TReg)
2574	.addReg(BReg, getKillRegState(true))
2575	.addReg(Tmp1, getKillRegState(true))
2576	.addImm(`0`);
2577	BuildMI(DispContBB, DL, TII->get(VE::BCFLari_t))
2578	.addReg(TReg, getKillRegState(true))
2579	.addImm(`0`);
2580	break;
2581	}
2582	case MachineJumpTableInfo::EK_Custom32: {
2583	// Generate block address code using differences from the function pointer
2584	// for PIC model.
2585	// sll %Tmp1, %IReg, 2
2586	// ldl.zx %OReg, 0(%Tmp1, %BReg)
2587	// Prepare function address in BReg2.
2588	// adds.l %TReg, %BReg2, %OReg
2589	// bcfla %TReg
2590
2591	assert(isPositionIndependent());
2592	Register OReg = MRI.createVirtualRegister(RegClass: RC);
2593	Register TReg = MRI.createVirtualRegister(RegClass: RC);
2594	Register Tmp1 = MRI.createVirtualRegister(RegClass: RC);
2595
2596	BuildMI(DispContBB, DL, TII->get(VE::SLLri), Tmp1)
2597	.addReg(IReg, getKillRegState(true))
2598	.addImm(`2`);
2599	BuildMI(DispContBB, DL, TII->get(VE::LDLZXrri), OReg)
2600	.addReg(BReg, getKillRegState(true))
2601	.addReg(Tmp1, getKillRegState(true))
2602	.addImm(`0`);
2603	Register BReg2 =
2604	prepareSymbol(MBB&: *DispContBB, I: DispContBB->end(),
2605	Symbol: DispContBB->getParent()->getName(), DL, / Local / IsLocal: true);
2606	BuildMI(DispContBB, DL, TII->get(VE::ADDSLrr), TReg)
2607	.addReg(OReg, getKillRegState(true))
2608	.addReg(BReg2, getKillRegState(true));
2609	BuildMI(DispContBB, DL, TII->get(VE::BCFLari_t))
2610	.addReg(TReg, getKillRegState(true))
2611	.addImm(`0`);
2612	break;
2613	}
2614	default:
2615	llvm_unreachable("Unexpected jump table encoding");
2616	}
2617
2618	// Add the jump table entries as successors to the MBB.
2619	SmallPtrSet<MachineBasicBlock *, `8`> SeenMBBs;
2620	for (auto &LP : LPadList)
2621	if (SeenMBBs.insert(Ptr: LP).second)
2622	DispContBB->addSuccessor(Succ: LP);
2623
2624	// N.B. the order the invoke BBs are processed in doesn't matter here.
2625	SmallVector<MachineBasicBlock *, `64`> MBBLPads;
2626	const MCPhysReg *SavedRegs = MF->getRegInfo().getCalleeSavedRegs();
2627	for (MachineBasicBlock *MBB : InvokeBBs) {
2628	// Remove the landing pad successor from the invoke block and replace it
2629	// with the new dispatch block.
2630	// Keep a copy of Successors since it's modified inside the loop.
2631	SmallVector<MachineBasicBlock *, `8`> Successors(MBB->succ_rbegin(),
2632	MBB->succ_rend());
2633	// FIXME: Avoid quadratic complexity.
2634	for (auto *MBBS : Successors) {
2635	if (MBBS->isEHPad()) {
2636	MBB->removeSuccessor(Succ: MBBS);
2637	MBBLPads.push_back(Elt: MBBS);
2638	}
2639	}
2640
2641	MBB->addSuccessor(Succ: DispatchBB);
2642
2643	// Find the invoke call and mark all of the callee-saved registers as
2644	// 'implicit defined' so that they're spilled. This prevents code from
2645	// moving instructions to before the EH block, where they will never be
2646	// executed.
2647	for (auto &II : reverse(C&: *MBB)) {
2648	if (!II.isCall())
2649	continue;
2650
2651	DenseMap<Register, bool> DefRegs;
2652	for (auto &MOp : II.operands())
2653	if (MOp.isReg())
2654	DefRegs [MOp.getReg()] = true;
2655
2656	MachineInstrBuilder MIB(*MF, &II);
2657	for (unsigned RI = `0`; SavedRegs[RI]; ++RI) {
2658	Register Reg = SavedRegs[RI];
2659	if (!DefRegs [Reg])
2660	MIB.addReg(RegNo: Reg, flags: RegState::ImplicitDefine \| RegState::Dead);
2661	}
2662
2663	break;
2664	}
2665	}
2666
2667	// Mark all former landing pads as non-landing pads. The dispatch is the only
2668	// landing pad now.
2669	for (auto &LP : MBBLPads)
2670	LP->setIsEHPad(false);
2671
2672	// The instruction is gone now.
2673	MI.eraseFromParent();
2674	return BB;
2675	}
2676
2677	MachineBasicBlock *
2678	VETargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
2679	MachineBasicBlock BB) const* {
2680	switch (MI.getOpcode()) {
2681	default:
2682	llvm_unreachable("Unknown Custom Instruction!");
2683	case VE::EH_SjLj_LongJmp:
2684	return emitEHSjLjLongJmp(MI, MBB: BB);
2685	case VE::EH_SjLj_SetJmp:
2686	return emitEHSjLjSetJmp(MI, MBB: BB);
2687	case VE::EH_SjLj_Setup_Dispatch:
2688	return emitSjLjDispatchBlock(MI, BB);
2689	}
2690	}
2691
2692	static bool isSimm7(SDValue V) {
2693	EVT VT = V.getValueType();
2694	if (VT.isVector())
2695	return false;
2696
2697	if (VT.isInteger()) {
2698	if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val&: V))
2699	return isInt<`7`>(x: C->getSExtValue());
2700	} else if (VT.isFloatingPoint()) {
2701	if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val&: V)) {
2702	if (VT == MVT::f32 \|\| VT == MVT::f64) {
2703	const APInt &Imm = C->getValueAPF().bitcastToAPInt();
2704	uint64_t Val = Imm.getSExtValue();
2705	if (Imm.getBitWidth() == `32`)
2706	Val <<= `32`; // Immediate value of float place at higher bits on VE.
2707	return isInt<`7`>(x: Val);
2708	}
2709	}
2710	}
2711	return false;
2712	}
2713
2714	static bool isMImm(SDValue V) {
2715	EVT VT = V.getValueType();
2716	if (VT.isVector())
2717	return false;
2718
2719	if (VT.isInteger()) {
2720	if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val&: V))
2721	return isMImmVal(Val: getImmVal(N: C));
2722	} else if (VT.isFloatingPoint()) {
2723	if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val&: V)) {
2724	if (VT == MVT::f32) {
2725	// Float value places at higher bits, so ignore lower 32 bits.
2726	return isMImm32Val(Val: getFpImmVal(N: C) >> `32`);
2727	} else if (VT == MVT::f64) {
2728	return isMImmVal(Val: getFpImmVal(N: C));
2729	}
2730	}
2731	}
2732	return false;
2733	}
2734
2735	static unsigned decideComp(EVT SrcVT, ISD::CondCode CC) {
2736	if (SrcVT.isFloatingPoint()) {
2737	if (SrcVT == MVT::f128)
2738	return VEISD::CMPQ;
2739	return VEISD::CMPF;
2740	}
2741	return isSignedIntSetCC(Code: CC) ? VEISD::CMPI : VEISD::CMPU;
2742	}
2743
2744	static EVT decideCompType(EVT SrcVT) {
2745	if (SrcVT == MVT::f128)
2746	return MVT::f64;
2747	return SrcVT;
2748	}
2749
2750	static bool safeWithoutCompWithNull(EVT SrcVT, ISD::CondCode CC,
2751	bool WithCMov) {
2752	if (SrcVT.isFloatingPoint()) {
2753	// For the case of floating point setcc, only unordered comparison
2754	// or general comparison with -enable-no-nans-fp-math option reach
2755	// here, so it is safe even if values are NaN. Only f128 doesn't
2756	// safe since VE uses f64 result of f128 comparison.
2757	return SrcVT != MVT::f128;
2758	}
2759	if (isIntEqualitySetCC(Code: CC)) {
2760	// For the case of equal or not equal, it is safe without comparison with 0.
2761	return true;
2762	}
2763	if (WithCMov) {
2764	// For the case of integer setcc with cmov, all signed comparison with 0
2765	// are safe.
2766	return isSignedIntSetCC(Code: CC);
2767	}
2768	// For the case of integer setcc, only signed 64 bits comparison is safe.
2769	// For unsigned, "CMPU 0x80000000, 0" has to be greater than 0, but it becomes
2770	// less than 0 witout CMPU. For 32 bits, other half of 32 bits are
2771	// uncoditional, so it is not safe too without CMPI..
2772	return isSignedIntSetCC(CC) && SrcVT == MVT::i64;
2773	}
2774
2775	static SDValue generateComparison(EVT VT, SDValue LHS, SDValue RHS,
2776	ISD::CondCode CC, bool WithCMov,
2777	const SDLoc &DL, SelectionDAG &DAG) {
2778	// Compare values. If RHS is 0 and it is safe to calculate without
2779	// comparison, we don't generate an instruction for comparison.
2780	EVT CompVT = decideCompType(SrcVT: VT);
2781	if (CompVT == VT && safeWithoutCompWithNull(SrcVT: VT, CC, WithCMov) &&
2782	(isNullConstant(V: RHS) \|\| isNullFPConstant(V: RHS))) {
2783	return LHS;
2784	}
2785	return DAG.getNode(Opcode: decideComp(SrcVT: VT, CC), DL, VT: CompVT, N1: LHS, N2: RHS);
2786	}
2787
2788	SDValue VETargetLowering::combineSelect(SDNode *N,
2789	DAGCombinerInfo &DCI) const {
2790	assert(N->getOpcode() == ISD::SELECT &&
2791	"Should be called with a SELECT node");
2792	ISD::CondCode CC = ISD::CondCode::SETNE;
2793	SDValue Cond = N->getOperand(Num: `0`);
2794	SDValue True = N->getOperand(Num: `1`);
2795	SDValue False = N->getOperand(Num: `2`);
2796
2797	// We handle only scalar SELECT.
2798	EVT VT = N->getValueType(ResNo: `0`);
2799	if (VT.isVector())
2800	return SDValue ();
2801
2802	// Peform combineSelect after leagalize DAG.
2803	if (!DCI.isAfterLegalizeDAG())
2804	return SDValue ();
2805
2806	EVT VT0 = Cond.getValueType();
2807	if (isMImm(V: True)) {
2808	// VE's condition move can handle MImm in True clause, so nothing to do.
2809	} else if (isMImm(V: False)) {
2810	// VE's condition move can handle MImm in True clause, so swap True and
2811	// False clauses if False has MImm value. And, update condition code.
2812	std::swap(a&: True, b&: False);
2813	CC = getSetCCInverse(Operation: CC, Type: VT0);
2814	}
2815
2816	SDLoc DL(N);
2817	SelectionDAG &DAG = DCI.DAG;
2818	VECC::CondCode VECCVal;
2819	if (VT0.isFloatingPoint()) {
2820	VECCVal = fpCondCode2Fcc(CC);
2821	} else {
2822	VECCVal = intCondCode2Icc(CC);
2823	}
2824	SDValue Ops[] = {Cond, True, False,
2825	DAG.getConstant(VECCVal, DL, MVT::i32)};
2826	return DAG.getNode(VEISD::CMOV, DL, VT, Ops);
2827	}
2828
2829	SDValue VETargetLowering::combineSelectCC(SDNode *N,
2830	DAGCombinerInfo &DCI) const {
2831	assert(N->getOpcode() == ISD::SELECT_CC &&
2832	"Should be called with a SELECT_CC node");
2833	ISD::CondCode CC = cast<CondCodeSDNode>(Val: N->getOperand(Num: `4`))->get();
2834	SDValue LHS = N->getOperand(Num: `0`);
2835	SDValue RHS = N->getOperand(Num: `1`);
2836	SDValue True = N->getOperand(Num: `2`);
2837	SDValue False = N->getOperand(Num: `3`);
2838
2839	// We handle only scalar SELECT_CC.
2840	EVT VT = N->getValueType(ResNo: `0`);
2841	if (VT.isVector())
2842	return SDValue ();
2843
2844	// Peform combineSelectCC after leagalize DAG.
2845	if (!DCI.isAfterLegalizeDAG())
2846	return SDValue ();
2847
2848	// We handle only i32/i64/f32/f64/f128 comparisons.
2849	EVT LHSVT = LHS.getValueType();
2850	assert(LHSVT == RHS.getValueType());
2851	switch (LHSVT.getSimpleVT().SimpleTy) {
2852	case MVT::i32:
2853	case MVT::i64:
2854	case MVT::f32:
2855	case MVT::f64:
2856	case MVT::f128:
2857	break;
2858	default:
2859	// Return SDValue to let llvm handle other types.
2860	return SDValue ();
2861	}
2862
2863	if (isMImm(V: RHS)) {
2864	// VE's comparison can handle MImm in RHS, so nothing to do.
2865	} else if (isSimm7(V: RHS)) {
2866	// VE's comparison can handle Simm7 in LHS, so swap LHS and RHS, and
2867	// update condition code.
2868	std::swap(a&: LHS, b&: RHS);
2869	CC = getSetCCSwappedOperands(Operation: CC);
2870	}
2871	if (isMImm(V: True)) {
2872	// VE's condition move can handle MImm in True clause, so nothing to do.
2873	} else if (isMImm(V: False)) {
2874	// VE's condition move can handle MImm in True clause, so swap True and
2875	// False clauses if False has MImm value. And, update condition code.
2876	std::swap(a&: True, b&: False);
2877	CC = getSetCCInverse(Operation: CC, Type: LHSVT);
2878	}
2879
2880	SDLoc DL(N);
2881	SelectionDAG &DAG = DCI.DAG;
2882
2883	bool WithCMov = true;
2884	SDValue CompNode = generateComparison(VT: LHSVT, LHS, RHS, CC, WithCMov, DL, DAG);
2885
2886	VECC::CondCode VECCVal;
2887	if (LHSVT.isFloatingPoint()) {
2888	VECCVal = fpCondCode2Fcc(CC);
2889	} else {
2890	VECCVal = intCondCode2Icc(CC);
2891	}
2892	SDValue Ops[] = {CompNode, True, False,
2893	DAG.getConstant(VECCVal, DL, MVT::i32)};
2894	return DAG.getNode(VEISD::CMOV, DL, VT, Ops);
2895	}
2896
2897	static bool isI32InsnAllUses(const SDNode User, const* SDNode *N);
2898	static bool isI32Insn(const SDNode User, const* SDNode *N) {
2899	switch (User->getOpcode()) {
2900	default:
2901	return false;
2902	case ISD::ADD:
2903	case ISD::SUB:
2904	case ISD::MUL:
2905	case ISD::SDIV:
2906	case ISD::UDIV:
2907	case ISD::SETCC:
2908	case ISD::SMIN:
2909	case ISD::SMAX:
2910	case ISD::SHL:
2911	case ISD::SRA:
2912	case ISD::BSWAP:
2913	case ISD::SINT_TO_FP:
2914	case ISD::UINT_TO_FP:
2915	case ISD::BR_CC:
2916	case ISD::BITCAST:
2917	case ISD::ATOMIC_CMP_SWAP:
2918	case ISD::ATOMIC_SWAP:
2919	case VEISD::CMPU:
2920	case VEISD::CMPI:
2921	return true;
2922	case ISD::SRL:
2923	if (N->getOperand(Num: `0`).getOpcode() != ISD::SRL)
2924	return true;
2925	// (srl (trunc (srl ...))) may be optimized by combining srl, so
2926	// doesn't optimize trunc now.
2927	return false;
2928	case ISD::SELECT_CC:
2929	if (User->getOperand(Num: `2`).getNode() != N &&
2930	User->getOperand(Num: `3`).getNode() != N)
2931	return true;
2932	return isI32InsnAllUses(User, N);
2933	case VEISD::CMOV:
2934	// CMOV in (cmov (trunc ...), true, false, int-comparison) is safe.
2935	// However, trunc in true or false clauses is not safe.
2936	if (User->getOperand(Num: `1`).getNode() != N &&
2937	User->getOperand(Num: `2`).getNode() != N &&
2938	isa<ConstantSDNode>(Val: User->getOperand(Num: `3`))) {
2939	VECC::CondCode VECCVal =
2940	static_cast<VECC::CondCode>(User->getConstantOperandVal(Num: `3`));
2941	return isIntVECondCode(CC: VECCVal);
2942	}
2943	[[fallthrough]];
2944	case ISD::AND:
2945	case ISD::OR:
2946	case ISD::XOR:
2947	case ISD::SELECT:
2948	case ISD::CopyToReg:
2949	// Check all use of selections, bit operations, and copies. If all of them
2950	// are safe, optimize truncate to extract_subreg.
2951	return isI32InsnAllUses(User, N);
2952	}
2953	}
2954
2955	static bool isI32InsnAllUses(const SDNode User, const* SDNode *N) {
2956	// Check all use of User node. If all of them are safe, optimize
2957	// truncate to extract_subreg.
2958	for (const SDNode *U : User->uses()) {
2959	switch (U->getOpcode()) {
2960	default:
2961	// If the use is an instruction which treats the source operand as i32,
2962	// it is safe to avoid truncate here.
2963	if (isI32Insn(User: U, N))
2964	continue;
2965	break;
2966	case ISD::ANY_EXTEND:
2967	case ISD::SIGN_EXTEND:
2968	case ISD::ZERO_EXTEND: {
2969	// Special optimizations to the combination of ext and trunc.
2970	// (ext ... (select ... (trunc ...))) is safe to avoid truncate here
2971	// since this truncate instruction clears higher 32 bits which is filled
2972	// by one of ext instructions later.
2973	assert(N->getValueType(`0`) == MVT::i32 &&
2974	"find truncate to not i32 integer");
2975	if (User->getOpcode() == ISD::SELECT_CC \|\|
2976	User->getOpcode() == ISD::SELECT \|\| User->getOpcode() == VEISD::CMOV)
2977	continue;
2978	break;
2979	}
2980	}
2981	return false;
2982	}
2983	return true;
2984	}
2985
2986	// Optimize TRUNCATE in DAG combining. Optimizing it in CUSTOM lower is
2987	// sometime too early. Optimizing it in DAG pattern matching in VEInstrInfo.td
2988	// is sometime too late. So, doing it at here.
2989	SDValue VETargetLowering::combineTRUNCATE(SDNode *N,
2990	DAGCombinerInfo &DCI) const {
2991	assert(N->getOpcode() == ISD::TRUNCATE &&
2992	"Should be called with a TRUNCATE node");
2993
2994	SelectionDAG &DAG = DCI.DAG;
2995	SDLoc DL(N);
2996	EVT VT = N->getValueType(ResNo: `0`);
2997
2998	// We prefer to do this when all types are legal.
2999	if (!DCI.isAfterLegalizeDAG())
3000	return SDValue ();
3001
3002	// Skip combine TRUNCATE atm if the operand of TRUNCATE might be a constant.
3003	if (N->getOperand(Num: `0`)->getOpcode() == ISD::SELECT_CC &&
3004	isa<ConstantSDNode>(Val: N->getOperand(Num: `0`)->getOperand(Num: `0`)) &&
3005	isa<ConstantSDNode>(Val: N->getOperand(Num: `0`)->getOperand(Num: `1`)))
3006	return SDValue ();
3007
3008	// Check all use of this TRUNCATE.
3009	for (const SDNode *User : N->uses()) {
3010	// Make sure that we're not going to replace TRUNCATE for non i32
3011	// instructions.
3012	//
3013	// FIXME: Although we could sometimes handle this, and it does occur in
3014	// practice that one of the condition inputs to the select is also one of
3015	// the outputs, we currently can't deal with this.
3016	if (isI32Insn(User, N))
3017	continue;
3018
3019	return SDValue ();
3020	}
3021
3022	SDValue SubI32 = DAG.getTargetConstant(VE::sub_i32, DL, MVT::i32);
3023	return SDValue (DAG.getMachineNode(Opcode: TargetOpcode::EXTRACT_SUBREG, dl: DL, VT,
3024	Op1: N->getOperand(Num: `0`), Op2: SubI32),
3025	`0`);
3026	}
3027
3028	SDValue VETargetLowering::PerformDAGCombine(SDNode *N,
3029	DAGCombinerInfo &DCI) const {
3030	switch (N->getOpcode()) {
3031	default:
3032	break;
3033	case ISD::SELECT:
3034	return combineSelect(N, DCI);
3035	case ISD::SELECT_CC:
3036	return combineSelectCC(N, DCI);
3037	case ISD::TRUNCATE:
3038	return combineTRUNCATE(N, DCI);
3039	}
3040
3041	return SDValue ();
3042	}
3043
3044	//===----------------------------------------------------------------------===//
3045	// VE Inline Assembly Support
3046	//===----------------------------------------------------------------------===//
3047
3048	VETargetLowering::ConstraintType
3049	VETargetLowering::getConstraintType(StringRef Constraint) const {
3050	if (Constraint.size() == `1`) {
3051	switch (Constraint [`0`]) {
3052	default:
3053	break;
3054	case `'v'`: // vector registers
3055	return C_RegisterClass;
3056	}
3057	}
3058	return TargetLowering::getConstraintType(Constraint);
3059	}
3060
3061	std::pair<unsigned, const TargetRegisterClass *>
3062	VETargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
3063	StringRef Constraint,
3064	MVT VT) const {
3065	const TargetRegisterClass RC = nullptr*;
3066	if (Constraint.size() == `1`) {
3067	switch (Constraint [`0`]) {
3068	default:
3069	return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
3070	case `'r'`:
3071	RC = &VE::I64RegClass;
3072	break;
3073	case `'v'`:
3074	RC = &VE::V64RegClass;
3075	break;
3076	}
3077	return std::make_pair(x: `0U`, y&: RC);
3078	}
3079
3080	return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
3081	}
3082
3083	//===----------------------------------------------------------------------===//
3084	// VE Target Optimization Support
3085	//===----------------------------------------------------------------------===//
3086
3087	unsigned VETargetLowering::getMinimumJumpTableEntries() const {
3088	// Specify 8 for PIC model to relieve the impact of PIC load instructions.
3089	if (isJumpTableRelative())
3090	return `8`;
3091
3092	return TargetLowering::getMinimumJumpTableEntries();
3093	}
3094
3095	bool VETargetLowering::hasAndNot(SDValue Y) const {
3096	EVT VT = Y.getValueType();
3097
3098	// VE doesn't have vector and not instruction.
3099	if (VT.isVector())
3100	return false;
3101
3102	// VE allows different immediate values for X and Y where ~X & Y.
3103	// Only simm7 works for X, and only mimm works for Y on VE. However, this
3104	// function is used to check whether an immediate value is OK for and-not
3105	// instruction as both X and Y. Generating additional instruction to
3106	// retrieve an immediate value is no good since the purpose of this
3107	// function is to convert a series of 3 instructions to another series of
3108	// 3 instructions with better parallelism. Therefore, we return false
3109	// for all immediate values now.
3110	// FIXME: Change hasAndNot function to have two operands to make it work
3111	// correctly with Aurora VE.
3112	if (isa<ConstantSDNode>(Val: Y))
3113	return false;
3114
3115	// It's ok for generic registers.
3116	return true;
3117	}
3118
3119	SDValue VETargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
3120	SelectionDAG &DAG) const {
3121	assert(Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT && "Unknown opcode!");
3122	MVT VT = Op.getOperand(i: `0`).getSimpleValueType();
3123
3124	// Special treatment for packed V64 types.
3125	assert(VT == MVT::v512i32 \|\| VT == MVT::v512f32);
3126	(void)VT;
3127	// Example of codes:
3128	// %packed_v = extractelt %vr, %idx / 2
3129	// %v = %packed_v >> (%idx % 2 32)*
3130	// %res = %v & 0xffffffff
3131
3132	SDValue Vec = Op.getOperand(i: `0`);
3133	SDValue Idx = Op.getOperand(i: `1`);
3134	SDLoc DL(Op);
3135	SDValue Result = Op;
3136	if (false / Idx->isConstant() /) {
3137	// TODO: optimized implementation using constant values
3138	} else {
3139	SDValue Const1 = DAG.getConstant(`1`, DL, MVT::i64);
3140	SDValue HalfIdx = DAG.getNode(ISD::SRL, DL, MVT::i64, {Idx, Const1});
3141	SDValue PackedElt =
3142	SDValue(DAG.getMachineNode(VE::LVSvr, DL, MVT::i64, {Vec, HalfIdx}), `0`);
3143	SDValue AndIdx = DAG.getNode(ISD::AND, DL, MVT::i64, {Idx, Const1});
3144	SDValue Shift = DAG.getNode(ISD::XOR, DL, MVT::i64, {AndIdx, Const1});
3145	SDValue Const5 = DAG.getConstant(`5`, DL, MVT::i64);
3146	Shift = DAG.getNode(ISD::SHL, DL, MVT::i64, {Shift, Const5});
3147	PackedElt = DAG.getNode(ISD::SRL, DL, MVT::i64, {PackedElt, Shift});
3148	SDValue Mask = DAG.getConstant(`0xFFFFFFFFL`, DL, MVT::i64);
3149	PackedElt = DAG.getNode(ISD::AND, DL, MVT::i64, {PackedElt, Mask});
3150	SDValue SubI32 = DAG.getTargetConstant(VE::sub_i32, DL, MVT::i32);
3151	Result = SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL,
3152	MVT::i32, PackedElt, SubI32),
3153	`0`);
3154
3155	if (Op.getSimpleValueType() == MVT::f32) {
3156	Result = DAG.getBitcast(MVT::f32, Result);
3157	} else {
3158	assert(Op.getSimpleValueType() == MVT::i32);
3159	}
3160	}
3161	return Result;
3162	}
3163
3164	SDValue VETargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
3165	SelectionDAG &DAG) const {
3166	assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT && "Unknown opcode!");
3167	MVT VT = Op.getOperand(i: `0`).getSimpleValueType();
3168
3169	// Special treatment for packed V64 types.
3170	assert(VT == MVT::v512i32 \|\| VT == MVT::v512f32);
3171	(void)VT;
3172	// The v512i32 and v512f32 starts from upper bits (0..31). This "upper
3173	// bits" required `val << 32` from C implementation's point of view.
3174	//
3175	// Example of codes:
3176	// %packed_elt = extractelt %vr, (%idx >> 1)
3177	// %shift = ((%idx & 1) ^ 1) << 5
3178	// %packed_elt &= 0xffffffff00000000 >> shift
3179	// %packed_elt \|= (zext %val) << shift
3180	// %vr = insertelt %vr, %packed_elt, (%idx >> 1)
3181
3182	SDLoc DL(Op);
3183	SDValue Vec = Op.getOperand(i: `0`);
3184	SDValue Val = Op.getOperand(i: `1`);
3185	SDValue Idx = Op.getOperand(i: `2`);
3186	if (Idx.getSimpleValueType() == MVT::i32)
3187	Idx = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Idx);
3188	if (Val.getSimpleValueType() == MVT::f32)
3189	Val = DAG.getBitcast(MVT::i32, Val);
3190	assert(Val.getSimpleValueType() == MVT::i32);
3191	Val = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Val);
3192
3193	SDValue Result = Op;
3194	if (false / Idx->isConstant()/) {
3195	// TODO: optimized implementation using constant values
3196	} else {
3197	SDValue Const1 = DAG.getConstant(`1`, DL, MVT::i64);
3198	SDValue HalfIdx = DAG.getNode(ISD::SRL, DL, MVT::i64, {Idx, Const1});
3199	SDValue PackedElt =
3200	SDValue(DAG.getMachineNode(VE::LVSvr, DL, MVT::i64, {Vec, HalfIdx}), `0`);
3201	SDValue AndIdx = DAG.getNode(ISD::AND, DL, MVT::i64, {Idx, Const1});
3202	SDValue Shift = DAG.getNode(ISD::XOR, DL, MVT::i64, {AndIdx, Const1});
3203	SDValue Const5 = DAG.getConstant(`5`, DL, MVT::i64);
3204	Shift = DAG.getNode(ISD::SHL, DL, MVT::i64, {Shift, Const5});
3205	SDValue Mask = DAG.getConstant(`0xFFFFFFFF00000000L`, DL, MVT::i64);
3206	Mask = DAG.getNode(ISD::SRL, DL, MVT::i64, {Mask, Shift});
3207	PackedElt = DAG.getNode(ISD::AND, DL, MVT::i64, {PackedElt, Mask});
3208	Val = DAG.getNode(ISD::SHL, DL, MVT::i64, {Val, Shift});
3209	PackedElt = DAG.getNode(ISD::OR, DL, MVT::i64, {PackedElt, Val});
3210	Result =
3211	SDValue(DAG.getMachineNode(VE::LSVrr_v, DL, Vec.getSimpleValueType(),
3212	{HalfIdx, PackedElt, Vec}),
3213	`0`);
3214	}
3215	return Result;
3216	}
3217

source code of llvm/lib/Target/VE/VEISelLowering.cpp