HexagonISelLowering.cpp source code [llvm/lib/Target/Hexagon/HexagonISelLowering.cpp]

1	//===-- HexagonISelLowering.cpp - Hexagon DAG Lowering Implementation -----===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// This file implements the interfaces that Hexagon uses to lower LLVM code
10	// into a selection DAG.
11	//
12	//===----------------------------------------------------------------------===//
13
14	#include "HexagonISelLowering.h"
15	#include "Hexagon.h"
16	#include "HexagonMachineFunctionInfo.h"
17	#include "HexagonRegisterInfo.h"
18	#include "HexagonSubtarget.h"
19	#include "HexagonTargetMachine.h"
20	#include "HexagonTargetObjectFile.h"
21	#include "llvm/ADT/APInt.h"
22	#include "llvm/ADT/ArrayRef.h"
23	#include "llvm/ADT/SmallVector.h"
24	#include "llvm/ADT/StringSwitch.h"
25	#include "llvm/CodeGen/CallingConvLower.h"
26	#include "llvm/CodeGen/MachineFrameInfo.h"
27	#include "llvm/CodeGen/MachineFunction.h"
28	#include "llvm/CodeGen/MachineMemOperand.h"
29	#include "llvm/CodeGen/MachineRegisterInfo.h"
30	#include "llvm/CodeGen/RuntimeLibcalls.h"
31	#include "llvm/CodeGen/SelectionDAG.h"
32	#include "llvm/CodeGen/TargetCallingConv.h"
33	#include "llvm/CodeGen/ValueTypes.h"
34	#include "llvm/IR/BasicBlock.h"
35	#include "llvm/IR/CallingConv.h"
36	#include "llvm/IR/DataLayout.h"
37	#include "llvm/IR/DerivedTypes.h"
38	#include "llvm/IR/DiagnosticInfo.h"
39	#include "llvm/IR/DiagnosticPrinter.h"
40	#include "llvm/IR/Function.h"
41	#include "llvm/IR/GlobalValue.h"
42	#include "llvm/IR/InlineAsm.h"
43	#include "llvm/IR/Instructions.h"
44	#include "llvm/IR/IntrinsicInst.h"
45	#include "llvm/IR/Intrinsics.h"
46	#include "llvm/IR/IntrinsicsHexagon.h"
47	#include "llvm/IR/IRBuilder.h"
48	#include "llvm/IR/Module.h"
49	#include "llvm/IR/Type.h"
50	#include "llvm/IR/Value.h"
51	#include "llvm/MC/MCRegisterInfo.h"
52	#include "llvm/Support/Casting.h"
53	#include "llvm/Support/CodeGen.h"
54	#include "llvm/Support/CommandLine.h"
55	#include "llvm/Support/Debug.h"
56	#include "llvm/Support/ErrorHandling.h"
57	#include "llvm/Support/MathExtras.h"
58	#include "llvm/Support/raw_ostream.h"
59	#include "llvm/Target/TargetMachine.h"
60	#include <algorithm>
61	#include <cassert>
62	#include <cstddef>
63	#include <cstdint>
64	#include <limits>
65	#include <utility>
66
67	using namespace llvm;
68
69	#define DEBUG_TYPE "hexagon-lowering"
70
71	static cl::opt<bool> EmitJumpTables("hexagon-emit-jump-tables",
72	cl::init(Val: true), cl::Hidden,
73	cl::desc ("Control jump table emission on Hexagon target"));
74
75	static cl::opt<bool>
76	EnableHexSDNodeSched("enable-hexagon-sdnode-sched", cl::Hidden,
77	cl::desc ("Enable Hexagon SDNode scheduling"));
78
79	static cl::opt<bool> EnableFastMath("ffast-math", cl::Hidden,
80	cl::desc ("Enable Fast Math processing"));
81
82	static cl::opt<int> MinimumJumpTables("minimum-jump-tables", cl::Hidden,
83	cl::init(Val: `5`),
84	cl::desc ("Set minimum jump tables"));
85
86	static cl::opt<int>
87	MaxStoresPerMemcpyCL("max-store-memcpy", cl::Hidden, cl::init(Val: `6`),
88	cl::desc ("Max #stores to inline memcpy"));
89
90	static cl::opt<int>
91	MaxStoresPerMemcpyOptSizeCL("max-store-memcpy-Os", cl::Hidden, cl::init(Val: `4`),
92	cl::desc ("Max #stores to inline memcpy"));
93
94	static cl::opt<int>
95	MaxStoresPerMemmoveCL("max-store-memmove", cl::Hidden, cl::init(Val: `6`),
96	cl::desc ("Max #stores to inline memmove"));
97
98	static cl::opt<int>
99	MaxStoresPerMemmoveOptSizeCL("max-store-memmove-Os", cl::Hidden,
100	cl::init(Val: `4`),
101	cl::desc ("Max #stores to inline memmove"));
102
103	static cl::opt<int>
104	MaxStoresPerMemsetCL("max-store-memset", cl::Hidden, cl::init(Val: `8`),
105	cl::desc ("Max #stores to inline memset"));
106
107	static cl::opt<int>
108	MaxStoresPerMemsetOptSizeCL("max-store-memset-Os", cl::Hidden, cl::init(Val: `4`),
109	cl::desc ("Max #stores to inline memset"));
110
111	static cl::opt<bool> AlignLoads("hexagon-align-loads",
112	cl::Hidden, cl::init(Val: false),
113	cl::desc ("Rewrite unaligned loads as a pair of aligned loads"));
114
115	static cl::opt<bool>
116	DisableArgsMinAlignment("hexagon-disable-args-min-alignment", cl::Hidden,
117	cl::init(Val: false),
118	cl::desc ("Disable minimum alignment of 1 for "
119	"arguments passed by value on stack"));
120
121	namespace {
122
123	class HexagonCCState : public CCState {
124	unsigned NumNamedVarArgParams = `0`;
125
126	public:
127	HexagonCCState(CallingConv::ID CC, bool IsVarArg, MachineFunction &MF,
128	SmallVectorImpl<CCValAssign> &locs, LLVMContext &C,
129	unsigned NumNamedArgs)
130	: CCState (CC, IsVarArg, MF, locs, C),
131	NumNamedVarArgParams(NumNamedArgs) {}
132	unsigned getNumNamedVarArgParams() const { return NumNamedVarArgParams; }
133	};
134
135	} // end anonymous namespace
136
137
138	// Implement calling convention for Hexagon.
139
140	static bool CC_SkipOdd(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
141	CCValAssign::LocInfo &LocInfo,
142	ISD::ArgFlagsTy &ArgFlags, CCState &State) {
143	static const MCPhysReg ArgRegs[] = {
144	Hexagon::R0, Hexagon::R1, Hexagon::R2,
145	Hexagon::R3, Hexagon::R4, Hexagon::R5
146	};
147	const unsigned NumArgRegs = std::size(ArgRegs);
148	unsigned RegNum = State.getFirstUnallocated(ArgRegs);
149
150	// RegNum is an index into ArgRegs: skip a register if RegNum is odd.
151	if (RegNum != NumArgRegs && RegNum % `2` == `1`)
152	State.AllocateReg(Reg: ArgRegs[RegNum]);
153
154	// Always return false here, as this function only makes sure that the first
155	// unallocated register has an even register number and does not actually
156	// allocate a register for the current argument.
157	return false;
158	}
159
160	#include "HexagonGenCallingConv.inc"
161
162
163	SDValue
164	HexagonTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG)
165	const {
166	return SDValue ();
167	}
168
169	/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
170	/// by "Src" to address "Dst" of size "Size". Alignment information is
171	/// specified by the specific parameter attribute. The copy will be passed as
172	/// a byval function parameter. Sometimes what we are copying is the end of a
173	/// larger object, the part that does not fit in registers.
174	static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
175	SDValue Chain, ISD::ArgFlagsTy Flags,
176	SelectionDAG &DAG, const SDLoc &dl) {
177	SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
178	return DAG.getMemcpy(
179	Chain, dl, Dst, Src, Size: SizeNode, Alignment: Flags.getNonZeroByValAlign(),
180	/isVolatile=/isVol: false, /AlwaysInline=/false,
181	/isTailCall=/false, DstPtrInfo: MachinePointerInfo (), SrcPtrInfo: MachinePointerInfo ());
182	}
183
184	bool
185	HexagonTargetLowering::CanLowerReturn(
186	CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
187	const SmallVectorImpl<ISD::OutputArg> &Outs,
188	LLVMContext &Context) const {
189	SmallVector<CCValAssign, `16`> RVLocs;
190	CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
191
192	if (MF.getSubtarget<HexagonSubtarget>().useHVXOps())
193	return CCInfo.CheckReturn(Outs, Fn: RetCC_Hexagon_HVX);
194	return CCInfo.CheckReturn(Outs, Fn: RetCC_Hexagon);
195	}
196
197	// LowerReturn - Lower ISD::RET. If a struct is larger than 8 bytes and is
198	// passed by value, the function prototype is modified to return void and
199	// the value is stored in memory pointed by a pointer passed by caller.
200	SDValue
201	HexagonTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
202	bool IsVarArg,
203	const SmallVectorImpl<ISD::OutputArg> &Outs,
204	const SmallVectorImpl<SDValue> &OutVals,
205	const SDLoc &dl, SelectionDAG &DAG) const {
206	// CCValAssign - represent the assignment of the return value to locations.
207	SmallVector<CCValAssign, `16`> RVLocs;
208
209	// CCState - Info about the registers and stack slot.
210	CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
211	*DAG.getContext());
212
213	// Analyze return values of ISD::RET
214	if (Subtarget.useHVXOps())
215	CCInfo.AnalyzeReturn(Outs, Fn: RetCC_Hexagon_HVX);
216	else
217	CCInfo.AnalyzeReturn(Outs, Fn: RetCC_Hexagon);
218
219	SDValue Glue;
220	SmallVector<SDValue, `4`> RetOps(`1`, Chain);
221
222	// Copy the result values into the output registers.
223	for (unsigned i = `0`; i != RVLocs.size(); ++i) {
224	CCValAssign &VA = RVLocs [i];
225	SDValue Val = OutVals [i];
226
227	switch (VA.getLocInfo()) {
228	default:
229	// Loc info must be one of Full, BCvt, SExt, ZExt, or AExt.
230	llvm_unreachable("Unknown loc info!");
231	case CCValAssign::Full:
232	break;
233	case CCValAssign::BCvt:
234	Val = DAG.getBitcast(VT: VA.getLocVT(), V: Val);
235	break;
236	case CCValAssign::SExt:
237	Val = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL: dl, VT: VA.getLocVT(), Operand: Val);
238	break;
239	case CCValAssign::ZExt:
240	Val = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT: VA.getLocVT(), Operand: Val);
241	break;
242	case CCValAssign::AExt:
243	Val = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: dl, VT: VA.getLocVT(), Operand: Val);
244	break;
245	}
246
247	Chain = DAG.getCopyToReg(Chain, dl, Reg: VA.getLocReg(), N: Val, Glue);
248
249	// Guarantee that all emitted copies are stuck together with flags.
250	Glue = Chain.getValue(R: `1`);
251	RetOps.push_back(Elt: DAG.getRegister(Reg: VA.getLocReg(), VT: VA.getLocVT()));
252	}
253
254	RetOps [`0`] = Chain; // Update chain.
255
256	// Add the glue if we have it.
257	if (Glue.getNode())
258	RetOps.push_back(Elt: Glue);
259
260	return DAG.getNode(HexagonISD::RET_GLUE, dl, MVT::Other, RetOps);
261	}
262
263	bool HexagonTargetLowering::mayBeEmittedAsTailCall(const CallInst CI) const* {
264	// If either no tail call or told not to tail call at all, don't.
265	return CI->isTailCall();
266	}
267
268	Register HexagonTargetLowering::getRegisterByName(
269	const char* RegName, LLT VT, const MachineFunction &) const {
270	// Just support r19, the linux kernel uses it.
271	Register Reg = StringSwitch<Register>(RegName)
272	.Case("r0", Hexagon::R0)
273	.Case("r1", Hexagon::R1)
274	.Case("r2", Hexagon::R2)
275	.Case("r3", Hexagon::R3)
276	.Case("r4", Hexagon::R4)
277	.Case("r5", Hexagon::R5)
278	.Case("r6", Hexagon::R6)
279	.Case("r7", Hexagon::R7)
280	.Case("r8", Hexagon::R8)
281	.Case("r9", Hexagon::R9)
282	.Case("r10", Hexagon::R10)
283	.Case("r11", Hexagon::R11)
284	.Case("r12", Hexagon::R12)
285	.Case("r13", Hexagon::R13)
286	.Case("r14", Hexagon::R14)
287	.Case("r15", Hexagon::R15)
288	.Case("r16", Hexagon::R16)
289	.Case("r17", Hexagon::R17)
290	.Case("r18", Hexagon::R18)
291	.Case("r19", Hexagon::R19)
292	.Case("r20", Hexagon::R20)
293	.Case("r21", Hexagon::R21)
294	.Case("r22", Hexagon::R22)
295	.Case("r23", Hexagon::R23)
296	.Case("r24", Hexagon::R24)
297	.Case("r25", Hexagon::R25)
298	.Case("r26", Hexagon::R26)
299	.Case("r27", Hexagon::R27)
300	.Case("r28", Hexagon::R28)
301	.Case("r29", Hexagon::R29)
302	.Case("r30", Hexagon::R30)
303	.Case("r31", Hexagon::R31)
304	.Case("r1:0", Hexagon::D0)
305	.Case("r3:2", Hexagon::D1)
306	.Case("r5:4", Hexagon::D2)
307	.Case("r7:6", Hexagon::D3)
308	.Case("r9:8", Hexagon::D4)
309	.Case("r11:10", Hexagon::D5)
310	.Case("r13:12", Hexagon::D6)
311	.Case("r15:14", Hexagon::D7)
312	.Case("r17:16", Hexagon::D8)
313	.Case("r19:18", Hexagon::D9)
314	.Case("r21:20", Hexagon::D10)
315	.Case("r23:22", Hexagon::D11)
316	.Case("r25:24", Hexagon::D12)
317	.Case("r27:26", Hexagon::D13)
318	.Case("r29:28", Hexagon::D14)
319	.Case("r31:30", Hexagon::D15)
320	.Case("sp", Hexagon::R29)
321	.Case("fp", Hexagon::R30)
322	.Case("lr", Hexagon::R31)
323	.Case("p0", Hexagon::P0)
324	.Case("p1", Hexagon::P1)
325	.Case("p2", Hexagon::P2)
326	.Case("p3", Hexagon::P3)
327	.Case("sa0", Hexagon::SA0)
328	.Case("lc0", Hexagon::LC0)
329	.Case("sa1", Hexagon::SA1)
330	.Case("lc1", Hexagon::LC1)
331	.Case("m0", Hexagon::M0)
332	.Case("m1", Hexagon::M1)
333	.Case("usr", Hexagon::USR)
334	.Case("ugp", Hexagon::UGP)
335	.Case("cs0", Hexagon::CS0)
336	.Case("cs1", Hexagon::CS1)
337	.Default(Register());
338	if (Reg)
339	return Reg;
340
341	report_fatal_error(reason: "Invalid register name global variable");
342	}
343
344	/// LowerCallResult - Lower the result values of an ISD::CALL into the
345	/// appropriate copies out of appropriate physical registers. This assumes that
346	/// Chain/Glue are the input chain/glue to use, and that TheCall is the call
347	/// being lowered. Returns a SDNode with the same number of values as the
348	/// ISD::CALL.
349	SDValue HexagonTargetLowering::LowerCallResult(
350	SDValue Chain, SDValue Glue, CallingConv::ID CallConv, bool IsVarArg,
351	const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
352	SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
353	const SmallVectorImpl<SDValue> &OutVals, SDValue Callee) const {
354	// Assign locations to each value returned by this call.
355	SmallVector<CCValAssign, `16`> RVLocs;
356
357	CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
358	*DAG.getContext());
359
360	if (Subtarget.useHVXOps())
361	CCInfo.AnalyzeCallResult(Ins, RetCC_Hexagon_HVX);
362	else
363	CCInfo.AnalyzeCallResult(Ins, RetCC_Hexagon);
364
365	// Copy all of the result registers out of their specified physreg.
366	for (unsigned i = `0`; i != RVLocs.size(); ++i) {
367	SDValue RetVal;
368	if (RVLocs[i].getValVT() == MVT::i1) {
369	// Return values of type MVT::i1 require special handling. The reason
370	// is that MVT::i1 is associated with the PredRegs register class, but
371	// values of that type are still returned in R0. Generate an explicit
372	// copy into a predicate register from R0, and treat the value of the
373	// predicate register as the call result.
374	auto &MRI = DAG.getMachineFunction().getRegInfo();
375	SDValue FR0 = DAG.getCopyFromReg(Chain, dl, RVLocs[i].getLocReg(),
376	MVT::i32, Glue);
377	// FR0 = (Value, Chain, Glue)
378	Register PredR = MRI.createVirtualRegister(&Hexagon::PredRegsRegClass);
379	SDValue TPR = DAG.getCopyToReg(Chain: FR0.getValue(R: `1`), dl, Reg: PredR,
380	N: FR0.getValue(R: `0`), Glue: FR0.getValue(R: `2`));
381	// TPR = (Chain, Glue)
382	// Don't glue this CopyFromReg, because it copies from a virtual
383	// register. If it is glued to the call, InstrEmitter will add it
384	// as an implicit def to the call (EmitMachineNode).
385	RetVal = DAG.getCopyFromReg(TPR.getValue(`0`), dl, PredR, MVT::i1);
386	Glue = TPR.getValue(R: `1`);
387	Chain = TPR.getValue(R: `0`);
388	} else {
389	RetVal = DAG.getCopyFromReg(Chain, dl, Reg: RVLocs [i].getLocReg(),
390	VT: RVLocs [i].getValVT(), Glue);
391	Glue = RetVal.getValue(R: `2`);
392	Chain = RetVal.getValue(R: `1`);
393	}
394	InVals.push_back(Elt: RetVal.getValue(R: `0`));
395	}
396
397	return Chain;
398	}
399
400	/// LowerCall - Functions arguments are copied from virtual regs to
401	/// (physical regs)/(stack frame), CALLSEQ_START and CALLSEQ_END are emitted.
	///
	/// The call described by CLI is lowered either to a HexagonISD::CALL or
	/// HexagonISD::CALLnr node bracketed by CALLSEQ_START/CALLSEQ_END, or, when
	/// it is emitted as a tail call, to a HexagonISD::TC_RETURN node. For a
	/// non-tail call the returned value is the chain produced by
	/// LowerCallResult, which also appends the call's results to InVals.
402	SDValue
403	HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
404	SmallVectorImpl<SDValue> &InVals) const {
405	SelectionDAG &DAG = CLI.DAG;
406	SDLoc &dl = CLI.DL;
407	SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
408	SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
409	SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
410	SDValue Chain = CLI.Chain;
411	SDValue Callee = CLI.Callee;
412	CallingConv::ID CallConv = CLI.CallConv;
413	bool IsVarArg = CLI.IsVarArg;
414	bool DoesNotReturn = CLI.DoesNotReturn;
415
	// The callee returns a struct via sret if the first outgoing argument
	// carries the sret flag.
416	bool IsStructRet = Outs.empty() ? false : Outs [`0`].Flags.isSRet();
417	MachineFunction &MF = DAG.getMachineFunction();
418	MachineFrameInfo &MFI = MF.getFrameInfo();
419	auto PtrVT = getPointerTy(DL: MF.getDataLayout());
420
	// Parameter count of the callee's function type (0 when CLI.CB is null);
	// it is handed to the HexagonCCState constructed below.
421	unsigned NumParams = CLI.CB ? CLI.CB->getFunctionType()->getNumParams() : `0`;
	// A global-address callee is rewritten to a target global address of type
	// i32 before the arguments are analyzed.
422	if (GlobalAddressSDNode *GAN = dyn_cast<GlobalAddressSDNode>(Callee))
423	Callee = DAG.getTargetGlobalAddress(GAN->getGlobal(), dl, MVT::i32);
424
425	// Linux ABI treats var-arg calls the same way as regular ones.
426	bool TreatAsVarArg = !Subtarget.isEnvironmentMusl() && IsVarArg;
427
428	// Analyze operands of the call, assigning locations to each operand.
429	SmallVector<CCValAssign, `16`> ArgLocs;
430	HexagonCCState CCInfo(CallConv, TreatAsVarArg, MF, ArgLocs, *DAG.getContext(),
431	NumParams);
432
	// Assignment function: the HVX one when HVX ops are in use, the legacy one
	// when DisableArgsMinAlignment is set, otherwise the default.
433	if (Subtarget.useHVXOps())
434	CCInfo.AnalyzeCallOperands(Outs, CC_Hexagon_HVX);
435	else if (DisableArgsMinAlignment)
436	CCInfo.AnalyzeCallOperands(Outs, CC_Hexagon_Legacy);
437	else
438	CCInfo.AnalyzeCallOperands(Outs, CC_Hexagon);
439
	// A requested tail call is kept only if it passes the eligibility check
	// and no argument was assigned to a stack slot.
440	if (CLI.IsTailCall) {
441	bool StructAttrFlag = MF.getFunction().hasStructRetAttr();
442	CLI.IsTailCall = IsEligibleForTailCallOptimization(Callee, CalleeCC: CallConv,
443	isVarArg: IsVarArg, isCalleeStructRet: IsStructRet, isCallerStructRet: StructAttrFlag, Outs,
444	OutVals, Ins, DAG);
445	for (const CCValAssign &VA : ArgLocs) {
446	if (VA.isMemLoc()) {
447	CLI.IsTailCall = false;
448	break;
449	}
450	}
	// NOTE(review): the "not eligible" text always blames stack arguments, even
	// when the eligibility check above rejected the call for another reason.
451	LLVM_DEBUG(dbgs() << (CLI.IsTailCall ? "Eligible for Tail Call\n"
452	: "Argument must be passed on stack. "
453	"Not eligible for Tail Call\n"));
454	}
455	// Get a count of how many bytes are to be pushed on the stack.
456	unsigned NumBytes = CCInfo.getStackSize();
457	SmallVector<std::pair<unsigned, SDValue>, `16`> RegsToPass;
458	SmallVector<SDValue, `8`> MemOpChains;
459
460	const HexagonRegisterInfo &HRI = *Subtarget.getRegisterInfo();
	// Stack pointer value; addresses of outgoing stack arguments are formed
	// relative to it.
461	SDValue StackPtr =
462	DAG.getCopyFromReg(Chain, dl, Reg: HRI.getStackRegister(), VT: PtrVT);
463
	// NeedsArgAlign is set once any argument has an HVX vector type.
	// LargestAlignSeen tracks the largest store size among such arguments that
	// are passed in memory.
464	bool NeedsArgAlign = false;
465	Align LargestAlignSeen;
466	// Walk the register/memloc assignments, inserting copies/loads.
467	for (unsigned i = `0`, e = ArgLocs.size(); i != e; ++i) {
468	CCValAssign &VA = ArgLocs [i];
469	SDValue Arg = OutVals [i];
470	ISD::ArgFlagsTy Flags = Outs [i].Flags;
471	// Record if we need > 8 byte alignment on an argument.
472	bool ArgAlign = Subtarget.isHVXVectorType(VecTy: VA.getValVT());
473	NeedsArgAlign \|= ArgAlign;
474
475	// Promote the value if needed.
476	switch (VA.getLocInfo()) {
477	default:
478	// Loc info must be one of Full, BCvt, SExt, ZExt, or AExt.
479	llvm_unreachable("Unknown loc info!");
480	case CCValAssign::Full:
481	break;
482	case CCValAssign::BCvt:
483	Arg = DAG.getBitcast(VT: VA.getLocVT(), V: Arg);
484	break;
485	case CCValAssign::SExt:
486	Arg = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL: dl, VT: VA.getLocVT(), Operand: Arg);
487	break;
488	case CCValAssign::ZExt:
489	Arg = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT: VA.getLocVT(), Operand: Arg);
490	break;
491	case CCValAssign::AExt:
492	Arg = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: dl, VT: VA.getLocVT(), Operand: Arg);
493	break;
494	}
495
	// Stack-passed argument: its address is StackPtr plus the assigned offset.
496	if (VA.isMemLoc()) {
497	unsigned LocMemOffset = VA.getLocMemOffset();
498	SDValue MemAddr = DAG.getConstant(Val: LocMemOffset, DL: dl,
499	VT: StackPtr.getValueType());
500	MemAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, MemAddr);
	// For HVX vector arguments the store size in bytes is used as an alignment.
501	if (ArgAlign)
502	LargestAlignSeen = std::max(
503	a: LargestAlignSeen, b: Align (VA.getLocVT().getStoreSizeInBits() / `8`));
504	if (Flags.isByVal()) {
505	// The argument is a struct passed by value. According to LLVM, "Arg"
506	// is a pointer.
507	MemOpChains.push_back(Elt: CreateCopyOfByValArgument(Src: Arg, Dst: MemAddr, Chain,
508	Flags, DAG, dl));
509	} else {
510	MachinePointerInfo LocPI = MachinePointerInfo::getStack(
511	MF&: DAG.getMachineFunction(), Offset: LocMemOffset);
512	SDValue S = DAG.getStore(Chain, dl, Val: Arg, Ptr: MemAddr, PtrInfo: LocPI);
513	MemOpChains.push_back(Elt: S);
514	}
515	continue;
516	}
517
518	// Arguments that can be passed on register must be kept at RegsToPass
519	// vector.
520	if (VA.isRegLoc())
521	RegsToPass.push_back(Elt: std::make_pair(x: VA.getLocReg(), y&: Arg));
522	}
523
	// With HVX vector arguments on V60 or later, raise the frame's maximum
	// alignment to at least the HVX vector register spill alignment.
524	if (NeedsArgAlign && Subtarget.hasV60Ops()) {
525	LLVM_DEBUG(dbgs() << "Function needs byte stack align due to call args\n");
526	Align VecAlign = HRI.getSpillAlign(Hexagon::HvxVRRegClass);
527	LargestAlignSeen = std::max(a: LargestAlignSeen, b: VecAlign);
528	MFI.ensureMaxAlignment(Alignment: LargestAlignSeen);
529	}
530	// Transform all store nodes into one single node because all store
531	// nodes are independent of each other.
532	if (!MemOpChains.empty())
533	Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
534
	// A non-tail call opens the call sequence here; its glue result seeds the
	// copy-to-reg chain below.
535	SDValue Glue;
536	if (!CLI.IsTailCall) {
537	Chain = DAG.getCALLSEQ_START(Chain, InSize: NumBytes, OutSize: `0`, DL: dl);
538	Glue = Chain.getValue(R: `1`);
539	}
540
541	// Build a sequence of copy-to-reg nodes chained together with token
542	// chain and flag operands which copy the outgoing args into registers.
543	// The Glue is necessary since all emitted instructions must be
544	// stuck together.
545	if (!CLI.IsTailCall) {
546	for (const auto &R : RegsToPass) {
547	Chain = DAG.getCopyToReg(Chain, dl, Reg: R.first, N: R.second, Glue);
548	Glue = Chain.getValue(R: `1`);
549	}
550	} else {
551	// For tail calls lower the arguments to the 'real' stack slot.
552	//
553	// Force all the incoming stack arguments to be loaded from the stack
554	// before any new outgoing arguments are stored to the stack, because the
555	// outgoing stack slots may alias the incoming argument stack slots, and
556	// the alias isn't otherwise explicit. This is slightly more conservative
557	// than necessary, because it means that each store effectively depends
558	// on every argument instead of just those arguments it would clobber.
559	//
560	// Do not flag preceding copytoreg stuff together with the following stuff.
561	Glue = SDValue ();
562	for (const auto &R : RegsToPass) {
563	Chain = DAG.getCopyToReg(Chain, dl, Reg: R.first, N: R.second, Glue);
564	Glue = Chain.getValue(R: `1`);
565	}
	// The glue is dropped again, so the tail-call node gets no glue operand.
566	Glue = SDValue ();
567	}
568
	// With long calls enabled the callee symbol carries the const-extended
	// target flag; otherwise no target flag is set.
569	bool LongCalls = MF.getSubtarget<HexagonSubtarget>().useLongCalls();
570	unsigned Flags = LongCalls ? HexagonII::HMOTF_ConstExtended : `0`;
571
572	// If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
573	// direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
574	// node so that legalize doesn't hack it.
575	if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Val&: Callee)) {
576	Callee = DAG.getTargetGlobalAddress(GV: G->getGlobal(), DL: dl, VT: PtrVT, offset: `0`, TargetFlags: Flags);
577	} else if (ExternalSymbolSDNode *S =
578	dyn_cast<ExternalSymbolSDNode>(Val&: Callee)) {
579	Callee = DAG.getTargetExternalSymbol(Sym: S->getSymbol(), VT: PtrVT, TargetFlags: Flags);
580	}
581
582	// Returns a chain & a flag for retval copy to use.
583	SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
	// Operand order: chain, callee, argument registers, register mask, and the
	// glue if there is one.
584	SmallVector<SDValue, `8`> Ops;
585	Ops.push_back(Elt: Chain);
586	Ops.push_back(Elt: Callee);
587
588	// Add argument registers to the end of the list so that they are
589	// known live into the call.
590	for (const auto &R : RegsToPass)
591	Ops.push_back(Elt: DAG.getRegister(Reg: R.first, VT: R.second.getValueType()));
592
593	const uint32_t *Mask = HRI.getCallPreservedMask(MF, CallConv);
594	assert(Mask && "Missing call preserved mask for calling convention");
595	Ops.push_back(Elt: DAG.getRegisterMask(RegMask: Mask));
596
597	if (Glue.getNode())
598	Ops.push_back(Elt: Glue);
599
	// A tail call ends here: no CALLSEQ_END is emitted and no result values
	// are copied out.
600	if (CLI.IsTailCall) {
601	MFI.setHasTailCall();
602	return DAG.getNode(Opcode: HexagonISD::TC_RETURN, DL: dl, VTList: NodeTys, Ops);
603	}
604
605	// Set this here because we need to know this for "hasFP" in frame lowering.
606	// The target-independent code calls getFrameRegister before setting it, and
607	// getFrameRegister uses hasFP to determine whether the function has FP.
608	MFI.setHasCalls(true);
609
	// Calls flagged as not returning use the CALLnr opcode.
610	unsigned OpCode = DoesNotReturn ? HexagonISD::CALLnr : HexagonISD::CALL;
611	Chain = DAG.getNode(Opcode: OpCode, DL: dl, VTList: NodeTys, Ops);
612	Glue = Chain.getValue(R: `1`);
613
614	// Create the CALLSEQ_END node.
615	Chain = DAG.getCALLSEQ_END(Chain, Size1: NumBytes, Size2: `0`, Glue, DL: dl);
616	Glue = Chain.getValue(R: `1`);
617
618	// Handle result values, copying them out of physregs into vregs that we
619	// return.
620	return LowerCallResult(Chain, Glue, CallConv, IsVarArg, Ins, dl, DAG,
621	InVals, OutVals, Callee);
622	}
623
624	/// Returns true by value, base pointer and offset pointer and addressing
625	/// mode by reference if this node can be combined with a load / store to
626	/// form a post-indexed load / store.
627	bool HexagonTargetLowering::getPostIndexedAddressParts(SDNode N, SDNode Op,
628	SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM,
629	SelectionDAG &DAG) const {
630	LSBaseSDNode *LSN = dyn_cast<LSBaseSDNode>(Val: N);
631	if (!LSN)
632	return false;
633	EVT VT = LSN->getMemoryVT();
634	if (!VT.isSimple())
635	return false;
636	bool IsLegalType = VT == MVT::i8 \|\| VT == MVT::i16 \|\| VT == MVT::i32 \|\|
637	VT == MVT::i64 \|\| VT == MVT::f32 \|\| VT == MVT::f64 \|\|
638	VT == MVT::v2i16 \|\| VT == MVT::v2i32 \|\| VT == MVT::v4i8 \|\|
639	VT == MVT::v4i16 \|\| VT == MVT::v8i8 \|\|
640	Subtarget.isHVXVectorType(VT.getSimpleVT());
641	if (!IsLegalType)
642	return false;
643
644	if (Op->getOpcode() != ISD::ADD)
645	return false;
646	Base = Op->getOperand(Num: `0`);
647	Offset = Op->getOperand(Num: `1`);
648	if (!isa<ConstantSDNode>(Val: Offset.getNode()))
649	return false;
650	AM = ISD::POST_INC;
651
652	int32_t V = cast<ConstantSDNode>(Val: Offset.getNode())->getSExtValue();
653	return Subtarget.getInstrInfo()->isValidAutoIncImm(VT, Offset: V);
654	}
655
656	SDValue HexagonTargetLowering::LowerFDIV(SDValue Op, SelectionDAG &DAG) const {
657	if (DAG.getMachineFunction().getFunction().hasOptSize())
658	return SDValue ();
659	else
660	return Op;
661	}
662
663	SDValue
664	HexagonTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const {
665	MachineFunction &MF = DAG.getMachineFunction();
666	auto &HMFI = *MF.getInfo<HexagonMachineFunctionInfo>();
667	const HexagonRegisterInfo &HRI = *Subtarget.getRegisterInfo();
668	unsigned LR = HRI.getRARegister();
669
670	if ((Op.getOpcode() != ISD::INLINEASM &&
671	Op.getOpcode() != ISD::INLINEASM_BR) \|\| HMFI.hasClobberLR())
672	return Op;
673
674	unsigned NumOps = Op.getNumOperands();
675	if (Op.getOperand(NumOps-`1`).getValueType() == MVT::Glue)
676	--NumOps; // Ignore the flag operand.
677
678	for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
679	const InlineAsm::Flag Flags(Op.getConstantOperandVal(i));
680	unsigned NumVals = Flags.getNumOperandRegisters();
681	++i; // Skip the ID value.
682
683	switch (Flags.getKind()) {
684	default:
685	llvm_unreachable("Bad flags!");
686	case InlineAsm::Kind::RegUse:
687	case InlineAsm::Kind::Imm:
688	case InlineAsm::Kind::Mem:
689	i += NumVals;
690	break;
691	case InlineAsm::Kind::Clobber:
692	case InlineAsm::Kind::RegDef:
693	case InlineAsm::Kind::RegDefEarlyClobber: {
694	for (; NumVals; --NumVals, ++i) {
695	Register Reg = cast<RegisterSDNode>(Val: Op.getOperand(i))->getReg();
696	if (Reg != LR)
697	continue;
698	HMFI.setHasClobberLR(true);
699	return Op;
700	}
701	break;
702	}
703	}
704	}
705
706	return Op;
707	}
708
709	// Need to transform ISD::PREFETCH into something that doesn't inherit
710	// all of the properties of ISD::PREFETCH, specifically SDNPMayLoad and
711	// SDNPMayStore.
712	SDValue HexagonTargetLowering::LowerPREFETCH(SDValue Op,
713	SelectionDAG &DAG) const {
714	SDValue Chain = Op.getOperand(i: `0`);
715	SDValue Addr = Op.getOperand(i: `1`);
716	// Lower it to DCFETCH($reg, #0). A "pat" will try to merge the offset in,
717	// if the "reg" is fed by an "add".
718	SDLoc DL(Op);
719	SDValue Zero = DAG.getConstant(`0`, DL, MVT::i32);
720	return DAG.getNode(HexagonISD::DCFETCH, DL, MVT::Other, Chain, Addr, Zero);
721	}
722
723	// Custom-handle ISD::READCYCLECOUNTER because the target-independent SDNode
724	// is marked as having side-effects, while the register read on Hexagon does
725	// not have any. TableGen refuses to accept the direct pattern from that node
726	// to the A4_tfrcpp.
727	SDValue HexagonTargetLowering::LowerREADCYCLECOUNTER(SDValue Op,
728	SelectionDAG &DAG) const {
729	SDValue Chain = Op.getOperand(i: `0`);
730	SDLoc dl(Op);
731	SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
732	return DAG.getNode(Opcode: HexagonISD::READCYCLE, DL: dl, VTList: VTs, N: Chain);
733	}
734
735	SDValue HexagonTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
736	SelectionDAG &DAG) const {
737	SDValue Chain = Op.getOperand(i: `0`);
738	unsigned IntNo = Op.getConstantOperandVal(i: `1`);
739	// Lower the hexagon_prefetch builtin to DCFETCH, as above.
740	if (IntNo == Intrinsic::hexagon_prefetch) {
741	SDValue Addr = Op.getOperand(i: `2`);
742	SDLoc DL(Op);
743	SDValue Zero = DAG.getConstant(`0`, DL, MVT::i32);
744	return DAG.getNode(HexagonISD::DCFETCH, DL, MVT::Other, Chain, Addr, Zero);
745	}
746	return SDValue ();
747	}
748
749	SDValue
750	HexagonTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
751	SelectionDAG &DAG) const {
752	SDValue Chain = Op.getOperand(i: `0`);
753	SDValue Size = Op.getOperand(i: `1`);
754	SDValue Align = Op.getOperand(i: `2`);
755	SDLoc dl(Op);
756
757	ConstantSDNode *AlignConst = dyn_cast<ConstantSDNode>(Val&: Align);
758	assert(AlignConst && "Non-constant Align in LowerDYNAMIC_STACKALLOC");
759
760	unsigned A = AlignConst->getSExtValue();
761	auto &HFI = *Subtarget.getFrameLowering();
762	// "Zero" means natural stack alignment.
763	if (A == `0`)
764	A = HFI.getStackAlign().value();
765
766	LLVM_DEBUG({
767	dbgs () << __func__ << " Align: " << A << " Size: ";
768	Size.getNode()->dump(&DAG);
769	dbgs() << "\n";
770	});
771
772	SDValue AC = DAG.getConstant(A, dl, MVT::i32);
773	SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other);
774	SDValue AA = DAG.getNode(Opcode: HexagonISD::ALLOCA, DL: dl, VTList: VTs, N1: Chain, N2: Size, N3: AC);
775
776	DAG.ReplaceAllUsesOfValueWith(From: Op, To: AA);
777	return AA;
778	}
779
780	SDValue HexagonTargetLowering::LowerFormalArguments(
781	SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
782	const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
783	SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
784	MachineFunction &MF = DAG.getMachineFunction();
785	MachineFrameInfo &MFI = MF.getFrameInfo();
786	MachineRegisterInfo &MRI = MF.getRegInfo();
787
788	// Linux ABI treats var-arg calls the same way as regular ones.
789	bool TreatAsVarArg = !Subtarget.isEnvironmentMusl() && IsVarArg;
790
791	// Assign locations to all of the incoming arguments.
792	SmallVector<CCValAssign, `16`> ArgLocs;
793	HexagonCCState CCInfo(CallConv, TreatAsVarArg, MF, ArgLocs,
794	*DAG.getContext(),
795	MF.getFunction().getFunctionType()->getNumParams());
796
797	if (Subtarget.useHVXOps())
798	CCInfo.AnalyzeFormalArguments(Ins, CC_Hexagon_HVX);
799	else if (DisableArgsMinAlignment)
800	CCInfo.AnalyzeFormalArguments(Ins, CC_Hexagon_Legacy);
801	else
802	CCInfo.AnalyzeFormalArguments(Ins, CC_Hexagon);
803
804	// For LLVM, in the case when returning a struct by value (>8byte),
805	// the first argument is a pointer that points to the location on caller's
806	// stack where the return value will be stored. For Hexagon, the location on
807	// caller's stack is passed only when the struct size is smaller than (and
808	// equal to) 8 bytes. If not, no address will be passed into callee and
809	// callee returns the result directly through R0/R1.
810	auto NextSingleReg = [] (const TargetRegisterClass &RC, unsigned Reg) {
811	switch (RC.getID()) {
812	case Hexagon::IntRegsRegClassID:
813	return Reg - Hexagon::R0 + `1`;
814	case Hexagon::DoubleRegsRegClassID:
815	return (Reg - Hexagon::D0 + `1`) * `2`;
816	case Hexagon::HvxVRRegClassID:
817	return Reg - Hexagon::V0 + `1`;
818	case Hexagon::HvxWRRegClassID:
819	return (Reg - Hexagon::W0 + `1`) * `2`;
820	}
821	llvm_unreachable("Unexpected register class");
822	};
823
824	auto &HFL = const_cast<HexagonFrameLowering&>(*Subtarget.getFrameLowering());
825	auto &HMFI = *MF.getInfo<HexagonMachineFunctionInfo>();
826	HFL.FirstVarArgSavedReg = `0`;
827	HMFI.setFirstNamedArgFrameIndex(-int(MFI.getNumFixedObjects()));
828
829	for (unsigned i = `0`, e = ArgLocs.size(); i != e; ++i) {
830	CCValAssign &VA = ArgLocs [i];
831	ISD::ArgFlagsTy Flags = Ins [i].Flags;
832	bool ByVal = Flags.isByVal();
833
834	// Arguments passed in registers:
835	// 1. 32- and 64-bit values and HVX vectors are passed directly,
836	// 2. Large structs are passed via an address, and the address is
837	// passed in a register.
838	if (VA.isRegLoc() && ByVal && Flags.getByValSize() <= `8`)
839	llvm_unreachable("ByValSize must be bigger than 8 bytes");
840
841	bool InReg = VA.isRegLoc() &&
842	(!ByVal \|\| (ByVal && Flags.getByValSize() > `8`));
843
844	if (InReg) {
845	MVT RegVT = VA.getLocVT();
846	if (VA.getLocInfo() == CCValAssign::BCvt)
847	RegVT = VA.getValVT();
848
849	const TargetRegisterClass *RC = getRegClassFor(VT: RegVT);
850	Register VReg = MRI.createVirtualRegister(RegClass: RC);
851	SDValue Copy = DAG.getCopyFromReg(Chain, dl, Reg: VReg, VT: RegVT);
852
853	// Treat values of type MVT::i1 specially: they are passed in
854	// registers of type i32, but they need to remain as values of
855	// type i1 for consistency of the argument lowering.
856	if (VA.getValVT() == MVT::i1) {
857	assert(RegVT.getSizeInBits() <= `32`);
858	SDValue T = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: RegVT,
859	N1: Copy, N2: DAG.getConstant(Val: `1`, DL: dl, VT: RegVT));
860	Copy = DAG.getSetCC(dl, MVT::i1, T, DAG.getConstant(`0`, dl, RegVT),
861	ISD::SETNE);
862	} else {
863	#ifndef NDEBUG
864	unsigned RegSize = RegVT.getSizeInBits();
865	assert(RegSize == `32` \|\| RegSize == `64` \|\|
866	Subtarget.isHVXVectorType(RegVT));
867	#endif
868	}
869	InVals.push_back(Elt: Copy);
870	MRI.addLiveIn(Reg: VA.getLocReg(), vreg: VReg);
871	HFL.FirstVarArgSavedReg = NextSingleReg(*RC, VA.getLocReg());
872	} else {
873	assert(VA.isMemLoc() && "Argument should be passed in memory");
874
875	// If it's a byval parameter, then we need to compute the
876	// "real" size, not the size of the pointer.
877	unsigned ObjSize = Flags.isByVal()
878	? Flags.getByValSize()
879	: VA.getLocVT().getStoreSizeInBits() / `8`;
880
881	// Create the frame index object for this incoming parameter.
882	int Offset = HEXAGON_LRFP_SIZE + VA.getLocMemOffset();
883	int FI = MFI.CreateFixedObject(Size: ObjSize, SPOffset: Offset, IsImmutable: true);
884	SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
885
886	if (Flags.isByVal()) {
887	// If it's a pass-by-value aggregate, then do not dereference the stack
888	// location. Instead, we should generate a reference to the stack
889	// location.
890	InVals.push_back(Elt: FIN);
891	} else {
892	SDValue L = DAG.getLoad(VT: VA.getValVT(), dl, Chain, Ptr: FIN,
893	PtrInfo: MachinePointerInfo::getFixedStack(MF, FI, Offset: `0`));
894	InVals.push_back(Elt: L);
895	}
896	}
897	}
898
899	if (IsVarArg && Subtarget.isEnvironmentMusl()) {
900	for (int i = HFL.FirstVarArgSavedReg; i < `6`; i++)
901	MRI.addLiveIn(Hexagon::R0+i);
902	}
903
904	if (IsVarArg && Subtarget.isEnvironmentMusl()) {
905	HMFI.setFirstNamedArgFrameIndex(HMFI.getFirstNamedArgFrameIndex() - `1`);
906	HMFI.setLastNamedArgFrameIndex(-int(MFI.getNumFixedObjects()));
907
908	// Create Frame index for the start of register saved area.
909	int NumVarArgRegs = `6` - HFL.FirstVarArgSavedReg;
910	bool RequiresPadding = (NumVarArgRegs & `1`);
911	int RegSaveAreaSizePlusPadding = RequiresPadding
912	? (NumVarArgRegs + `1`) * `4`
913	: NumVarArgRegs * `4`;
914
915	if (RegSaveAreaSizePlusPadding > `0`) {
916	// The offset to saved register area should be 8 byte aligned.
917	int RegAreaStart = HEXAGON_LRFP_SIZE + CCInfo.getStackSize();
918	if (!(RegAreaStart % `8`))
919	RegAreaStart = (RegAreaStart + `7`) & -`8`;
920
921	int RegSaveAreaFrameIndex =
922	MFI.CreateFixedObject(Size: RegSaveAreaSizePlusPadding, SPOffset: RegAreaStart, IsImmutable: true);
923	HMFI.setRegSavedAreaStartFrameIndex(RegSaveAreaFrameIndex);
924
925	// This will point to the next argument passed via stack.
926	int Offset = RegAreaStart + RegSaveAreaSizePlusPadding;
927	int FI = MFI.CreateFixedObject(Hexagon_PointerSize, SPOffset: Offset, IsImmutable: true);
928	HMFI.setVarArgsFrameIndex(FI);
929	} else {
930	// This will point to the next argument passed via stack, when
931	// there is no saved register area.
932	int Offset = HEXAGON_LRFP_SIZE + CCInfo.getStackSize();
933	int FI = MFI.CreateFixedObject(Hexagon_PointerSize, SPOffset: Offset, IsImmutable: true);
934	HMFI.setRegSavedAreaStartFrameIndex(FI);
935	HMFI.setVarArgsFrameIndex(FI);
936	}
937	}
938
939
940	if (IsVarArg && !Subtarget.isEnvironmentMusl()) {
941	// This will point to the next argument passed via stack.
942	int Offset = HEXAGON_LRFP_SIZE + CCInfo.getStackSize();
943	int FI = MFI.CreateFixedObject(Hexagon_PointerSize, SPOffset: Offset, IsImmutable: true);
944	HMFI.setVarArgsFrameIndex(FI);
945	}
946
947	return Chain;
948	}
949
950	SDValue
951	HexagonTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
952	// VASTART stores the address of the VarArgsFrameIndex slot into the
953	// memory location argument.
954	MachineFunction &MF = DAG.getMachineFunction();
955	HexagonMachineFunctionInfo *QFI = MF.getInfo<HexagonMachineFunctionInfo>();
956	SDValue Addr = DAG.getFrameIndex(QFI->getVarArgsFrameIndex(), MVT::i32);
957	const Value *SV = cast<SrcValueSDNode>(Val: Op.getOperand(i: `2`))->getValue();
958
959	if (!Subtarget.isEnvironmentMusl()) {
960	return DAG.getStore(Chain: Op.getOperand(i: `0`), dl: SDLoc (Op), Val: Addr, Ptr: Op.getOperand(i: `1`),
961	PtrInfo: MachinePointerInfo (SV));
962	}
963	auto &FuncInfo = *MF.getInfo<HexagonMachineFunctionInfo>();
964	auto &HFL = *Subtarget.getFrameLowering();
965	SDLoc DL(Op);
966	SmallVector<SDValue, `8`> MemOps;
967
968	// Get frame index of va_list.
969	SDValue FIN = Op.getOperand(i: `1`);
970
971	// If first Vararg register is odd, add 4 bytes to start of
972	// saved register area to point to the first register location.
973	// This is because the saved register area has to be 8 byte aligned.
974	// Incase of an odd start register, there will be 4 bytes of padding in
975	// the beginning of saved register area. If all registers area used up,
976	// the following condition will handle it correctly.
977	SDValue SavedRegAreaStartFrameIndex =
978	DAG.getFrameIndex(FuncInfo.getRegSavedAreaStartFrameIndex(), MVT::i32);
979
980	auto PtrVT = getPointerTy(DL: DAG.getDataLayout());
981
982	if (HFL.FirstVarArgSavedReg & `1`)
983	SavedRegAreaStartFrameIndex =
984	DAG.getNode(ISD::ADD, DL, PtrVT,
985	DAG.getFrameIndex(FuncInfo.getRegSavedAreaStartFrameIndex(),
986	MVT::i32),
987	DAG.getIntPtrConstant(`4`, DL));
988
989	// Store the saved register area start pointer.
990	SDValue Store =
991	DAG.getStore(Chain: Op.getOperand(i: `0`), dl: DL,
992	Val: SavedRegAreaStartFrameIndex,
993	Ptr: FIN, PtrInfo: MachinePointerInfo (SV));
994	MemOps.push_back(Elt: Store);
995
996	// Store saved register area end pointer.
997	FIN = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT,
998	N1: FIN, N2: DAG.getIntPtrConstant(Val: `4`, DL));
999	Store = DAG.getStore(Chain: Op.getOperand(i: `0`), dl: DL,
1000	Val: DAG.getFrameIndex(FI: FuncInfo.getVarArgsFrameIndex(),
1001	VT: PtrVT),
1002	Ptr: FIN, PtrInfo: MachinePointerInfo (SV, `4`));
1003	MemOps.push_back(Elt: Store);
1004
1005	// Store overflow area pointer.
1006	FIN = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT,
1007	N1: FIN, N2: DAG.getIntPtrConstant(Val: `4`, DL));
1008	Store = DAG.getStore(Chain: Op.getOperand(i: `0`), dl: DL,
1009	Val: DAG.getFrameIndex(FI: FuncInfo.getVarArgsFrameIndex(),
1010	VT: PtrVT),
1011	Ptr: FIN, PtrInfo: MachinePointerInfo (SV, `8`));
1012	MemOps.push_back(Elt: Store);
1013
1014	return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
1015	}
1016
1017	SDValue
1018	HexagonTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
1019	// Assert that the linux ABI is enabled for the current compilation.
1020	assert(Subtarget.isEnvironmentMusl() && "Linux ABI should be enabled");
1021	SDValue Chain = Op.getOperand(i: `0`);
1022	SDValue DestPtr = Op.getOperand(i: `1`);
1023	SDValue SrcPtr = Op.getOperand(i: `2`);
1024	const Value *DestSV = cast<SrcValueSDNode>(Val: Op.getOperand(i: `3`))->getValue();
1025	const Value *SrcSV = cast<SrcValueSDNode>(Val: Op.getOperand(i: `4`))->getValue();
1026	SDLoc DL(Op);
1027	// Size of the va_list is 12 bytes as it has 3 pointers. Therefore,
1028	// we need to memcopy 12 bytes from va_list to another similar list.
1029	return DAG.getMemcpy(Chain, dl: DL, Dst: DestPtr, Src: SrcPtr,
1030	Size: DAG.getIntPtrConstant(Val: `12`, DL), Alignment: Align (`4`),
1031	/isVolatile/ isVol: false, AlwaysInline: false, isTailCall: false,
1032	DstPtrInfo: MachinePointerInfo (DestSV), SrcPtrInfo: MachinePointerInfo (SrcSV));
1033	}
1034
1035	SDValue HexagonTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
1036	const SDLoc &dl(Op);
1037	SDValue LHS = Op.getOperand(i: `0`);
1038	SDValue RHS = Op.getOperand(i: `1`);
1039	ISD::CondCode CC = cast<CondCodeSDNode>(Val: Op.getOperand(i: `2`))->get();
1040	MVT ResTy = ty(Op);
1041	MVT OpTy = ty(Op: LHS);
1042
1043	if (OpTy == MVT::v2i16 \|\| OpTy == MVT::v4i8) {
1044	MVT ElemTy = OpTy.getVectorElementType();
1045	assert(ElemTy.isScalarInteger());
1046	MVT WideTy = MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: `2`*ElemTy.getSizeInBits()),
1047	NumElements: OpTy.getVectorNumElements());
1048	return DAG.getSetCC(DL: dl, VT: ResTy,
1049	LHS: DAG.getSExtOrTrunc(Op: LHS, DL: SDLoc (LHS), VT: WideTy),
1050	RHS: DAG.getSExtOrTrunc(Op: RHS, DL: SDLoc (RHS), VT: WideTy), Cond: CC);
1051	}
1052
1053	// Treat all other vector types as legal.
1054	if (ResTy.isVector())
1055	return Op;
1056
1057	// Comparisons of short integers should use sign-extend, not zero-extend,
1058	// since we can represent small negative values in the compare instructions.
1059	// The LLVM default is to use zero-extend arbitrarily in these cases.
1060	auto isSExtFree = [this](SDValue N) {
1061	switch (N.getOpcode()) {
1062	case ISD::TRUNCATE: {
1063	// A sign-extend of a truncate of a sign-extend is free.
1064	SDValue Op = N.getOperand(i: `0`);
1065	if (Op.getOpcode() != ISD::AssertSext)
1066	return false;
1067	EVT OrigTy = cast<VTSDNode>(Val: Op.getOperand(i: `1`))->getVT();
1068	unsigned ThisBW = ty(Op: N).getSizeInBits();
1069	unsigned OrigBW = OrigTy.getSizeInBits();
1070	// The type that was sign-extended to get the AssertSext must be
1071	// narrower than the type of N (so that N has still the same value
1072	// as the original).
1073	return ThisBW >= OrigBW;
1074	}
1075	case ISD::LOAD:
1076	// We have sign-extended loads.
1077	return true;
1078	}
1079	return false;
1080	};
1081
1082	if (OpTy == MVT::i8 \|\| OpTy == MVT::i16) {
1083	ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val&: RHS);
1084	bool IsNegative = C && C->getAPIntValue().isNegative();
1085	if (IsNegative \|\| isSExtFree(LHS) \|\| isSExtFree(RHS))
1086	return DAG.getSetCC(dl, ResTy,
1087	DAG.getSExtOrTrunc(LHS, SDLoc(LHS), MVT::i32),
1088	DAG.getSExtOrTrunc(RHS, SDLoc(RHS), MVT::i32), CC);
1089	}
1090
1091	return SDValue ();
1092	}
1093
1094	SDValue
1095	HexagonTargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const {
1096	SDValue PredOp = Op.getOperand(i: `0`);
1097	SDValue Op1 = Op.getOperand(i: `1`), Op2 = Op.getOperand(i: `2`);
1098	MVT OpTy = ty(Op: Op1);
1099	const SDLoc &dl(Op);
1100
1101	if (OpTy == MVT::v2i16 \|\| OpTy == MVT::v4i8) {
1102	MVT ElemTy = OpTy.getVectorElementType();
1103	assert(ElemTy.isScalarInteger());
1104	MVT WideTy = MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: `2`*ElemTy.getSizeInBits()),
1105	NumElements: OpTy.getVectorNumElements());
1106	// Generate (trunc (select (_, sext, sext))).
1107	return DAG.getSExtOrTrunc(
1108	Op: DAG.getSelect(DL: dl, VT: WideTy, Cond: PredOp,
1109	LHS: DAG.getSExtOrTrunc(Op: Op1, DL: dl, VT: WideTy),
1110	RHS: DAG.getSExtOrTrunc(Op: Op2, DL: dl, VT: WideTy)),
1111	DL: dl, VT: OpTy);
1112	}
1113
1114	return SDValue ();
1115	}
1116
1117	SDValue
1118	HexagonTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const {
1119	EVT ValTy = Op.getValueType();
1120	ConstantPoolSDNode *CPN = cast<ConstantPoolSDNode>(Val&: Op);
1121	Constant CVal = nullptr*;
1122	bool isVTi1Type = false;
1123	if (auto *CV = dyn_cast<ConstantVector>(Val: CPN->getConstVal())) {
1124	if (cast<VectorType>(Val: CV->getType())->getElementType()->isIntegerTy(Bitwidth: `1`)) {
1125	IRBuilder<> IRB(CV->getContext());
1126	SmallVector<Constant*, `128`> NewConst;
1127	unsigned VecLen = CV->getNumOperands();
1128	assert(isPowerOf2_32(VecLen) &&
1129	"conversion only supported for pow2 VectorSize");
1130	for (unsigned i = `0`; i < VecLen; ++i)
1131	NewConst.push_back(Elt: IRB.getInt8(C: CV->getOperand(i_nocapture: i)->isZeroValue()));
1132
1133	CVal = ConstantVector::get(V: NewConst);
1134	isVTi1Type = true;
1135	}
1136	}
1137	Align Alignment = CPN->getAlign();
1138	bool IsPositionIndependent = isPositionIndependent();
1139	unsigned char TF = IsPositionIndependent ? HexagonII::MO_PCREL : `0`;
1140
1141	unsigned Offset = `0`;
1142	SDValue T;
1143	if (CPN->isMachineConstantPoolEntry())
1144	T = DAG.getTargetConstantPool(C: CPN->getMachineCPVal(), VT: ValTy, Align: Alignment,
1145	Offset, TargetFlags: TF);
1146	else if (isVTi1Type)
1147	T = DAG.getTargetConstantPool(C: CVal, VT: ValTy, Align: Alignment, Offset, TargetFlags: TF);
1148	else
1149	T = DAG.getTargetConstantPool(C: CPN->getConstVal(), VT: ValTy, Align: Alignment, Offset,
1150	TargetFlags: TF);
1151
1152	assert(cast<ConstantPoolSDNode>(T)->getTargetFlags() == TF &&
1153	"Inconsistent target flag encountered");
1154
1155	if (IsPositionIndependent)
1156	return DAG.getNode(Opcode: HexagonISD::AT_PCREL, DL: SDLoc (Op), VT: ValTy, Operand: T);
1157	return DAG.getNode(Opcode: HexagonISD::CP, DL: SDLoc (Op), VT: ValTy, Operand: T);
1158	}
1159
1160	SDValue
1161	HexagonTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
1162	EVT VT = Op.getValueType();
1163	int Idx = cast<JumpTableSDNode>(Val&: Op)->getIndex();
1164	if (isPositionIndependent()) {
1165	SDValue T = DAG.getTargetJumpTable(JTI: Idx, VT, TargetFlags: HexagonII::MO_PCREL);
1166	return DAG.getNode(Opcode: HexagonISD::AT_PCREL, DL: SDLoc (Op), VT, Operand: T);
1167	}
1168
1169	SDValue T = DAG.getTargetJumpTable(JTI: Idx, VT);
1170	return DAG.getNode(Opcode: HexagonISD::JT, DL: SDLoc (Op), VT, Operand: T);
1171	}
1172
1173	SDValue
1174	HexagonTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const {
1175	const HexagonRegisterInfo &HRI = *Subtarget.getRegisterInfo();
1176	MachineFunction &MF = DAG.getMachineFunction();
1177	MachineFrameInfo &MFI = MF.getFrameInfo();
1178	MFI.setReturnAddressIsTaken(true);
1179
1180	if (verifyReturnAddressArgumentIsConstant(Op, DAG))
1181	return SDValue ();
1182
1183	EVT VT = Op.getValueType();
1184	SDLoc dl(Op);
1185	unsigned Depth = Op.getConstantOperandVal(i: `0`);
1186	if (Depth) {
1187	SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
1188	SDValue Offset = DAG.getConstant(`4`, dl, MVT::i32);
1189	return DAG.getLoad(VT, dl, Chain: DAG.getEntryNode(),
1190	Ptr: DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: FrameAddr, N2: Offset),
1191	PtrInfo: MachinePointerInfo ());
1192	}
1193
1194	// Return LR, which contains the return address. Mark it an implicit live-in.
1195	Register Reg = MF.addLiveIn(HRI.getRARegister(), getRegClassFor(MVT::i32));
1196	return DAG.getCopyFromReg(Chain: DAG.getEntryNode(), dl, Reg, VT);
1197	}
1198
1199	SDValue
1200	HexagonTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
1201	const HexagonRegisterInfo &HRI = *Subtarget.getRegisterInfo();
1202	MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
1203	MFI.setFrameAddressIsTaken(true);
1204
1205	EVT VT = Op.getValueType();
1206	SDLoc dl(Op);
1207	unsigned Depth = Op.getConstantOperandVal(i: `0`);
1208	SDValue FrameAddr = DAG.getCopyFromReg(Chain: DAG.getEntryNode(), dl,
1209	Reg: HRI.getFrameRegister(), VT);
1210	while (Depth--)
1211	FrameAddr = DAG.getLoad(VT, dl, Chain: DAG.getEntryNode(), Ptr: FrameAddr,
1212	PtrInfo: MachinePointerInfo ());
1213	return FrameAddr;
1214	}
1215
1216	SDValue
1217	HexagonTargetLowering::LowerATOMIC_FENCE(SDValue Op, SelectionDAG& DAG) const {
1218	SDLoc dl(Op);
1219	return DAG.getNode(HexagonISD::BARRIER, dl, MVT::Other, Op.getOperand(`0`));
1220	}
1221
1222	SDValue
1223	HexagonTargetLowering::LowerGLOBALADDRESS(SDValue Op, SelectionDAG &DAG) const {
1224	SDLoc dl(Op);
1225	auto *GAN = cast<GlobalAddressSDNode>(Val&: Op);
1226	auto PtrVT = getPointerTy(DL: DAG.getDataLayout());
1227	auto *GV = GAN->getGlobal();
1228	int64_t Offset = GAN->getOffset();
1229
1230	auto &HLOF = *HTM.getObjFileLowering();
1231	Reloc::Model RM = HTM.getRelocationModel();
1232
1233	if (RM == Reloc::Static) {
1234	SDValue GA = DAG.getTargetGlobalAddress(GV, DL: dl, VT: PtrVT, offset: Offset);
1235	const GlobalObject *GO = GV->getAliaseeObject();
1236	if (GO && Subtarget.useSmallData() && HLOF.isGlobalInSmallSection(GO, HTM))
1237	return DAG.getNode(Opcode: HexagonISD::CONST32_GP, DL: dl, VT: PtrVT, Operand: GA);
1238	return DAG.getNode(Opcode: HexagonISD::CONST32, DL: dl, VT: PtrVT, Operand: GA);
1239	}
1240
1241	bool UsePCRel = getTargetMachine().shouldAssumeDSOLocal(GV);
1242	if (UsePCRel) {
1243	SDValue GA = DAG.getTargetGlobalAddress(GV, DL: dl, VT: PtrVT, offset: Offset,
1244	TargetFlags: HexagonII::MO_PCREL);
1245	return DAG.getNode(Opcode: HexagonISD::AT_PCREL, DL: dl, VT: PtrVT, Operand: GA);
1246	}
1247
1248	// Use GOT index.
1249	SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(VT: PtrVT);
1250	SDValue GA = DAG.getTargetGlobalAddress(GV, DL: dl, VT: PtrVT, offset: `0`, TargetFlags: HexagonII::MO_GOT);
1251	SDValue Off = DAG.getConstant(Offset, dl, MVT::i32);
1252	return DAG.getNode(Opcode: HexagonISD::AT_GOT, DL: dl, VT: PtrVT, N1: GOT, N2: GA, N3: Off);
1253	}
1254
1255	// Specifies that for loads and stores VT can be promoted to PromotedLdStVT.
1256	SDValue
1257	HexagonTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const {
1258	const BlockAddress *BA = cast<BlockAddressSDNode>(Val&: Op)->getBlockAddress();
1259	SDLoc dl(Op);
1260	EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
1261
1262	Reloc::Model RM = HTM.getRelocationModel();
1263	if (RM == Reloc::Static) {
1264	SDValue A = DAG.getTargetBlockAddress(BA, VT: PtrVT);
1265	return DAG.getNode(Opcode: HexagonISD::CONST32_GP, DL: dl, VT: PtrVT, Operand: A);
1266	}
1267
1268	SDValue A = DAG.getTargetBlockAddress(BA, VT: PtrVT, Offset: `0`, TargetFlags: HexagonII::MO_PCREL);
1269	return DAG.getNode(Opcode: HexagonISD::AT_PCREL, DL: dl, VT: PtrVT, Operand: A);
1270	}
1271
1272	SDValue
1273	HexagonTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG)
1274	const {
1275	EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
1276	SDValue GOTSym = DAG.getTargetExternalSymbol(HEXAGON_GOT_SYM_NAME, VT: PtrVT,
1277	TargetFlags: HexagonII::MO_PCREL);
1278	return DAG.getNode(Opcode: HexagonISD::AT_PCREL, DL: SDLoc (Op), VT: PtrVT, Operand: GOTSym);
1279	}
1280
1281	SDValue
1282	HexagonTargetLowering::GetDynamicTLSAddr(SelectionDAG &DAG, SDValue Chain,
1283	GlobalAddressSDNode GA, SDValue Glue, EVT PtrVT, unsigned* ReturnReg,
1284	unsigned char OperandFlags) const {
1285	MachineFunction &MF = DAG.getMachineFunction();
1286	MachineFrameInfo &MFI = MF.getFrameInfo();
1287	SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
1288	SDLoc dl(GA);
1289	SDValue TGA = DAG.getTargetGlobalAddress(GV: GA->getGlobal(), DL: dl,
1290	VT: GA->getValueType(ResNo: `0`),
1291	offset: GA->getOffset(),
1292	TargetFlags: OperandFlags);
1293	// Create Operands for the call.The Operands should have the following:
1294	// 1. Chain SDValue
1295	// 2. Callee which in this case is the Global address value.
1296	// 3. Registers live into the call.In this case its R0, as we
1297	// have just one argument to be passed.
1298	// 4. Glue.
1299	// Note: The order is important.
1300
1301	const auto &HRI = *Subtarget.getRegisterInfo();
1302	const uint32_t *Mask = HRI.getCallPreservedMask(MF, CallingConv::C);
1303	assert(Mask && "Missing call preserved mask for calling convention");
1304	SDValue Ops[] = { Chain, TGA, DAG.getRegister(Hexagon::R0, PtrVT),
1305	DAG.getRegisterMask(Mask), Glue };
1306	Chain = DAG.getNode(HexagonISD::CALL, dl, NodeTys, Ops);
1307
1308	// Inform MFI that function has calls.
1309	MFI.setAdjustsStack(true);
1310
1311	Glue = Chain.getValue(R: `1`);
1312	return DAG.getCopyFromReg(Chain, dl, Reg: ReturnReg, VT: PtrVT, Glue);
1313	}
1314
1315	//
// Lower using the initial executable model for TLS addresses
1317	//
1318	SDValue
1319	HexagonTargetLowering::LowerToTLSInitialExecModel(GlobalAddressSDNode *GA,
1320	SelectionDAG &DAG) const {
1321	SDLoc dl(GA);
1322	int64_t Offset = GA->getOffset();
1323	auto PtrVT = getPointerTy(DL: DAG.getDataLayout());
1324
1325	// Get the thread pointer.
1326	SDValue TP = DAG.getCopyFromReg(DAG.getEntryNode(), dl, Hexagon::UGP, PtrVT);
1327
1328	bool IsPositionIndependent = isPositionIndependent();
1329	unsigned char TF =
1330	IsPositionIndependent ? HexagonII::MO_IEGOT : HexagonII::MO_IE;
1331
1332	// First generate the TLS symbol address
1333	SDValue TGA = DAG.getTargetGlobalAddress(GV: GA->getGlobal(), DL: dl, VT: PtrVT,
1334	offset: Offset, TargetFlags: TF);
1335
1336	SDValue Sym = DAG.getNode(Opcode: HexagonISD::CONST32, DL: dl, VT: PtrVT, Operand: TGA);
1337
1338	if (IsPositionIndependent) {
1339	// Generate the GOT pointer in case of position independent code
1340	SDValue GOT = LowerGLOBAL_OFFSET_TABLE(Op: Sym, DAG);
1341
1342	// Add the TLS Symbol address to GOT pointer.This gives
1343	// GOT relative relocation for the symbol.
1344	Sym = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: GOT, N2: Sym);
1345	}
1346
1347	// Load the offset value for TLS symbol.This offset is relative to
1348	// thread pointer.
1349	SDValue LoadOffset =
1350	DAG.getLoad(VT: PtrVT, dl, Chain: DAG.getEntryNode(), Ptr: Sym, PtrInfo: MachinePointerInfo ());
1351
1352	// Address of the thread local variable is the add of thread
1353	// pointer and the offset of the variable.
1354	return DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: TP, N2: LoadOffset);
1355	}
1356
1357	//
1358	// Lower using the local executable model for TLS addresses
1359	//
1360	SDValue
1361	HexagonTargetLowering::LowerToTLSLocalExecModel(GlobalAddressSDNode *GA,
1362	SelectionDAG &DAG) const {
1363	SDLoc dl(GA);
1364	int64_t Offset = GA->getOffset();
1365	auto PtrVT = getPointerTy(DL: DAG.getDataLayout());
1366
1367	// Get the thread pointer.
1368	SDValue TP = DAG.getCopyFromReg(DAG.getEntryNode(), dl, Hexagon::UGP, PtrVT);
1369	// Generate the TLS symbol address
1370	SDValue TGA = DAG.getTargetGlobalAddress(GV: GA->getGlobal(), DL: dl, VT: PtrVT, offset: Offset,
1371	TargetFlags: HexagonII::MO_TPREL);
1372	SDValue Sym = DAG.getNode(Opcode: HexagonISD::CONST32, DL: dl, VT: PtrVT, Operand: TGA);
1373
1374	// Address of the thread local variable is the add of thread
1375	// pointer and the offset of the variable.
1376	return DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: TP, N2: Sym);
1377	}
1378
1379	//
1380	// Lower using the general dynamic model for TLS addresses
1381	//
1382	SDValue
1383	HexagonTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
1384	SelectionDAG &DAG) const {
1385	SDLoc dl(GA);
1386	int64_t Offset = GA->getOffset();
1387	auto PtrVT = getPointerTy(DL: DAG.getDataLayout());
1388
1389	// First generate the TLS symbol address
1390	SDValue TGA = DAG.getTargetGlobalAddress(GV: GA->getGlobal(), DL: dl, VT: PtrVT, offset: Offset,
1391	TargetFlags: HexagonII::MO_GDGOT);
1392
1393	// Then, generate the GOT pointer
1394	SDValue GOT = LowerGLOBAL_OFFSET_TABLE(Op: TGA, DAG);
1395
1396	// Add the TLS symbol and the GOT pointer
1397	SDValue Sym = DAG.getNode(Opcode: HexagonISD::CONST32, DL: dl, VT: PtrVT, Operand: TGA);
1398	SDValue Chain = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: GOT, N2: Sym);
1399
1400	// Copy over the argument to R0
1401	SDValue InGlue;
1402	Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, Hexagon::R0, Chain, InGlue);
1403	InGlue = Chain.getValue(R: `1`);
1404
1405	unsigned Flags = DAG.getSubtarget<HexagonSubtarget>().useLongCalls()
1406	? HexagonII::MO_GDPLT \| HexagonII::HMOTF_ConstExtended
1407	: HexagonII::MO_GDPLT;
1408
1409	return GetDynamicTLSAddr(DAG, Chain, GA, InGlue, PtrVT,
1410	Hexagon::R0, Flags);
1411	}
1412
1413	//
1414	// Lower TLS addresses.
1415	//
1416	// For now for dynamic models, we only support the general dynamic model.
1417	//
1418	SDValue
1419	HexagonTargetLowering::LowerGlobalTLSAddress(SDValue Op,
1420	SelectionDAG &DAG) const {
1421	GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Val&: Op);
1422
1423	switch (HTM.getTLSModel(GA->getGlobal())) {
1424	case TLSModel::GeneralDynamic:
1425	case TLSModel::LocalDynamic:
1426	return LowerToTLSGeneralDynamicModel(GA, DAG);
1427	case TLSModel::InitialExec:
1428	return LowerToTLSInitialExecModel(GA, DAG);
1429	case TLSModel::LocalExec:
1430	return LowerToTLSLocalExecModel(GA, DAG);
1431	}
1432	llvm_unreachable("Bogus TLS model");
1433	}
1434
1435	//===----------------------------------------------------------------------===//
1436	// TargetLowering Implementation
1437	//===----------------------------------------------------------------------===//
1438
1439	HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
1440	const HexagonSubtarget &ST)
1441	: TargetLowering (TM), HTM(static_cast<const HexagonTargetMachine&>(TM)),
1442	Subtarget(ST) {
1443	auto &HRI = *Subtarget.getRegisterInfo();
1444
1445	setPrefLoopAlignment(Align (`16`));
1446	setMinFunctionAlignment(Align (`4`));
1447	setPrefFunctionAlignment(Align (`16`));
1448	setStackPointerRegisterToSaveRestore(HRI.getStackRegister());
1449	setBooleanContents(TargetLoweringBase::UndefinedBooleanContent);
1450	setBooleanVectorContents(TargetLoweringBase::UndefinedBooleanContent);
1451
1452	setMaxAtomicSizeInBitsSupported(`64`);
1453	setMinCmpXchgSizeInBits(`32`);
1454
1455	if (EnableHexSDNodeSched)
1456	setSchedulingPreference(Sched::VLIW);
1457	else
1458	setSchedulingPreference(Sched::Source);
1459
1460	// Limits for inline expansion of memcpy/memmove
1461	MaxStoresPerMemcpy = MaxStoresPerMemcpyCL;
1462	MaxStoresPerMemcpyOptSize = MaxStoresPerMemcpyOptSizeCL;
1463	MaxStoresPerMemmove = MaxStoresPerMemmoveCL;
1464	MaxStoresPerMemmoveOptSize = MaxStoresPerMemmoveOptSizeCL;
1465	MaxStoresPerMemset = MaxStoresPerMemsetCL;
1466	MaxStoresPerMemsetOptSize = MaxStoresPerMemsetOptSizeCL;
1467
1468	//
1469	// Set up register classes.
1470	//
1471
1472	addRegisterClass(MVT::i1, &Hexagon::PredRegsRegClass);
1473	addRegisterClass(MVT::v2i1, &Hexagon::PredRegsRegClass); // bbbbaaaa
1474	addRegisterClass(MVT::v4i1, &Hexagon::PredRegsRegClass); // ddccbbaa
1475	addRegisterClass(MVT::v8i1, &Hexagon::PredRegsRegClass); // hgfedcba
1476	addRegisterClass(MVT::i32, &Hexagon::IntRegsRegClass);
1477	addRegisterClass(MVT::v2i16, &Hexagon::IntRegsRegClass);
1478	addRegisterClass(MVT::v4i8, &Hexagon::IntRegsRegClass);
1479	addRegisterClass(MVT::i64, &Hexagon::DoubleRegsRegClass);
1480	addRegisterClass(MVT::v8i8, &Hexagon::DoubleRegsRegClass);
1481	addRegisterClass(MVT::v4i16, &Hexagon::DoubleRegsRegClass);
1482	addRegisterClass(MVT::v2i32, &Hexagon::DoubleRegsRegClass);
1483
1484	addRegisterClass(MVT::f32, &Hexagon::IntRegsRegClass);
1485	addRegisterClass(MVT::f64, &Hexagon::DoubleRegsRegClass);
1486
1487	//
1488	// Handling of scalar operations.
1489	//
1490	// All operations default to "legal", except:
1491	// - indexed loads and stores (pre-/post-incremented),
1492	// - ANY_EXTEND_VECTOR_INREG, ATOMIC_CMP_SWAP_WITH_SUCCESS, CONCAT_VECTORS,
1493	// ConstantFP, DEBUGTRAP, FCEIL, FCOPYSIGN, FEXP, FEXP2, FFLOOR, FGETSIGN,
1494	// FLOG, FLOG2, FLOG10, FMAXNUM, FMINNUM, FNEARBYINT, FRINT, FROUND, TRAP,
1495	// FTRUNC, PREFETCH, SIGN_EXTEND_VECTOR_INREG, ZERO_EXTEND_VECTOR_INREG,
1496	// which default to "expand" for at least one type.
1497
1498	// Misc operations.
1499	setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
1500	setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
1501	setOperationAction(ISD::TRAP, MVT::Other, Legal);
1502	setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
1503	setOperationAction(ISD::JumpTable, MVT::i32, Custom);
1504	setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
1505	setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
1506	setOperationAction(ISD::INLINEASM, MVT::Other, Custom);
1507	setOperationAction(ISD::INLINEASM_BR, MVT::Other, Custom);
1508	setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
1509	setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom);
1510	setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
1511	setOperationAction(ISD::EH_RETURN, MVT::Other, Custom);
1512	setOperationAction(ISD::GLOBAL_OFFSET_TABLE, MVT::i32, Custom);
1513	setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
1514	setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
1515
1516	// Custom legalize GlobalAddress nodes into CONST32.
1517	setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
1518	setOperationAction(ISD::GlobalAddress, MVT::i8, Custom);
1519	setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
1520
1521	// Hexagon needs to optimize cases with negative constants.
1522	setOperationAction(ISD::SETCC, MVT::i8, Custom);
1523	setOperationAction(ISD::SETCC, MVT::i16, Custom);
1524	setOperationAction(ISD::SETCC, MVT::v4i8, Custom);
1525	setOperationAction(ISD::SETCC, MVT::v2i16, Custom);
1526
1527	// VASTART needs to be custom lowered to use the VarArgsFrameIndex.
1528	setOperationAction(ISD::VASTART, MVT::Other, Custom);
1529	setOperationAction(ISD::VAEND, MVT::Other, Expand);
1530	setOperationAction(ISD::VAARG, MVT::Other, Expand);
1531	if (Subtarget.isEnvironmentMusl())
1532	setOperationAction(ISD::VACOPY, MVT::Other, Custom);
1533	else
1534	setOperationAction(ISD::VACOPY, MVT::Other, Expand);
1535
1536	setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
1537	setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
1538	setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
1539
1540	if (EmitJumpTables)
1541	setMinimumJumpTableEntries(MinimumJumpTables);
1542	else
1543	setMinimumJumpTableEntries(std::numeric_limits<unsigned>::max());
1544	setOperationAction(ISD::BR_JT, MVT::Other, Expand);
1545
1546	for (unsigned LegalIntOp :
1547	{ISD::ABS, ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}) {
1548	setOperationAction(LegalIntOp, MVT::i32, Legal);
1549	setOperationAction(LegalIntOp, MVT::i64, Legal);
1550	}
1551
1552	// Hexagon has A4_addp_c and A4_subp_c that take and generate a carry bit,
1553	// but they only operate on i64.
1554	for (MVT VT : MVT::integer_valuetypes()) {
1555	setOperationAction(ISD::UADDO, VT, Custom);
1556	setOperationAction(ISD::USUBO, VT, Custom);
1557	setOperationAction(ISD::SADDO, VT, Expand);
1558	setOperationAction(ISD::SSUBO, VT, Expand);
1559	setOperationAction(ISD::UADDO_CARRY, VT, Expand);
1560	setOperationAction(ISD::USUBO_CARRY, VT, Expand);
1561	}
1562	setOperationAction(ISD::UADDO_CARRY, MVT::i64, Custom);
1563	setOperationAction(ISD::USUBO_CARRY, MVT::i64, Custom);
1564
1565	setOperationAction(ISD::CTLZ, MVT::i8, Promote);
1566	setOperationAction(ISD::CTLZ, MVT::i16, Promote);
1567	setOperationAction(ISD::CTTZ, MVT::i8, Promote);
1568	setOperationAction(ISD::CTTZ, MVT::i16, Promote);
1569
1570	// Popcount can count # of 1s in i64 but returns i32.
1571	setOperationAction(ISD::CTPOP, MVT::i8, Promote);
1572	setOperationAction(ISD::CTPOP, MVT::i16, Promote);
1573	setOperationAction(ISD::CTPOP, MVT::i32, Promote);
1574	setOperationAction(ISD::CTPOP, MVT::i64, Legal);
1575
1576	setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
1577	setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
1578	setOperationAction(ISD::BSWAP, MVT::i32, Legal);
1579	setOperationAction(ISD::BSWAP, MVT::i64, Legal);
1580
1581	setOperationAction(ISD::FSHL, MVT::i32, Legal);
1582	setOperationAction(ISD::FSHL, MVT::i64, Legal);
1583	setOperationAction(ISD::FSHR, MVT::i32, Legal);
1584	setOperationAction(ISD::FSHR, MVT::i64, Legal);
1585
1586	for (unsigned IntExpOp :
1587	{ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM,
1588	ISD::SDIVREM, ISD::UDIVREM, ISD::ROTL, ISD::ROTR,
1589	ISD::SHL_PARTS, ISD::SRA_PARTS, ISD::SRL_PARTS,
1590	ISD::SMUL_LOHI, ISD::UMUL_LOHI}) {
1591	for (MVT VT : MVT::integer_valuetypes())
1592	setOperationAction(IntExpOp, VT, Expand);
1593	}
1594
1595	for (unsigned FPExpOp :
1596	{ISD::FDIV, ISD::FREM, ISD::FSQRT, ISD::FSIN, ISD::FCOS, ISD::FSINCOS,
1597	ISD::FPOW, ISD::FCOPYSIGN}) {
1598	for (MVT VT : MVT::fp_valuetypes())
1599	setOperationAction(FPExpOp, VT, Expand);
1600	}
1601
1602	// No extending loads from i32.
1603	for (MVT VT : MVT::integer_valuetypes()) {
1604	setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i32, Expand);
1605	setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i32, Expand);
1606	setLoadExtAction(ISD::EXTLOAD, VT, MVT::i32, Expand);
1607	}
1608	// Turn FP truncstore into trunc + store.
1609	setTruncStoreAction(MVT::f64, MVT::f32, Expand);
1610	// Turn FP extload into load/fpextend.
1611	for (MVT VT : MVT::fp_valuetypes())
1612	setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
1613
1614	// Expand BR_CC and SELECT_CC for all integer and fp types.
1615	for (MVT VT : MVT::integer_valuetypes()) {
1616	setOperationAction(ISD::BR_CC, VT, Expand);
1617	setOperationAction(ISD::SELECT_CC, VT, Expand);
1618	}
1619	for (MVT VT : MVT::fp_valuetypes()) {
1620	setOperationAction(ISD::BR_CC, VT, Expand);
1621	setOperationAction(ISD::SELECT_CC, VT, Expand);
1622	}
1623	setOperationAction(ISD::BR_CC, MVT::Other, Expand);
1624
1625	//
1626	// Handling of vector operations.
1627	//
1628
1629	// Set the action for vector operations to "expand", then override it with
1630	// either "custom" or "legal" for specific cases.
1631	static const unsigned VectExpOps[] = {
1632	// Integer arithmetic:
1633	ISD::ADD, ISD::SUB, ISD::MUL, ISD::SDIV, ISD::UDIV,
1634	ISD::SREM, ISD::UREM, ISD::SDIVREM, ISD::UDIVREM, ISD::SADDO,
1635	ISD::UADDO, ISD::SSUBO, ISD::USUBO, ISD::SMUL_LOHI, ISD::UMUL_LOHI,
1636	// Logical/bit:
1637	ISD::AND, ISD::OR, ISD::XOR, ISD::ROTL, ISD::ROTR,
1638	ISD::CTPOP, ISD::CTLZ, ISD::CTTZ, ISD::BSWAP, ISD::BITREVERSE,
1639	// Floating point arithmetic/math functions:
1640	ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FMA, ISD::FDIV,
1641	ISD::FREM, ISD::FNEG, ISD::FABS, ISD::FSQRT, ISD::FSIN,
1642	ISD::FCOS, ISD::FPOW, ISD::FLOG, ISD::FLOG2,
1643	ISD::FLOG10, ISD::FEXP, ISD::FEXP2, ISD::FCEIL, ISD::FTRUNC,
1644	ISD::FRINT, ISD::FNEARBYINT, ISD::FROUND, ISD::FFLOOR,
1645	ISD::FMINNUM, ISD::FMAXNUM, ISD::FSINCOS, ISD::FLDEXP,
1646	// Misc:
1647	ISD::BR_CC, ISD::SELECT_CC, ISD::ConstantPool,
1648	// Vector:
1649	ISD::BUILD_VECTOR, ISD::SCALAR_TO_VECTOR,
1650	ISD::EXTRACT_VECTOR_ELT, ISD::INSERT_VECTOR_ELT,
1651	ISD::EXTRACT_SUBVECTOR, ISD::INSERT_SUBVECTOR,
1652	ISD::CONCAT_VECTORS, ISD::VECTOR_SHUFFLE,
1653	ISD::SPLAT_VECTOR,
1654	};
1655
1656	for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
1657	for (unsigned VectExpOp : VectExpOps)
1658	setOperationAction(VectExpOp, VT, Expand);
1659
1660	// Expand all extending loads and truncating stores:
1661	for (MVT TargetVT : MVT::fixedlen_vector_valuetypes()) {
1662	if (TargetVT == VT)
1663	continue;
1664	setLoadExtAction(ISD::EXTLOAD, TargetVT, VT, Expand);
1665	setLoadExtAction(ISD::ZEXTLOAD, TargetVT, VT, Expand);
1666	setLoadExtAction(ISD::SEXTLOAD, TargetVT, VT, Expand);
1667	setTruncStoreAction(VT, TargetVT, Expand);
1668	}
1669
1670	// Normalize all inputs to SELECT to be vectors of i32.
1671	if (VT.getVectorElementType() != MVT::i32) {
1672	MVT VT32 = MVT::getVectorVT(MVT::i32, VT.getSizeInBits()/`32`);
1673	setOperationAction(ISD::SELECT, VT, Promote);
1674	AddPromotedToType(ISD::SELECT, VT, VT32);
1675	}
1676	setOperationAction(ISD::SRA, VT, Custom);
1677	setOperationAction(ISD::SHL, VT, Custom);
1678	setOperationAction(ISD::SRL, VT, Custom);
1679	}
1680
1681	// Extending loads from (native) vectors of i8 into (native) vectors of i16
1682	// are legal.
1683	setLoadExtAction(ISD::EXTLOAD, MVT::v2i16, MVT::v2i8, Legal);
1684	setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i16, MVT::v2i8, Legal);
1685	setLoadExtAction(ISD::SEXTLOAD, MVT::v2i16, MVT::v2i8, Legal);
1686	setLoadExtAction(ISD::EXTLOAD, MVT::v4i16, MVT::v4i8, Legal);
1687	setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i16, MVT::v4i8, Legal);
1688	setLoadExtAction(ISD::SEXTLOAD, MVT::v4i16, MVT::v4i8, Legal);
1689
1690	setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Legal);
1691	setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Legal);
1692	setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Legal);
1693
1694	// Types natively supported:
1695	for (MVT NativeVT : {MVT::v8i1, MVT::v4i1, MVT::v2i1, MVT::v4i8,
1696	MVT::v8i8, MVT::v2i16, MVT::v4i16, MVT::v2i32}) {
1697	setOperationAction(ISD::BUILD_VECTOR, NativeVT, Custom);
1698	setOperationAction(ISD::EXTRACT_VECTOR_ELT, NativeVT, Custom);
1699	setOperationAction(ISD::INSERT_VECTOR_ELT, NativeVT, Custom);
1700	setOperationAction(ISD::EXTRACT_SUBVECTOR, NativeVT, Custom);
1701	setOperationAction(ISD::INSERT_SUBVECTOR, NativeVT, Custom);
1702	setOperationAction(ISD::CONCAT_VECTORS, NativeVT, Custom);
1703
1704	setOperationAction(ISD::ADD, NativeVT, Legal);
1705	setOperationAction(ISD::SUB, NativeVT, Legal);
1706	setOperationAction(ISD::MUL, NativeVT, Legal);
1707	setOperationAction(ISD::AND, NativeVT, Legal);
1708	setOperationAction(ISD::OR, NativeVT, Legal);
1709	setOperationAction(ISD::XOR, NativeVT, Legal);
1710
1711	if (NativeVT.getVectorElementType() != MVT::i1) {
1712	setOperationAction(ISD::SPLAT_VECTOR, NativeVT, Legal);
1713	setOperationAction(ISD::BSWAP, NativeVT, Legal);
1714	setOperationAction(ISD::BITREVERSE, NativeVT, Legal);
1715	}
1716	}
1717
1718	for (MVT VT : {MVT::v8i8, MVT::v4i16, MVT::v2i32}) {
1719	setOperationAction(ISD::SMIN, VT, Legal);
1720	setOperationAction(ISD::SMAX, VT, Legal);
1721	setOperationAction(ISD::UMIN, VT, Legal);
1722	setOperationAction(ISD::UMAX, VT, Legal);
1723	}
1724
1725	// Custom lower unaligned loads.
1726	// Also, for both loads and stores, verify the alignment of the address
1727	// in case it is a compile-time constant. This is a usability feature to
1728	// provide a meaningful error message to users.
1729	for (MVT VT : {MVT::i16, MVT::i32, MVT::v4i8, MVT::i64, MVT::v8i8,
1730	MVT::v2i16, MVT::v4i16, MVT::v2i32}) {
1731	setOperationAction(ISD::LOAD, VT, Custom);
1732	setOperationAction(ISD::STORE, VT, Custom);
1733	}
1734
1735	// Custom-lower load/stores of boolean vectors.
1736	for (MVT VT : {MVT::v2i1, MVT::v4i1, MVT::v8i1}) {
1737	setOperationAction(ISD::LOAD, VT, Custom);
1738	setOperationAction(ISD::STORE, VT, Custom);
1739	}
1740
1741	// Normalize integer compares to EQ/GT/UGT
1742	for (MVT VT : {MVT::v2i16, MVT::v4i8, MVT::v8i8, MVT::v2i32, MVT::v4i16,
1743	MVT::v2i32}) {
1744	setCondCodeAction(ISD::SETNE, VT, Expand);
1745	setCondCodeAction(ISD::SETLE, VT, Expand);
1746	setCondCodeAction(ISD::SETGE, VT, Expand);
1747	setCondCodeAction(ISD::SETLT, VT, Expand);
1748	setCondCodeAction(ISD::SETULE, VT, Expand);
1749	setCondCodeAction(ISD::SETUGE, VT, Expand);
1750	setCondCodeAction(ISD::SETULT, VT, Expand);
1751	}
1752
1753	// Normalize boolean compares to [U]LE/[U]LT
1754	for (MVT VT : {MVT::i1, MVT::v2i1, MVT::v4i1, MVT::v8i1}) {
1755	setCondCodeAction(ISD::SETGE, VT, Expand);
1756	setCondCodeAction(ISD::SETGT, VT, Expand);
1757	setCondCodeAction(ISD::SETUGE, VT, Expand);
1758	setCondCodeAction(ISD::SETUGT, VT, Expand);
1759	}
1760
1761	// Custom-lower bitcasts from i8 to v8i1.
1762	setOperationAction(ISD::BITCAST, MVT::i8, Custom);
1763	setOperationAction(ISD::SETCC, MVT::v2i16, Custom);
1764	setOperationAction(ISD::VSELECT, MVT::v4i8, Custom);
1765	setOperationAction(ISD::VSELECT, MVT::v2i16, Custom);
1766	setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i8, Custom);
1767	setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i16, Custom);
1768	setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i8, Custom);
1769
1770	// V5+.
1771	setOperationAction(ISD::FMA, MVT::f64, Expand);
1772	setOperationAction(ISD::FADD, MVT::f64, Expand);
1773	setOperationAction(ISD::FSUB, MVT::f64, Expand);
1774	setOperationAction(ISD::FMUL, MVT::f64, Expand);
1775	setOperationAction(ISD::FDIV, MVT::f32, Custom);
1776
1777	setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
1778	setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
1779
1780	setOperationAction(ISD::FP_TO_UINT, MVT::i1, Promote);
1781	setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote);
1782	setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote);
1783	setOperationAction(ISD::FP_TO_SINT, MVT::i1, Promote);
1784	setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote);
1785	setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote);
1786	setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote);
1787	setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
1788	setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
1789	setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote);
1790	setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
1791	setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
1792
1793	// Special handling for half-precision floating point conversions.
1794	// Lower half float conversions into library calls.
1795	setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
1796	setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
1797	setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
1798	setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
1799
1800	setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
1801	setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
1802	setTruncStoreAction(MVT::f32, MVT::f16, Expand);
1803	setTruncStoreAction(MVT::f64, MVT::f16, Expand);
1804
1805	// Handling of indexed loads/stores: default is "expand".
1806	//
1807	for (MVT VT : {MVT::i8, MVT::i16, MVT::i32, MVT::i64, MVT::f32, MVT::f64,
1808	MVT::v2i16, MVT::v2i32, MVT::v4i8, MVT::v4i16, MVT::v8i8}) {
1809	setIndexedLoadAction(ISD::POST_INC, VT, Legal);
1810	setIndexedStoreAction(ISD::POST_INC, VT, Legal);
1811	}
1812
1813	// Subtarget-specific operation actions.
1814	//
1815	if (Subtarget.hasV60Ops()) {
1816	setOperationAction(ISD::ROTL, MVT::i32, Legal);
1817	setOperationAction(ISD::ROTL, MVT::i64, Legal);
1818	setOperationAction(ISD::ROTR, MVT::i32, Legal);
1819	setOperationAction(ISD::ROTR, MVT::i64, Legal);
1820	}
1821	if (Subtarget.hasV66Ops()) {
1822	setOperationAction(ISD::FADD, MVT::f64, Legal);
1823	setOperationAction(ISD::FSUB, MVT::f64, Legal);
1824	}
1825	if (Subtarget.hasV67Ops()) {
1826	setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
1827	setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
1828	setOperationAction(ISD::FMUL, MVT::f64, Legal);
1829	}
1830
1831	setTargetDAGCombine(ISD::OR);
1832	setTargetDAGCombine(ISD::TRUNCATE);
1833	setTargetDAGCombine(ISD::VSELECT);
1834
1835	if (Subtarget.useHVXOps())
1836	initializeHVXLowering();
1837
1838	computeRegisterProperties(&HRI);
1839
1840	//
1841	// Library calls for unsupported operations
1842	//
1843	bool FastMath = EnableFastMath;
1844
1845	setLibcallName(Call: RTLIB::SDIV_I32, Name: "__hexagon_divsi3");
1846	setLibcallName(Call: RTLIB::SDIV_I64, Name: "__hexagon_divdi3");
1847	setLibcallName(Call: RTLIB::UDIV_I32, Name: "__hexagon_udivsi3");
1848	setLibcallName(Call: RTLIB::UDIV_I64, Name: "__hexagon_udivdi3");
1849	setLibcallName(Call: RTLIB::SREM_I32, Name: "__hexagon_modsi3");
1850	setLibcallName(Call: RTLIB::SREM_I64, Name: "__hexagon_moddi3");
1851	setLibcallName(Call: RTLIB::UREM_I32, Name: "__hexagon_umodsi3");
1852	setLibcallName(Call: RTLIB::UREM_I64, Name: "__hexagon_umoddi3");
1853
1854	setLibcallName(Call: RTLIB::SINTTOFP_I128_F64, Name: "__hexagon_floattidf");
1855	setLibcallName(Call: RTLIB::SINTTOFP_I128_F32, Name: "__hexagon_floattisf");
1856	setLibcallName(Call: RTLIB::FPTOUINT_F32_I128, Name: "__hexagon_fixunssfti");
1857	setLibcallName(Call: RTLIB::FPTOUINT_F64_I128, Name: "__hexagon_fixunsdfti");
1858	setLibcallName(Call: RTLIB::FPTOSINT_F32_I128, Name: "__hexagon_fixsfti");
1859	setLibcallName(Call: RTLIB::FPTOSINT_F64_I128, Name: "__hexagon_fixdfti");
1860
1861	// This is the only fast library function for sqrtd.
1862	if (FastMath)
1863	setLibcallName(Call: RTLIB::SQRT_F64, Name: "__hexagon_fast2_sqrtdf2");
1864
1865	// Prefix is: nothing for "slow-math",
1866	// "fast2_" for V5+ fast-math double-precision
1867	// (actually, keep fast-math and fast-math2 separate for now)
1868	if (FastMath) {
1869	setLibcallName(Call: RTLIB::ADD_F64, Name: "__hexagon_fast_adddf3");
1870	setLibcallName(Call: RTLIB::SUB_F64, Name: "__hexagon_fast_subdf3");
1871	setLibcallName(Call: RTLIB::MUL_F64, Name: "__hexagon_fast_muldf3");
1872	setLibcallName(Call: RTLIB::DIV_F64, Name: "__hexagon_fast_divdf3");
1873	setLibcallName(Call: RTLIB::DIV_F32, Name: "__hexagon_fast_divsf3");
1874	} else {
1875	setLibcallName(Call: RTLIB::ADD_F64, Name: "__hexagon_adddf3");
1876	setLibcallName(Call: RTLIB::SUB_F64, Name: "__hexagon_subdf3");
1877	setLibcallName(Call: RTLIB::MUL_F64, Name: "__hexagon_muldf3");
1878	setLibcallName(Call: RTLIB::DIV_F64, Name: "__hexagon_divdf3");
1879	setLibcallName(Call: RTLIB::DIV_F32, Name: "__hexagon_divsf3");
1880	}
1881
1882	if (FastMath)
1883	setLibcallName(Call: RTLIB::SQRT_F32, Name: "__hexagon_fast2_sqrtf");
1884	else
1885	setLibcallName(Call: RTLIB::SQRT_F32, Name: "__hexagon_sqrtf");
1886
1887	// Routines to handle fp16 storage type.
1888	setLibcallName(Call: RTLIB::FPROUND_F32_F16, Name: "__truncsfhf2");
1889	setLibcallName(Call: RTLIB::FPROUND_F64_F16, Name: "__truncdfhf2");
1890	setLibcallName(Call: RTLIB::FPEXT_F16_F32, Name: "__extendhfsf2");
1891
1892	// These cause problems when the shift amount is non-constant.
1893	setLibcallName(Call: RTLIB::SHL_I128, Name: nullptr);
1894	setLibcallName(Call: RTLIB::SRL_I128, Name: nullptr);
1895	setLibcallName(Call: RTLIB::SRA_I128, Name: nullptr);
1896	}
1897
1898	const char* HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const {
1899	switch ((HexagonISD::NodeType)Opcode) {
1900	case HexagonISD::ADDC: return "HexagonISD::ADDC";
1901	case HexagonISD::SUBC: return "HexagonISD::SUBC";
1902	case HexagonISD::ALLOCA: return "HexagonISD::ALLOCA";
1903	case HexagonISD::AT_GOT: return "HexagonISD::AT_GOT";
1904	case HexagonISD::AT_PCREL: return "HexagonISD::AT_PCREL";
1905	case HexagonISD::BARRIER: return "HexagonISD::BARRIER";
1906	case HexagonISD::CALL: return "HexagonISD::CALL";
1907	case HexagonISD::CALLnr: return "HexagonISD::CALLnr";
1908	case HexagonISD::CALLR: return "HexagonISD::CALLR";
1909	case HexagonISD::COMBINE: return "HexagonISD::COMBINE";
1910	case HexagonISD::CONST32_GP: return "HexagonISD::CONST32_GP";
1911	case HexagonISD::CONST32: return "HexagonISD::CONST32";
1912	case HexagonISD::CP: return "HexagonISD::CP";
1913	case HexagonISD::DCFETCH: return "HexagonISD::DCFETCH";
1914	case HexagonISD::EH_RETURN: return "HexagonISD::EH_RETURN";
1915	case HexagonISD::TSTBIT: return "HexagonISD::TSTBIT";
1916	case HexagonISD::EXTRACTU: return "HexagonISD::EXTRACTU";
1917	case HexagonISD::INSERT: return "HexagonISD::INSERT";
1918	case HexagonISD::JT: return "HexagonISD::JT";
1919	case HexagonISD::RET_GLUE: return "HexagonISD::RET_GLUE";
1920	case HexagonISD::TC_RETURN: return "HexagonISD::TC_RETURN";
1921	case HexagonISD::VASL: return "HexagonISD::VASL";
1922	case HexagonISD::VASR: return "HexagonISD::VASR";
1923	case HexagonISD::VLSR: return "HexagonISD::VLSR";
1924	case HexagonISD::MFSHL: return "HexagonISD::MFSHL";
1925	case HexagonISD::MFSHR: return "HexagonISD::MFSHR";
1926	case HexagonISD::SSAT: return "HexagonISD::SSAT";
1927	case HexagonISD::USAT: return "HexagonISD::USAT";
1928	case HexagonISD::SMUL_LOHI: return "HexagonISD::SMUL_LOHI";
1929	case HexagonISD::UMUL_LOHI: return "HexagonISD::UMUL_LOHI";
1930	case HexagonISD::USMUL_LOHI: return "HexagonISD::USMUL_LOHI";
1931	case HexagonISD::VEXTRACTW: return "HexagonISD::VEXTRACTW";
1932	case HexagonISD::VINSERTW0: return "HexagonISD::VINSERTW0";
1933	case HexagonISD::VROR: return "HexagonISD::VROR";
1934	case HexagonISD::READCYCLE: return "HexagonISD::READCYCLE";
1935	case HexagonISD::PTRUE: return "HexagonISD::PTRUE";
1936	case HexagonISD::PFALSE: return "HexagonISD::PFALSE";
1937	case HexagonISD::D2P: return "HexagonISD::D2P";
1938	case HexagonISD::P2D: return "HexagonISD::P2D";
1939	case HexagonISD::V2Q: return "HexagonISD::V2Q";
1940	case HexagonISD::Q2V: return "HexagonISD::Q2V";
1941	case HexagonISD::QCAT: return "HexagonISD::QCAT";
1942	case HexagonISD::QTRUE: return "HexagonISD::QTRUE";
1943	case HexagonISD::QFALSE: return "HexagonISD::QFALSE";
1944	case HexagonISD::TL_EXTEND: return "HexagonISD::TL_EXTEND";
1945	case HexagonISD::TL_TRUNCATE: return "HexagonISD::TL_TRUNCATE";
1946	case HexagonISD::TYPECAST: return "HexagonISD::TYPECAST";
1947	case HexagonISD::VALIGN: return "HexagonISD::VALIGN";
1948	case HexagonISD::VALIGNADDR: return "HexagonISD::VALIGNADDR";
1949	case HexagonISD::ISEL: return "HexagonISD::ISEL";
1950	case HexagonISD::OP_END: break;
1951	}
1952	return nullptr;
1953	}
1954
1955	bool
1956	HexagonTargetLowering::validateConstPtrAlignment(SDValue Ptr, Align NeedAlign,
1957	const SDLoc &dl, SelectionDAG &DAG) const {
1958	auto *CA = dyn_cast<ConstantSDNode>(Val&: Ptr);
1959	if (!CA)
1960	return true;
1961	unsigned Addr = CA->getZExtValue();
1962	Align HaveAlign =
1963	Addr != `0` ? Align (`1ull` << llvm::countr_zero(Val: Addr)) : NeedAlign;
1964	if (HaveAlign >= NeedAlign)
1965	return true;
1966
1967	static int DK_MisalignedTrap = llvm::getNextAvailablePluginDiagnosticKind();
1968
1969	struct DiagnosticInfoMisalignedTrap : public DiagnosticInfo {
1970	DiagnosticInfoMisalignedTrap(StringRef M)
1971	: DiagnosticInfo (DK_MisalignedTrap, DS_Remark), Msg (M) {}
1972	void print(DiagnosticPrinter &DP) const override {
1973	DP << Msg;
1974	}
1975	static bool classof(const DiagnosticInfo *DI) {
1976	return DI->getKind() == DK_MisalignedTrap;
1977	}
1978	StringRef Msg;
1979	};
1980
1981	std::string ErrMsg;
1982	raw_string_ostream O(ErrMsg);
1983	O << "Misaligned constant address: " << format_hex(N: Addr, Width: `10`)
1984	<< " has alignment " << HaveAlign.value()
1985	<< ", but the memory access requires " << NeedAlign.value();
1986	if (DebugLoc DL = dl.getDebugLoc())
1987	DL.print(OS&: O << ", at ");
1988	O << ". The instruction has been replaced with a trap.";
1989
1990	DAG.getContext()->diagnose(DI: DiagnosticInfoMisalignedTrap(O.str()));
1991	return false;
1992	}
1993
1994	SDValue
1995	HexagonTargetLowering::replaceMemWithUndef(SDValue Op, SelectionDAG &DAG)
1996	const {
1997	const SDLoc &dl(Op);
1998	auto *LS = cast<LSBaseSDNode>(Val: Op.getNode());
1999	assert(!LS->isIndexed() && "Not expecting indexed ops on constant address");
2000
2001	SDValue Chain = LS->getChain();
2002	SDValue Trap = DAG.getNode(ISD::TRAP, dl, MVT::Other, Chain);
2003	if (LS->getOpcode() == ISD::LOAD)
2004	return DAG.getMergeValues(Ops: {DAG.getUNDEF(VT: ty(Op)), Trap}, dl);
2005	return Trap;
2006	}
2007
2008	// Bit-reverse Load Intrinsic: Check if the instruction is a bit reverse load
2009	// intrinsic.
2010	static bool isBrevLdIntrinsic(const Value *Inst) {
2011	unsigned ID = cast<IntrinsicInst>(Val: Inst)->getIntrinsicID();
2012	return (ID == Intrinsic::hexagon_L2_loadrd_pbr \|\|
2013	ID == Intrinsic::hexagon_L2_loadri_pbr \|\|
2014	ID == Intrinsic::hexagon_L2_loadrh_pbr \|\|
2015	ID == Intrinsic::hexagon_L2_loadruh_pbr \|\|
2016	ID == Intrinsic::hexagon_L2_loadrb_pbr \|\|
2017	ID == Intrinsic::hexagon_L2_loadrub_pbr);
2018	}
2019
2020	// Bit-reverse Load Intrinsic :Crawl up and figure out the object from previous
2021	// instruction. So far we only handle bitcast, extract value and bit reverse
2022	// load intrinsic instructions. Should we handle CGEP ?
2023	static Value getBrevLdObject(Value V) {
2024	if (Operator::getOpcode(V) == Instruction::ExtractValue \|\|
2025	Operator::getOpcode(V) == Instruction::BitCast)
2026	V = cast<Operator>(Val: V)->getOperand(i: `0`);
2027	else if (isa<IntrinsicInst>(Val: V) && isBrevLdIntrinsic(Inst: V))
2028	V = cast<Instruction>(Val: V)->getOperand(i: `0`);
2029	return V;
2030	}
2031
2032	// Bit-reverse Load Intrinsic: For a PHI Node return either an incoming edge or
2033	// a back edge. If the back edge comes from the intrinsic itself, the incoming
2034	// edge is returned.
2035	static Value returnEdge(const* PHINode PN, Value IntrBaseVal) {
2036	const BasicBlock *Parent = PN->getParent();
2037	int Idx = -`1`;
2038	for (unsigned i = `0`, e = PN->getNumIncomingValues(); i < e; ++i) {
2039	BasicBlock *Blk = PN->getIncomingBlock(i);
2040	// Determine if the back edge is originated from intrinsic.
2041	if (Blk == Parent) {
2042	Value *BackEdgeVal = PN->getIncomingValue(i);
2043	Value *BaseVal;
2044	// Loop over till we return the same Value or we hit the IntrBaseVal.
2045	do {
2046	BaseVal = BackEdgeVal;
2047	BackEdgeVal = getBrevLdObject(V: BackEdgeVal);
2048	} while ((BaseVal != BackEdgeVal) && (IntrBaseVal != BackEdgeVal));
2049	// If the getBrevLdObject returns IntrBaseVal, we should return the
2050	// incoming edge.
2051	if (IntrBaseVal == BackEdgeVal)
2052	continue;
2053	Idx = i;
2054	break;
2055	} else // Set the node to incoming edge.
2056	Idx = i;
2057	}
2058	assert(Idx >= `0` && "Unexpected index to incoming argument in PHI");
2059	return PN->getIncomingValue(i: Idx);
2060	}
2061
2062	// Bit-reverse Load Intrinsic: Figure out the underlying object the base
2063	// pointer points to, for the bit-reverse load intrinsic. Setting this to
2064	// memoperand might help alias analysis to figure out the dependencies.
2065	static Value getUnderLyingObjectForBrevLdIntr(Value V) {
2066	Value *IntrBaseVal = V;
2067	Value *BaseVal;
2068	// Loop over till we return the same Value, implies we either figure out
2069	// the object or we hit a PHI
2070	do {
2071	BaseVal = V;
2072	V = getBrevLdObject(V);
2073	} while (BaseVal != V);
2074
2075	// Identify the object from PHINode.
2076	if (const PHINode *PN = dyn_cast<PHINode>(Val: V))
2077	return returnEdge(PN, IntrBaseVal);
2078	// For non PHI nodes, the object is the last value returned by getBrevLdObject
2079	else
2080	return V;
2081	}
2082
2083	/// Given an intrinsic, checks if on the target the intrinsic will need to map
2084	/// to a MemIntrinsicNode (touches memory). If this is the case, it returns
2085	/// true and store the intrinsic information into the IntrinsicInfo that was
2086	/// passed to the function.
2087	bool HexagonTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
2088	const CallInst &I,
2089	MachineFunction &MF,
2090	unsigned Intrinsic) const {
2091	switch (Intrinsic) {
2092	case Intrinsic::hexagon_L2_loadrd_pbr:
2093	case Intrinsic::hexagon_L2_loadri_pbr:
2094	case Intrinsic::hexagon_L2_loadrh_pbr:
2095	case Intrinsic::hexagon_L2_loadruh_pbr:
2096	case Intrinsic::hexagon_L2_loadrb_pbr:
2097	case Intrinsic::hexagon_L2_loadrub_pbr: {
2098	Info.opc = ISD::INTRINSIC_W_CHAIN;
2099	auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
2100	auto &Cont = I.getCalledFunction()->getParent()->getContext();
2101	// The intrinsic function call is of the form { ElTy, i8 }*
2102	// @llvm.hexagon.L2.loadXX.pbr(i8, i32). The pointer and memory access type*
2103	// should be derived from ElTy.
2104	Type *ElTy = I.getCalledFunction()->getReturnType()->getStructElementType(N: `0`);
2105	Info.memVT = MVT::getVT(Ty: ElTy);
2106	llvm::Value *BasePtrVal = I.getOperand(i_nocapture: `0`);
2107	Info.ptrVal = getUnderLyingObjectForBrevLdIntr(V: BasePtrVal);
2108	// The offset value comes through Modifier register. For now, assume the
2109	// offset is 0.
2110	Info.offset = `0`;
2111	Info.align = DL.getABITypeAlign(Ty: Info.memVT.getTypeForEVT(Context&: Cont));
2112	Info.flags = MachineMemOperand::MOLoad;
2113	return true;
2114	}
2115	case Intrinsic::hexagon_V6_vgathermw:
2116	case Intrinsic::hexagon_V6_vgathermw_128B:
2117	case Intrinsic::hexagon_V6_vgathermh:
2118	case Intrinsic::hexagon_V6_vgathermh_128B:
2119	case Intrinsic::hexagon_V6_vgathermhw:
2120	case Intrinsic::hexagon_V6_vgathermhw_128B:
2121	case Intrinsic::hexagon_V6_vgathermwq:
2122	case Intrinsic::hexagon_V6_vgathermwq_128B:
2123	case Intrinsic::hexagon_V6_vgathermhq:
2124	case Intrinsic::hexagon_V6_vgathermhq_128B:
2125	case Intrinsic::hexagon_V6_vgathermhwq:
2126	case Intrinsic::hexagon_V6_vgathermhwq_128B: {
2127	const Module &M = *I.getParent()->getParent()->getParent();
2128	Info.opc = ISD::INTRINSIC_W_CHAIN;
2129	Type *VecTy = I.getArgOperand(i: `1`)->getType();
2130	Info.memVT = MVT::getVT(Ty: VecTy);
2131	Info.ptrVal = I.getArgOperand(i: `0`);
2132	Info.offset = `0`;
2133	Info.align =
2134	MaybeAlign (M.getDataLayout().getTypeAllocSizeInBits(Ty: VecTy) / `8`);
2135	Info.flags = MachineMemOperand::MOLoad \|
2136	MachineMemOperand::MOStore \|
2137	MachineMemOperand::MOVolatile;
2138	return true;
2139	}
2140	default:
2141	break;
2142	}
2143	return false;
2144	}
2145
2146	bool HexagonTargetLowering::hasBitTest(SDValue X, SDValue Y) const {
2147	return X.getValueType().isScalarInteger(); // 'tstbit'
2148	}
2149
2150	bool HexagonTargetLowering::isTruncateFree(Type Ty1, Type Ty2) const {
2151	return isTruncateFree(VT1: EVT::getEVT(Ty: Ty1), VT2: EVT::getEVT(Ty: Ty2));
2152	}
2153
2154	bool HexagonTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
2155	if (!VT1.isSimple() \|\| !VT2.isSimple())
2156	return false;
2157	return VT1.getSimpleVT() == MVT::i64 && VT2.getSimpleVT() == MVT::i32;
2158	}
2159
2160	bool HexagonTargetLowering::isFMAFasterThanFMulAndFAdd(
2161	const MachineFunction &MF, EVT VT) const {
2162	return isOperationLegalOrCustom(Op: ISD::FMA, VT);
2163	}
2164
2165	// Should we expand the build vector with shuffles?
2166	bool HexagonTargetLowering::shouldExpandBuildVectorWithShuffles(EVT VT,
2167	unsigned DefinedValues) const {
2168	return false;
2169	}
2170
2171	bool HexagonTargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
2172	unsigned Index) const {
2173	assert(ResVT.getVectorElementType() == SrcVT.getVectorElementType());
2174	if (!ResVT.isSimple() \|\| !SrcVT.isSimple())
2175	return false;
2176
2177	MVT ResTy = ResVT.getSimpleVT(), SrcTy = SrcVT.getSimpleVT();
2178	if (ResTy.getVectorElementType() != MVT::i1)
2179	return true;
2180
2181	// Non-HVX bool vectors are relatively cheap.
2182	return SrcTy.getVectorNumElements() <= `8`;
2183	}
2184
2185	bool HexagonTargetLowering::isTargetCanonicalConstantNode(SDValue Op) const {
2186	return Op.getOpcode() == ISD::CONCAT_VECTORS \|\|
2187	TargetLowering::isTargetCanonicalConstantNode(Op);
2188	}
2189
2190	bool HexagonTargetLowering::isShuffleMaskLegal(ArrayRef<int> Mask,
2191	EVT VT) const {
2192	return true;
2193	}
2194
2195	TargetLoweringBase::LegalizeTypeAction
2196	HexagonTargetLowering::getPreferredVectorAction(MVT VT) const {
2197	unsigned VecLen = VT.getVectorMinNumElements();
2198	MVT ElemTy = VT.getVectorElementType();
2199
2200	if (VecLen == `1` \|\| VT.isScalableVector())
2201	return TargetLoweringBase::TypeScalarizeVector;
2202
2203	if (Subtarget.useHVXOps()) {
2204	unsigned Action = getPreferredHvxVectorAction(VecTy: VT);
2205	if (Action != ~`0u`)
2206	return static_cast<TargetLoweringBase::LegalizeTypeAction>(Action);
2207	}
2208
2209	// Always widen (remaining) vectors of i1.
2210	if (ElemTy == MVT::i1)
2211	return TargetLoweringBase::TypeWidenVector;
2212	// Widen non-power-of-2 vectors. Such types cannot be split right now,
2213	// and computeRegisterProperties will override "split" with "widen",
2214	// which can cause other issues.
2215	if (!isPowerOf2_32(Value: VecLen))
2216	return TargetLoweringBase::TypeWidenVector;
2217
2218	return TargetLoweringBase::TypeSplitVector;
2219	}
2220
2221	TargetLoweringBase::LegalizeAction
2222	HexagonTargetLowering::getCustomOperationAction(SDNode &Op) const {
2223	if (Subtarget.useHVXOps()) {
2224	unsigned Action = getCustomHvxOperationAction(Op);
2225	if (Action != ~`0u`)
2226	return static_cast<TargetLoweringBase::LegalizeAction>(Action);
2227	}
2228	return TargetLoweringBase::Legal;
2229	}
2230
2231	std::pair<SDValue, int>
2232	HexagonTargetLowering::getBaseAndOffset(SDValue Addr) const {
2233	if (Addr.getOpcode() == ISD::ADD) {
2234	SDValue Op1 = Addr.getOperand(i: `1`);
2235	if (auto CN = dyn_cast<const* ConstantSDNode>(Val: Op1.getNode()))
2236	return { Addr.getOperand(i: `0`), CN->getSExtValue() };
2237	}
2238	return { Addr, `0` };
2239	}
2240
2241	// Lower a vector shuffle (V1, V2, V3). V1 and V2 are the two vectors
2242	// to select data from, V3 is the permutation.
2243	SDValue
2244	HexagonTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG)
2245	const {
2246	const auto *SVN = cast<ShuffleVectorSDNode>(Val&: Op);
2247	ArrayRef<int> AM = SVN->getMask();
2248	assert(AM.size() <= `8` && "Unexpected shuffle mask");
2249	unsigned VecLen = AM.size();
2250
2251	MVT VecTy = ty(Op);
2252	assert(!Subtarget.isHVXVectorType(VecTy, true) &&
2253	"HVX shuffles should be legal");
2254	assert(VecTy.getSizeInBits() <= `64` && "Unexpected vector length");
2255
2256	SDValue Op0 = Op.getOperand(i: `0`);
2257	SDValue Op1 = Op.getOperand(i: `1`);
2258	const SDLoc &dl(Op);
2259
2260	// If the inputs are not the same as the output, bail. This is not an
2261	// error situation, but complicates the handling and the default expansion
2262	// (into BUILD_VECTOR) should be adequate.
2263	if (ty(Op: Op0) != VecTy \|\| ty(Op: Op1) != VecTy)
2264	return SDValue ();
2265
2266	// Normalize the mask so that the first non-negative index comes from
2267	// the first operand.
2268	SmallVector<int,`8`> Mask(AM.begin(), AM.end());
2269	unsigned F = llvm::find_if(Range&: AM, P: [](int M) { return M >= `0`; }) - AM.data();
2270	if (F == AM.size())
2271	return DAG.getUNDEF(VT: VecTy);
2272	if (AM [F] >= int(VecLen)) {
2273	ShuffleVectorSDNode::commuteMask(Mask);
2274	std::swap(a&: Op0, b&: Op1);
2275	}
2276
2277	// Express the shuffle mask in terms of bytes.
2278	SmallVector<int,`8`> ByteMask;
2279	unsigned ElemBytes = VecTy.getVectorElementType().getSizeInBits() / `8`;
2280	for (int M : Mask) {
2281	if (M < `0`) {
2282	for (unsigned j = `0`; j != ElemBytes; ++j)
2283	ByteMask.push_back(Elt: -`1`);
2284	} else {
2285	for (unsigned j = `0`; j != ElemBytes; ++j)
2286	ByteMask.push_back(Elt: M*ElemBytes + j);
2287	}
2288	}
2289	assert(ByteMask.size() <= `8`);
2290
2291	// All non-undef (non-negative) indexes are well within [0..127], so they
2292	// fit in a single byte. Build two 64-bit words:
2293	// - MaskIdx where each byte is the corresponding index (for non-negative
2294	// indexes), and 0xFF for negative indexes, and
2295	// - MaskUnd that has 0xFF for each negative index.
2296	uint64_t MaskIdx = `0`;
2297	uint64_t MaskUnd = `0`;
2298	for (unsigned i = `0`, e = ByteMask.size(); i != e; ++i) {
2299	unsigned S = `8`*i;
2300	uint64_t M = ByteMask [i] & `0xFF`;
2301	if (M == `0xFF`)
2302	MaskUnd \|= M << S;
2303	MaskIdx \|= M << S;
2304	}
2305
2306	if (ByteMask.size() == `4`) {
2307	// Identity.
2308	if (MaskIdx == (`0x03020100` \| MaskUnd))
2309	return Op0;
2310	// Byte swap.
2311	if (MaskIdx == (`0x00010203` \| MaskUnd)) {
2312	SDValue T0 = DAG.getBitcast(MVT::i32, Op0);
2313	SDValue T1 = DAG.getNode(ISD::BSWAP, dl, MVT::i32, T0);
2314	return DAG.getBitcast(VT: VecTy, V: T1);
2315	}
2316
2317	// Byte packs.
2318	SDValue Concat10 =
2319	getCombine(Hi: Op1, Lo: Op0, dl, ResTy: typeJoin(Tys: {ty(Op: Op1), ty(Op: Op0)}), DAG);
2320	if (MaskIdx == (`0x06040200` \| MaskUnd))
2321	return getInstr(Hexagon::S2_vtrunehb, dl, VecTy, {Concat10}, DAG);
2322	if (MaskIdx == (`0x07050301` \| MaskUnd))
2323	return getInstr(Hexagon::S2_vtrunohb, dl, VecTy, {Concat10}, DAG);
2324
2325	SDValue Concat01 =
2326	getCombine(Hi: Op0, Lo: Op1, dl, ResTy: typeJoin(Tys: {ty(Op: Op0), ty(Op: Op1)}), DAG);
2327	if (MaskIdx == (`0x02000604` \| MaskUnd))
2328	return getInstr(Hexagon::S2_vtrunehb, dl, VecTy, {Concat01}, DAG);
2329	if (MaskIdx == (`0x03010705` \| MaskUnd))
2330	return getInstr(Hexagon::S2_vtrunohb, dl, VecTy, {Concat01}, DAG);
2331	}
2332
2333	if (ByteMask.size() == `8`) {
2334	// Identity.
2335	if (MaskIdx == (`0x0706050403020100ull` \| MaskUnd))
2336	return Op0;
2337	// Byte swap.
2338	if (MaskIdx == (`0x0001020304050607ull` \| MaskUnd)) {
2339	SDValue T0 = DAG.getBitcast(MVT::i64, Op0);
2340	SDValue T1 = DAG.getNode(ISD::BSWAP, dl, MVT::i64, T0);
2341	return DAG.getBitcast(VT: VecTy, V: T1);
2342	}
2343
2344	// Halfword picks.
2345	if (MaskIdx == (`0x0d0c050409080100ull` \| MaskUnd))
2346	return getInstr(Hexagon::S2_shuffeh, dl, VecTy, {Op1, Op0}, DAG);
2347	if (MaskIdx == (`0x0f0e07060b0a0302ull` \| MaskUnd))
2348	return getInstr(Hexagon::S2_shuffoh, dl, VecTy, {Op1, Op0}, DAG);
2349	if (MaskIdx == (`0x0d0c090805040100ull` \| MaskUnd))
2350	return getInstr(Hexagon::S2_vtrunewh, dl, VecTy, {Op1, Op0}, DAG);
2351	if (MaskIdx == (`0x0f0e0b0a07060302ull` \| MaskUnd))
2352	return getInstr(Hexagon::S2_vtrunowh, dl, VecTy, {Op1, Op0}, DAG);
2353	if (MaskIdx == (`0x0706030205040100ull` \| MaskUnd)) {
2354	VectorPair P = opSplit(Vec: Op0, dl, DAG);
2355	return getInstr(Hexagon::S2_packhl, dl, VecTy, {P.second, P.first}, DAG);
2356	}
2357
2358	// Byte packs.
2359	if (MaskIdx == (`0x0e060c040a020800ull` \| MaskUnd))
2360	return getInstr(Hexagon::S2_shuffeb, dl, VecTy, {Op1, Op0}, DAG);
2361	if (MaskIdx == (`0x0f070d050b030901ull` \| MaskUnd))
2362	return getInstr(Hexagon::S2_shuffob, dl, VecTy, {Op1, Op0}, DAG);
2363	}
2364
2365	return SDValue ();
2366	}
2367
2368	SDValue
2369	HexagonTargetLowering::getSplatValue(SDValue Op, SelectionDAG &DAG) const {
2370	switch (Op.getOpcode()) {
2371	case ISD::BUILD_VECTOR:
2372	if (SDValue S = cast<BuildVectorSDNode>(Val&: Op)->getSplatValue())
2373	return S;
2374	break;
2375	case ISD::SPLAT_VECTOR:
2376	return Op.getOperand(i: `0`);
2377	}
2378	return SDValue ();
2379	}
2380
2381	// Create a Hexagon-specific node for shifting a vector by an integer.
2382	SDValue
2383	HexagonTargetLowering::getVectorShiftByInt(SDValue Op, SelectionDAG &DAG)
2384	const {
2385	unsigned NewOpc;
2386	switch (Op.getOpcode()) {
2387	case ISD::SHL:
2388	NewOpc = HexagonISD::VASL;
2389	break;
2390	case ISD::SRA:
2391	NewOpc = HexagonISD::VASR;
2392	break;
2393	case ISD::SRL:
2394	NewOpc = HexagonISD::VLSR;
2395	break;
2396	default:
2397	llvm_unreachable("Unexpected shift opcode");
2398	}
2399
2400	if (SDValue Sp = getSplatValue(Op: Op.getOperand(i: `1`), DAG))
2401	return DAG.getNode(Opcode: NewOpc, DL: SDLoc (Op), VT: ty(Op), N1: Op.getOperand(i: `0`), N2: Sp);
2402	return SDValue ();
2403	}
2404
2405	SDValue
2406	HexagonTargetLowering::LowerVECTOR_SHIFT(SDValue Op, SelectionDAG &DAG) const {
2407	const SDLoc &dl(Op);
2408
2409	// First try to convert the shift (by vector) to a shift by a scalar.
2410	// If we first split the shift, the shift amount will become 'extract
2411	// subvector', and will no longer be recognized as scalar.
2412	SDValue Res = Op;
2413	if (SDValue S = getVectorShiftByInt(Op, DAG))
2414	Res = S;
2415
2416	unsigned Opc = Res.getOpcode();
2417	switch (Opc) {
2418	case HexagonISD::VASR:
2419	case HexagonISD::VLSR:
2420	case HexagonISD::VASL:
2421	break;
2422	default:
2423	// No instructions for shifts by non-scalars.
2424	return SDValue ();
2425	}
2426
2427	MVT ResTy = ty(Op: Res);
2428	if (ResTy.getVectorElementType() != MVT::i8)
2429	return Res;
2430
2431	// For shifts of i8, extend the inputs to i16, then truncate back to i8.
2432	assert(ResTy.getVectorElementType() == MVT::i8);
2433	SDValue Val = Res.getOperand(i: `0`), Amt = Res.getOperand(i: `1`);
2434
2435	auto ShiftPartI8 = [&dl, &DAG, this](unsigned Opc, SDValue V, SDValue A) {
2436	MVT Ty = ty(Op: V);
2437	MVT ExtTy = MVT::getVectorVT(MVT::i16, Ty.getVectorNumElements());
2438	SDValue ExtV = Opc == HexagonISD::VASR ? DAG.getSExtOrTrunc(Op: V, DL: dl, VT: ExtTy)
2439	: DAG.getZExtOrTrunc(Op: V, DL: dl, VT: ExtTy);
2440	SDValue ExtS = DAG.getNode(Opcode: Opc, DL: dl, VT: ExtTy, Ops: {ExtV, A});
2441	return DAG.getZExtOrTrunc(Op: ExtS, DL: dl, VT: Ty);
2442	};
2443
2444	if (ResTy.getSizeInBits() == `32`)
2445	return ShiftPartI8 (Opc, Val, Amt);
2446
2447	auto [LoV, HiV] = opSplit(Vec: Val, dl, DAG);
2448	return DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL: dl, VT: ResTy,
2449	Ops: {ShiftPartI8 (Opc, LoV, Amt), ShiftPartI8 (Opc, HiV, Amt)});
2450	}
2451
2452	SDValue
2453	HexagonTargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {
2454	if (isa<ConstantSDNode>(Val: Op.getOperand(i: `1`).getNode()))
2455	return Op;
2456	return SDValue ();
2457	}
2458
2459	SDValue
2460	HexagonTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
2461	MVT ResTy = ty(Op);
2462	SDValue InpV = Op.getOperand(i: `0`);
2463	MVT InpTy = ty(Op: InpV);
2464	assert(ResTy.getSizeInBits() == InpTy.getSizeInBits());
2465	const SDLoc &dl(Op);
2466
2467	// Handle conversion from i8 to v8i1.
2468	if (InpTy == MVT::i8) {
2469	if (ResTy == MVT::v8i1) {
2470	SDValue Sc = DAG.getBitcast(VT: tyScalar(Ty: InpTy), V: InpV);
2471	SDValue Ext = DAG.getZExtOrTrunc(Sc, dl, MVT::i32);
2472	return getInstr(Hexagon::C2_tfrrp, dl, ResTy, Ext, DAG);
2473	}
2474	return SDValue ();
2475	}
2476
2477	return Op;
2478	}
2479
2480	bool
2481	HexagonTargetLowering::getBuildVectorConstInts(ArrayRef<SDValue> Values,
2482	MVT VecTy, SelectionDAG &DAG,
2483	MutableArrayRef<ConstantInt> Consts) const* {
2484	MVT ElemTy = VecTy.getVectorElementType();
2485	unsigned ElemWidth = ElemTy.getSizeInBits();
2486	IntegerType IntTy = IntegerType::get(C&: DAG.getContext(), NumBits: ElemWidth);
2487	bool AllConst = true;
2488
2489	for (unsigned i = `0`, e = Values.size(); i != e; ++i) {
2490	SDValue V = Values [i];
2491	if (V.isUndef()) {
2492	Consts [i] = ConstantInt::get(Ty: IntTy, V: `0`);
2493	continue;
2494	}
2495	// Make sure to always cast to IntTy.
2496	if (auto *CN = dyn_cast<ConstantSDNode>(Val: V.getNode())) {
2497	const ConstantInt *CI = CN->getConstantIntValue();
2498	Consts [i] = ConstantInt::get(Ty: IntTy, V: CI->getValue().getSExtValue());
2499	} else if (auto *CN = dyn_cast<ConstantFPSDNode>(Val: V.getNode())) {
2500	const ConstantFP *CF = CN->getConstantFPValue();
2501	APInt A = CF->getValueAPF().bitcastToAPInt();
2502	Consts [i] = ConstantInt::get(Ty: IntTy, V: A.getZExtValue());
2503	} else {
2504	AllConst = false;
2505	}
2506	}
2507	return AllConst;
2508	}
2509
2510	SDValue
2511	HexagonTargetLowering::buildVector32(ArrayRef<SDValue> Elem, const SDLoc &dl,
2512	MVT VecTy, SelectionDAG &DAG) const {
2513	MVT ElemTy = VecTy.getVectorElementType();
2514	assert(VecTy.getVectorNumElements() == Elem.size());
2515
2516	SmallVector<ConstantInt*,`4`> Consts(Elem.size());
2517	bool AllConst = getBuildVectorConstInts(Values: Elem, VecTy, DAG, Consts);
2518
2519	unsigned First, Num = Elem.size();
2520	for (First = `0`; First != Num; ++First) {
2521	if (!isUndef(Op: Elem [First]))
2522	break;
2523	}
2524	if (First == Num)
2525	return DAG.getUNDEF(VT: VecTy);
2526
2527	if (AllConst &&
2528	llvm::all_of(Range&: Consts, P: [](ConstantInt CI) { return* CI->isZero(); }))
2529	return getZero(dl, Ty: VecTy, DAG);
2530
2531	if (ElemTy == MVT::i16 \|\| ElemTy == MVT::f16) {
2532	assert(Elem.size() == `2`);
2533	if (AllConst) {
2534	// The 'Consts' array will have all values as integers regardless
2535	// of the vector element type.
2536	uint32_t V = (Consts [`0`]->getZExtValue() & `0xFFFF`) \|
2537	Consts [`1`]->getZExtValue() << `16`;
2538	return DAG.getBitcast(VecTy, DAG.getConstant(V, dl, MVT::i32));
2539	}
2540	SDValue E0, E1;
2541	if (ElemTy == MVT::f16) {
2542	E0 = DAG.getZExtOrTrunc(DAG.getBitcast(MVT::i16, Elem[`0`]), dl, MVT::i32);
2543	E1 = DAG.getZExtOrTrunc(DAG.getBitcast(MVT::i16, Elem[`1`]), dl, MVT::i32);
2544	} else {
2545	E0 = Elem [`0`];
2546	E1 = Elem [`1`];
2547	}
2548	SDValue N = getInstr(Hexagon::A2_combine_ll, dl, MVT::i32, {E1, E0}, DAG);
2549	return DAG.getBitcast(VT: VecTy, V: N);
2550	}
2551
2552	if (ElemTy == MVT::i8) {
2553	// First try generating a constant.
2554	if (AllConst) {
2555	int32_t V = (Consts [`0`]->getZExtValue() & `0xFF`) \|
2556	(Consts [`1`]->getZExtValue() & `0xFF`) << `8` \|
2557	(Consts [`2`]->getZExtValue() & `0xFF`) << `16` \|
2558	Consts [`3`]->getZExtValue() << `24`;
2559	return DAG.getBitcast(MVT::v4i8, DAG.getConstant(V, dl, MVT::i32));
2560	}
2561
2562	// Then try splat.
2563	bool IsSplat = true;
2564	for (unsigned i = First+`1`; i != Num; ++i) {
2565	if (Elem [i] == Elem [First] \|\| isUndef(Op: Elem [i]))
2566	continue;
2567	IsSplat = false;
2568	break;
2569	}
2570	if (IsSplat) {
2571	// Legalize the operand of SPLAT_VECTOR.
2572	SDValue Ext = DAG.getZExtOrTrunc(Elem[First], dl, MVT::i32);
2573	return DAG.getNode(Opcode: ISD::SPLAT_VECTOR, DL: dl, VT: VecTy, Operand: Ext);
2574	}
2575
2576	// Generate
2577	// (zxtb(Elem[0]) \| (zxtb(Elem[1]) << 8)) \|
2578	// (zxtb(Elem[2]) \| (zxtb(Elem[3]) << 8)) << 16
2579	assert(Elem.size() == `4`);
2580	SDValue Vs[`4`];
2581	for (unsigned i = `0`; i != `4`; ++i) {
2582	Vs[i] = DAG.getZExtOrTrunc(Elem[i], dl, MVT::i32);
2583	Vs[i] = DAG.getZeroExtendInReg(Vs[i], dl, MVT::i8);
2584	}
2585	SDValue S8 = DAG.getConstant(`8`, dl, MVT::i32);
2586	SDValue T0 = DAG.getNode(ISD::SHL, dl, MVT::i32, {Vs[`1`], S8});
2587	SDValue T1 = DAG.getNode(ISD::SHL, dl, MVT::i32, {Vs[`3`], S8});
2588	SDValue B0 = DAG.getNode(ISD::OR, dl, MVT::i32, {Vs[`0`], T0});
2589	SDValue B1 = DAG.getNode(ISD::OR, dl, MVT::i32, {Vs[`2`], T1});
2590
2591	SDValue R = getInstr(Hexagon::A2_combine_ll, dl, MVT::i32, {B1, B0}, DAG);
2592	return DAG.getBitcast(MVT::v4i8, R);
2593	}
2594
2595	#ifndef NDEBUG
2596	dbgs() << "VecTy: " << VecTy << `'\n'`;
2597	#endif
2598	llvm_unreachable("Unexpected vector element type");
2599	}
2600
2601	SDValue
2602	HexagonTargetLowering::buildVector64(ArrayRef<SDValue> Elem, const SDLoc &dl,
2603	MVT VecTy, SelectionDAG &DAG) const {
2604	MVT ElemTy = VecTy.getVectorElementType();
2605	assert(VecTy.getVectorNumElements() == Elem.size());
2606
2607	SmallVector<ConstantInt*,`8`> Consts(Elem.size());
2608	bool AllConst = getBuildVectorConstInts(Values: Elem, VecTy, DAG, Consts);
2609
2610	unsigned First, Num = Elem.size();
2611	for (First = `0`; First != Num; ++First) {
2612	if (!isUndef(Op: Elem [First]))
2613	break;
2614	}
2615	if (First == Num)
2616	return DAG.getUNDEF(VT: VecTy);
2617
2618	if (AllConst &&
2619	llvm::all_of(Range&: Consts, P: [](ConstantInt CI) { return* CI->isZero(); }))
2620	return getZero(dl, Ty: VecTy, DAG);
2621
2622	// First try splat if possible.
2623	if (ElemTy == MVT::i16 \|\| ElemTy == MVT::f16) {
2624	bool IsSplat = true;
2625	for (unsigned i = First+`1`; i != Num; ++i) {
2626	if (Elem [i] == Elem [First] \|\| isUndef(Op: Elem [i]))
2627	continue;
2628	IsSplat = false;
2629	break;
2630	}
2631	if (IsSplat) {
2632	// Legalize the operand of SPLAT_VECTOR
2633	SDValue S = ElemTy == MVT::f16 ? DAG.getBitcast(MVT::i16, Elem[First])
2634	: Elem[First];
2635	SDValue Ext = DAG.getZExtOrTrunc(S, dl, MVT::i32);
2636	return DAG.getNode(Opcode: ISD::SPLAT_VECTOR, DL: dl, VT: VecTy, Operand: Ext);
2637	}
2638	}
2639
2640	// Then try constant.
2641	if (AllConst) {
2642	uint64_t Val = `0`;
2643	unsigned W = ElemTy.getSizeInBits();
2644	uint64_t Mask = (`1ull` << W) - `1`;
2645	for (unsigned i = `0`; i != Num; ++i)
2646	Val = (Val << W) \| (Consts [Num-`1`-i]->getZExtValue() & Mask);
2647	SDValue V0 = DAG.getConstant(Val, dl, MVT::i64);
2648	return DAG.getBitcast(VT: VecTy, V: V0);
2649	}
2650
2651	// Build two 32-bit vectors and concatenate.
2652	MVT HalfTy = MVT::getVectorVT(VT: ElemTy, NumElements: Num/`2`);
2653	SDValue L = (ElemTy == MVT::i32)
2654	? Elem[`0`]
2655	: buildVector32(Elem.take_front(Num/`2`), dl, HalfTy, DAG);
2656	SDValue H = (ElemTy == MVT::i32)
2657	? Elem[`1`]
2658	: buildVector32(Elem.drop_front(Num/`2`), dl, HalfTy, DAG);
2659	return getCombine(Hi: H, Lo: L, dl, ResTy: VecTy, DAG);
2660	}
2661
2662	SDValue
2663	HexagonTargetLowering::extractVector(SDValue VecV, SDValue IdxV,
2664	const SDLoc &dl, MVT ValTy, MVT ResTy,
2665	SelectionDAG &DAG) const {
2666	MVT VecTy = ty(Op: VecV);
2667	assert(!ValTy.isVector() \|\|
2668	VecTy.getVectorElementType() == ValTy.getVectorElementType());
2669	if (VecTy.getVectorElementType() == MVT::i1)
2670	return extractVectorPred(VecV, IdxV, dl, ValTy, ResTy, DAG);
2671
2672	unsigned VecWidth = VecTy.getSizeInBits();
2673	unsigned ValWidth = ValTy.getSizeInBits();
2674	unsigned ElemWidth = VecTy.getVectorElementType().getSizeInBits();
2675	assert((VecWidth % ElemWidth) == `0`);
2676	assert(VecWidth == `32` \|\| VecWidth == `64`);
2677
2678	// Cast everything to scalar integer types.
2679	MVT ScalarTy = tyScalar(Ty: VecTy);
2680	VecV = DAG.getBitcast(VT: ScalarTy, V: VecV);
2681
2682	SDValue WidthV = DAG.getConstant(ValWidth, dl, MVT::i32);
2683	SDValue ExtV;
2684
2685	if (auto *IdxN = dyn_cast<ConstantSDNode>(Val&: IdxV)) {
2686	unsigned Off = IdxN->getZExtValue() * ElemWidth;
2687	if (VecWidth == `64` && ValWidth == `32`) {
2688	assert(Off == `0` \|\| Off == `32`);
2689	ExtV = Off == `0` ? LoHalf(V: VecV, DAG) : HiHalf(V: VecV, DAG);
2690	} else if (Off == `0` && (ValWidth % `8`) == `0`) {
2691	ExtV = DAG.getZeroExtendInReg(Op: VecV, DL: dl, VT: tyScalar(Ty: ValTy));
2692	} else {
2693	SDValue OffV = DAG.getConstant(Off, dl, MVT::i32);
2694	// The return type of EXTRACTU must be the same as the type of the
2695	// input vector.
2696	ExtV = DAG.getNode(Opcode: HexagonISD::EXTRACTU, DL: dl, VT: ScalarTy,
2697	Ops: {VecV, WidthV, OffV});
2698	}
2699	} else {
2700	if (ty(IdxV) != MVT::i32)
2701	IdxV = DAG.getZExtOrTrunc(IdxV, dl, MVT::i32);
2702	SDValue OffV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV,
2703	DAG.getConstant(ElemWidth, dl, MVT::i32));
2704	ExtV = DAG.getNode(Opcode: HexagonISD::EXTRACTU, DL: dl, VT: ScalarTy,
2705	Ops: {VecV, WidthV, OffV});
2706	}
2707
2708	// Cast ExtV to the requested result type.
2709	ExtV = DAG.getZExtOrTrunc(Op: ExtV, DL: dl, VT: tyScalar(Ty: ResTy));
2710	ExtV = DAG.getBitcast(VT: ResTy, V: ExtV);
2711	return ExtV;
2712	}
2713
2714	SDValue
2715	HexagonTargetLowering::extractVectorPred(SDValue VecV, SDValue IdxV,
2716	const SDLoc &dl, MVT ValTy, MVT ResTy,
2717	SelectionDAG &DAG) const {
2718	// Special case for v{8,4,2}i1 (the only boolean vectors legal in Hexagon
2719	// without any coprocessors).
2720	MVT VecTy = ty(Op: VecV);
2721	unsigned VecWidth = VecTy.getSizeInBits();
2722	unsigned ValWidth = ValTy.getSizeInBits();
2723	assert(VecWidth == VecTy.getVectorNumElements() &&
2724	"Vector elements should equal vector width size");
2725	assert(VecWidth == `8` \|\| VecWidth == `4` \|\| VecWidth == `2`);
2726
2727	// Check if this is an extract of the lowest bit.
2728	if (isNullConstant(V: IdxV) && ValTy.getSizeInBits() == `1`) {
2729	// Extracting the lowest bit is a no-op, but it changes the type,
2730	// so it must be kept as an operation to avoid errors related to
2731	// type mismatches.
2732	return DAG.getNode(HexagonISD::TYPECAST, dl, MVT::i1, VecV);
2733	}
2734
2735	// If the value extracted is a single bit, use tstbit.
2736	if (ValWidth == `1`) {
2737	SDValue A0 = getInstr(Hexagon::C2_tfrpr, dl, MVT::i32, {VecV}, DAG);
2738	SDValue M0 = DAG.getConstant(`8` / VecWidth, dl, MVT::i32);
2739	SDValue I0 = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV, M0);
2740	return DAG.getNode(HexagonISD::TSTBIT, dl, MVT::i1, A0, I0);
2741	}
2742
2743	// Each bool vector (v2i1, v4i1, v8i1) always occupies 8 bits in
2744	// a predicate register. The elements of the vector are repeated
2745	// in the register (if necessary) so that the total number is 8.
2746	// The extracted subvector will need to be expanded in such a way.
2747	unsigned Scale = VecWidth / ValWidth;
2748
2749	// Generate (p2d VecV) >> 8Idx to move the interesting bytes to*
2750	// position 0.
2751	assert(ty(IdxV) == MVT::i32);
2752	unsigned VecRep = `8` / VecWidth;
2753	SDValue S0 = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV,
2754	DAG.getConstant(`8`*VecRep, dl, MVT::i32));
2755	SDValue T0 = DAG.getNode(HexagonISD::P2D, dl, MVT::i64, VecV);
2756	SDValue T1 = DAG.getNode(ISD::SRL, dl, MVT::i64, T0, S0);
2757	while (Scale > `1`) {
2758	// The longest possible subvector is at most 32 bits, so it is always
2759	// contained in the low subregister.
2760	T1 = LoHalf(V: T1, DAG);
2761	T1 = expandPredicate(Vec32: T1, dl, DAG);
2762	Scale /= `2`;
2763	}
2764
2765	return DAG.getNode(Opcode: HexagonISD::D2P, DL: dl, VT: ResTy, Operand: T1);
2766	}
2767
2768	SDValue
2769	HexagonTargetLowering::insertVector(SDValue VecV, SDValue ValV, SDValue IdxV,
2770	const SDLoc &dl, MVT ValTy,
2771	SelectionDAG &DAG) const {
2772	MVT VecTy = ty(Op: VecV);
2773	if (VecTy.getVectorElementType() == MVT::i1)
2774	return insertVectorPred(VecV, ValV, IdxV, dl, ValTy, DAG);
2775
2776	unsigned VecWidth = VecTy.getSizeInBits();
2777	unsigned ValWidth = ValTy.getSizeInBits();
2778	assert(VecWidth == `32` \|\| VecWidth == `64`);
2779	assert((VecWidth % ValWidth) == `0`);
2780
2781	// Cast everything to scalar integer types.
2782	MVT ScalarTy = MVT::getIntegerVT(BitWidth: VecWidth);
2783	// The actual type of ValV may be different than ValTy (which is related
2784	// to the vector type).
2785	unsigned VW = ty(Op: ValV).getSizeInBits();
2786	ValV = DAG.getBitcast(VT: MVT::getIntegerVT(BitWidth: VW), V: ValV);
2787	VecV = DAG.getBitcast(VT: ScalarTy, V: VecV);
2788	if (VW != VecWidth)
2789	ValV = DAG.getAnyExtOrTrunc(Op: ValV, DL: dl, VT: ScalarTy);
2790
2791	SDValue WidthV = DAG.getConstant(ValWidth, dl, MVT::i32);
2792	SDValue InsV;
2793
2794	if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val&: IdxV)) {
2795	unsigned W = C->getZExtValue() * ValWidth;
2796	SDValue OffV = DAG.getConstant(W, dl, MVT::i32);
2797	InsV = DAG.getNode(Opcode: HexagonISD::INSERT, DL: dl, VT: ScalarTy,
2798	Ops: {VecV, ValV, WidthV, OffV});
2799	} else {
2800	if (ty(IdxV) != MVT::i32)
2801	IdxV = DAG.getZExtOrTrunc(IdxV, dl, MVT::i32);
2802	SDValue OffV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV, WidthV);
2803	InsV = DAG.getNode(Opcode: HexagonISD::INSERT, DL: dl, VT: ScalarTy,
2804	Ops: {VecV, ValV, WidthV, OffV});
2805	}
2806
2807	return DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: VecTy, Operand: InsV);
2808	}
2809
2810	SDValue
2811	HexagonTargetLowering::insertVectorPred(SDValue VecV, SDValue ValV,
2812	SDValue IdxV, const SDLoc &dl,
2813	MVT ValTy, SelectionDAG &DAG) const {
2814	MVT VecTy = ty(Op: VecV);
2815	unsigned VecLen = VecTy.getVectorNumElements();
2816
2817	if (ValTy == MVT::i1) {
2818	SDValue ToReg = getInstr(Hexagon::C2_tfrpr, dl, MVT::i32, {VecV}, DAG);
2819	SDValue Ext = DAG.getSExtOrTrunc(ValV, dl, MVT::i32);
2820	SDValue Width = DAG.getConstant(`8` / VecLen, dl, MVT::i32);
2821	SDValue Idx = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV, Width);
2822	SDValue Ins =
2823	DAG.getNode(HexagonISD::INSERT, dl, MVT::i32, {ToReg, Ext, Width, Idx});
2824	return getInstr(Hexagon::C2_tfrrp, dl, VecTy, {Ins}, DAG);
2825	}
2826
2827	assert(ValTy.getVectorElementType() == MVT::i1);
2828	SDValue ValR = ValTy.isVector()
2829	? DAG.getNode(HexagonISD::P2D, dl, MVT::i64, ValV)
2830	: DAG.getSExtOrTrunc(ValV, dl, MVT::i64);
2831
2832	unsigned Scale = VecLen / ValTy.getVectorNumElements();
2833	assert(Scale > `1`);
2834
2835	for (unsigned R = Scale; R > `1`; R /= `2`) {
2836	ValR = contractPredicate(Vec64: ValR, dl, DAG);
2837	ValR = getCombine(DAG.getUNDEF(MVT::i32), ValR, dl, MVT::i64, DAG);
2838	}
2839
2840	SDValue Width = DAG.getConstant(`64` / Scale, dl, MVT::i32);
2841	SDValue Idx = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV, Width);
2842	SDValue VecR = DAG.getNode(HexagonISD::P2D, dl, MVT::i64, VecV);
2843	SDValue Ins =
2844	DAG.getNode(HexagonISD::INSERT, dl, MVT::i64, {VecR, ValR, Width, Idx});
2845	return DAG.getNode(Opcode: HexagonISD::D2P, DL: dl, VT: VecTy, Operand: Ins);
2846	}
2847
2848	SDValue
2849	HexagonTargetLowering::expandPredicate(SDValue Vec32, const SDLoc &dl,
2850	SelectionDAG &DAG) const {
2851	assert(ty(Vec32).getSizeInBits() == `32`);
2852	if (isUndef(Vec32))
2853	return DAG.getUNDEF(MVT::i64);
2854	SDValue P = DAG.getBitcast(MVT::v4i8, Vec32);
2855	SDValue X = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i16, P);
2856	return DAG.getBitcast(MVT::i64, X);
2857	}
2858
2859	SDValue
2860	HexagonTargetLowering::contractPredicate(SDValue Vec64, const SDLoc &dl,
2861	SelectionDAG &DAG) const {
2862	assert(ty(Vec64).getSizeInBits() == `64`);
2863	if (isUndef(Vec64))
2864	return DAG.getUNDEF(MVT::i32);
2865	// Collect even bytes:
2866	SDValue A = DAG.getBitcast(MVT::v8i8, Vec64);
2867	SDValue S = DAG.getVectorShuffle(MVT::v8i8, dl, A, DAG.getUNDEF(MVT::v8i8),
2868	{`0`, `2`, `4`, `6`, `1`, `3`, `5`, `7`});
2869	return extractVector(S, DAG.getConstant(`0`, dl, MVT::i32), dl, MVT::v4i8,
2870	MVT::i32, DAG);
2871	}
2872
2873	SDValue
2874	HexagonTargetLowering::getZero(const SDLoc &dl, MVT Ty, SelectionDAG &DAG)
2875	const {
2876	if (Ty.isVector()) {
2877	unsigned W = Ty.getSizeInBits();
2878	if (W <= `64`)
2879	return DAG.getBitcast(VT: Ty, V: DAG.getConstant(Val: `0`, DL: dl, VT: MVT::getIntegerVT(BitWidth: W)));
2880	return DAG.getNode(ISD::SPLAT_VECTOR, dl, Ty, getZero(dl, MVT::i32, DAG));
2881	}
2882
2883	if (Ty.isInteger())
2884	return DAG.getConstant(Val: `0`, DL: dl, VT: Ty);
2885	if (Ty.isFloatingPoint())
2886	return DAG.getConstantFP(Val: `0.0`, DL: dl, VT: Ty);
2887	llvm_unreachable("Invalid type for zero");
2888	}
2889
2890	SDValue
2891	HexagonTargetLowering::appendUndef(SDValue Val, MVT ResTy, SelectionDAG &DAG)
2892	const {
2893	MVT ValTy = ty(Op: Val);
2894	assert(ValTy.getVectorElementType() == ResTy.getVectorElementType());
2895
2896	unsigned ValLen = ValTy.getVectorNumElements();
2897	unsigned ResLen = ResTy.getVectorNumElements();
2898	if (ValLen == ResLen)
2899	return Val;
2900
2901	const SDLoc &dl(Val);
2902	assert(ValLen < ResLen);
2903	assert(ResLen % ValLen == `0`);
2904
2905	SmallVector<SDValue, `4`> Concats = {Val};
2906	for (unsigned i = `1`, e = ResLen / ValLen; i < e; ++i)
2907	Concats.push_back(Elt: DAG.getUNDEF(VT: ValTy));
2908
2909	return DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL: dl, VT: ResTy, Ops: Concats);
2910	}
2911
2912	SDValue
2913	HexagonTargetLowering::getCombine(SDValue Hi, SDValue Lo, const SDLoc &dl,
2914	MVT ResTy, SelectionDAG &DAG) const {
2915	MVT ElemTy = ty(Op: Hi);
2916	assert(ElemTy == ty(Lo));
2917
2918	if (!ElemTy.isVector()) {
2919	assert(ElemTy.isScalarInteger());
2920	MVT PairTy = MVT::getIntegerVT(BitWidth: `2` * ElemTy.getSizeInBits());
2921	SDValue Pair = DAG.getNode(Opcode: ISD::BUILD_PAIR, DL: dl, VT: PairTy, N1: Lo, N2: Hi);
2922	return DAG.getBitcast(VT: ResTy, V: Pair);
2923	}
2924
2925	unsigned Width = ElemTy.getSizeInBits();
2926	MVT IntTy = MVT::getIntegerVT(BitWidth: Width);
2927	MVT PairTy = MVT::getIntegerVT(BitWidth: `2` * Width);
2928	SDValue Pair =
2929	DAG.getNode(Opcode: ISD::BUILD_PAIR, DL: dl, VT: PairTy,
2930	Ops: {DAG.getBitcast(VT: IntTy, V: Lo), DAG.getBitcast(VT: IntTy, V: Hi)});
2931	return DAG.getBitcast(VT: ResTy, V: Pair);
2932	}
2933
2934	SDValue
2935	HexagonTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
2936	MVT VecTy = ty(Op);
2937	unsigned BW = VecTy.getSizeInBits();
2938	const SDLoc &dl(Op);
2939	SmallVector<SDValue,`8`> Ops;
2940	for (unsigned i = `0`, e = Op.getNumOperands(); i != e; ++i)
2941	Ops.push_back(Elt: Op.getOperand(i));
2942
2943	if (BW == `32`)
2944	return buildVector32(Elem: Ops, dl, VecTy, DAG);
2945	if (BW == `64`)
2946	return buildVector64(Elem: Ops, dl, VecTy, DAG);
2947
2948	if (VecTy == MVT::v8i1 \|\| VecTy == MVT::v4i1 \|\| VecTy == MVT::v2i1) {
2949	// Check if this is a special case or all-0 or all-1.
2950	bool All0 = true, All1 = true;
2951	for (SDValue P : Ops) {
2952	auto *CN = dyn_cast<ConstantSDNode>(Val: P.getNode());
2953	if (CN == nullptr) {
2954	All0 = All1 = false;
2955	break;
2956	}
2957	uint32_t C = CN->getZExtValue();
2958	All0 &= (C == `0`);
2959	All1 &= (C == `1`);
2960	}
2961	if (All0)
2962	return DAG.getNode(Opcode: HexagonISD::PFALSE, DL: dl, VT: VecTy);
2963	if (All1)
2964	return DAG.getNode(Opcode: HexagonISD::PTRUE, DL: dl, VT: VecTy);
2965
2966	// For each i1 element in the resulting predicate register, put 1
2967	// shifted by the index of the element into a general-purpose register,
2968	// then or them together and transfer it back into a predicate register.
2969	SDValue Rs[`8`];
2970	SDValue Z = getZero(dl, MVT::i32, DAG);
2971	// Always produce 8 bits, repeat inputs if necessary.
2972	unsigned Rep = `8` / VecTy.getVectorNumElements();
2973	for (unsigned i = `0`; i != `8`; ++i) {
2974	SDValue S = DAG.getConstant(`1ull` << i, dl, MVT::i32);
2975	Rs[i] = DAG.getSelect(dl, MVT::i32, Ops[i/Rep], S, Z);
2976	}
2977	for (ArrayRef<SDValue> A(Rs); A.size() != `1`; A = A.drop_back(N: A.size()/`2`)) {
2978	for (unsigned i = `0`, e = A.size()/`2`; i != e; ++i)
2979	Rs[i] = DAG.getNode(ISD::OR, dl, MVT::i32, Rs[`2`i], Rs[`2`i+`1`]);
2980	}
2981	// Move the value directly to a predicate register.
2982	return getInstr(Hexagon::C2_tfrrp, dl, VecTy, {Rs[`0`]}, DAG);
2983	}
2984
2985	return SDValue ();
2986	}
2987
2988	SDValue
2989	HexagonTargetLowering::LowerCONCAT_VECTORS(SDValue Op,
2990	SelectionDAG &DAG) const {
2991	MVT VecTy = ty(Op);
2992	const SDLoc &dl(Op);
2993	if (VecTy.getSizeInBits() == `64`) {
2994	assert(Op.getNumOperands() == `2`);
2995	return getCombine(Hi: Op.getOperand(i: `1`), Lo: Op.getOperand(i: `0`), dl, ResTy: VecTy, DAG);
2996	}
2997
2998	MVT ElemTy = VecTy.getVectorElementType();
2999	if (ElemTy == MVT::i1) {
3000	assert(VecTy == MVT::v2i1 \|\| VecTy == MVT::v4i1 \|\| VecTy == MVT::v8i1);
3001	MVT OpTy = ty(Op: Op.getOperand(i: `0`));
3002	// Scale is how many times the operands need to be contracted to match
3003	// the representation in the target register.
3004	unsigned Scale = VecTy.getVectorNumElements() / OpTy.getVectorNumElements();
3005	assert(Scale == Op.getNumOperands() && Scale > `1`);
3006
3007	// First, convert all bool vectors to integers, then generate pairwise
3008	// inserts to form values of doubled length. Up until there are only
3009	// two values left to concatenate, all of these values will fit in a
3010	// 32-bit integer, so keep them as i32 to use 32-bit inserts.
3011	SmallVector<SDValue,`4`> Words[`2`];
3012	unsigned IdxW = `0`;
3013
3014	for (SDValue P : Op.getNode()->op_values()) {
3015	SDValue W = DAG.getNode(HexagonISD::P2D, dl, MVT::i64, P);
3016	for (unsigned R = Scale; R > `1`; R /= `2`) {
3017	W = contractPredicate(Vec64: W, dl, DAG);
3018	W = getCombine(DAG.getUNDEF(MVT::i32), W, dl, MVT::i64, DAG);
3019	}
3020	W = LoHalf(V: W, DAG);
3021	Words[IdxW].push_back(Elt: W);
3022	}
3023
3024	while (Scale > `2`) {
3025	SDValue WidthV = DAG.getConstant(`64` / Scale, dl, MVT::i32);
3026	Words[IdxW ^ `1`].clear();
3027
3028	for (unsigned i = `0`, e = Words[IdxW].size(); i != e; i += `2`) {
3029	SDValue W0 = Words[IdxW][i], W1 = Words[IdxW][i+`1`];
3030	// Insert W1 into W0 right next to the significant bits of W0.
3031	SDValue T = DAG.getNode(HexagonISD::INSERT, dl, MVT::i32,
3032	{W0, W1, WidthV, WidthV});
3033	Words[IdxW ^ `1`].push_back(Elt: T);
3034	}
3035	IdxW ^= `1`;
3036	Scale /= `2`;
3037	}
3038
3039	// At this point there should only be two words left, and Scale should be 2.
3040	assert(Scale == `2` && Words[IdxW].size() == `2`);
3041
3042	SDValue WW = getCombine(Words[IdxW][`1`], Words[IdxW][`0`], dl, MVT::i64, DAG);
3043	return DAG.getNode(Opcode: HexagonISD::D2P, DL: dl, VT: VecTy, Operand: WW);
3044	}
3045
3046	return SDValue ();
3047	}
3048
3049	SDValue
3050	HexagonTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
3051	SelectionDAG &DAG) const {
3052	SDValue Vec = Op.getOperand(i: `0`);
3053	MVT ElemTy = ty(Op: Vec).getVectorElementType();
3054	return extractVector(VecV: Vec, IdxV: Op.getOperand(i: `1`), dl: SDLoc (Op), ValTy: ElemTy, ResTy: ty(Op), DAG);
3055	}
3056
3057	SDValue
3058	HexagonTargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
3059	SelectionDAG &DAG) const {
3060	return extractVector(VecV: Op.getOperand(i: `0`), IdxV: Op.getOperand(i: `1`), dl: SDLoc (Op),
3061	ValTy: ty(Op), ResTy: ty(Op), DAG);
3062	}
3063
3064	SDValue
3065	HexagonTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
3066	SelectionDAG &DAG) const {
3067	return insertVector(VecV: Op.getOperand(i: `0`), ValV: Op.getOperand(i: `1`), IdxV: Op.getOperand(i: `2`),
3068	dl: SDLoc (Op), ValTy: ty(Op).getVectorElementType(), DAG);
3069	}
3070
3071	SDValue
3072	HexagonTargetLowering::LowerINSERT_SUBVECTOR(SDValue Op,
3073	SelectionDAG &DAG) const {
3074	SDValue ValV = Op.getOperand(i: `1`);
3075	return insertVector(VecV: Op.getOperand(i: `0`), ValV, IdxV: Op.getOperand(i: `2`),
3076	dl: SDLoc (Op), ValTy: ty(Op: ValV), DAG);
3077	}
3078
3079	bool
3080	HexagonTargetLowering::allowTruncateForTailCall(Type Ty1, Type Ty2) const {
3081	// Assuming the caller does not have either a signext or zeroext modifier, and
3082	// only one value is accepted, any reasonable truncation is allowed.
3083	if (!Ty1->isIntegerTy() \|\| !Ty2->isIntegerTy())
3084	return false;
3085
3086	// FIXME: in principle up to 64-bit could be made safe, but it would be very
3087	// fragile at the moment: any support for multiple value returns would be
3088	// liable to disallow tail calls involving i64 -> iN truncation in many cases.
3089	return Ty1->getPrimitiveSizeInBits() <= `32`;
3090	}
3091
3092	SDValue
3093	HexagonTargetLowering::LowerLoad(SDValue Op, SelectionDAG &DAG) const {
3094	MVT Ty = ty(Op);
3095	const SDLoc &dl(Op);
3096	LoadSDNode *LN = cast<LoadSDNode>(Val: Op.getNode());
3097	MVT MemTy = LN->getMemoryVT().getSimpleVT();
3098	ISD::LoadExtType ET = LN->getExtensionType();
3099
3100	bool LoadPred = MemTy == MVT::v2i1 \|\| MemTy == MVT::v4i1 \|\| MemTy == MVT::v8i1;
3101	if (LoadPred) {
3102	SDValue NL = DAG.getLoad(
3103	LN->getAddressingMode(), ISD::ZEXTLOAD, MVT::i32, dl, LN->getChain(),
3104	LN->getBasePtr(), LN->getOffset(), LN->getPointerInfo(),
3105	/MemoryVT/ MVT::i8, LN->getAlign(), LN->getMemOperand()->getFlags(),
3106	LN->getAAInfo(), LN->getRanges());
3107	LN = cast<LoadSDNode>(Val: NL.getNode());
3108	}
3109
3110	Align ClaimAlign = LN->getAlign();
3111	if (!validateConstPtrAlignment(Ptr: LN->getBasePtr(), NeedAlign: ClaimAlign, dl, DAG))
3112	return replaceMemWithUndef(Op, DAG);
3113
3114	// Call LowerUnalignedLoad for all loads, it recognizes loads that
3115	// don't need extra aligning.
3116	SDValue LU = LowerUnalignedLoad(Op: SDValue (LN, `0`), DAG);
3117	if (LoadPred) {
3118	SDValue TP = getInstr(Hexagon::C2_tfrrp, dl, MemTy, {LU}, DAG);
3119	if (ET == ISD::SEXTLOAD) {
3120	TP = DAG.getSExtOrTrunc(Op: TP, DL: dl, VT: Ty);
3121	} else if (ET != ISD::NON_EXTLOAD) {
3122	TP = DAG.getZExtOrTrunc(Op: TP, DL: dl, VT: Ty);
3123	}
3124	SDValue Ch = cast<LoadSDNode>(Val: LU.getNode())->getChain();
3125	return DAG.getMergeValues(Ops: {TP, Ch}, dl);
3126	}
3127	return LU;
3128	}
3129
3130	SDValue
3131	HexagonTargetLowering::LowerStore(SDValue Op, SelectionDAG &DAG) const {
3132	const SDLoc &dl(Op);
3133	StoreSDNode *SN = cast<StoreSDNode>(Val: Op.getNode());
3134	SDValue Val = SN->getValue();
3135	MVT Ty = ty(Op: Val);
3136
3137	if (Ty == MVT::v2i1 \|\| Ty == MVT::v4i1 \|\| Ty == MVT::v8i1) {
3138	// Store the exact predicate (all bits).
3139	SDValue TR = getInstr(Hexagon::C2_tfrpr, dl, MVT::i32, {Val}, DAG);
3140	SDValue NS = DAG.getTruncStore(SN->getChain(), dl, TR, SN->getBasePtr(),
3141	MVT::i8, SN->getMemOperand());
3142	if (SN->isIndexed()) {
3143	NS = DAG.getIndexedStore(OrigStore: NS, dl, Base: SN->getBasePtr(), Offset: SN->getOffset(),
3144	AM: SN->getAddressingMode());
3145	}
3146	SN = cast<StoreSDNode>(Val: NS.getNode());
3147	}
3148
3149	Align ClaimAlign = SN->getAlign();
3150	if (!validateConstPtrAlignment(Ptr: SN->getBasePtr(), NeedAlign: ClaimAlign, dl, DAG))
3151	return replaceMemWithUndef(Op, DAG);
3152
3153	MVT StoreTy = SN->getMemoryVT().getSimpleVT();
3154	Align NeedAlign = Subtarget.getTypeAlignment(Ty: StoreTy);
3155	if (ClaimAlign < NeedAlign)
3156	return expandUnalignedStore(ST: SN, DAG);
3157	return SDValue (SN, `0`);
3158	}
3159
3160	SDValue
3161	HexagonTargetLowering::LowerUnalignedLoad(SDValue Op, SelectionDAG &DAG)
3162	const {
3163	LoadSDNode *LN = cast<LoadSDNode>(Val: Op.getNode());
3164	MVT LoadTy = ty(Op);
3165	unsigned NeedAlign = Subtarget.getTypeAlignment(Ty: LoadTy).value();
3166	unsigned HaveAlign = LN->getAlign().value();
3167	if (HaveAlign >= NeedAlign)
3168	return Op;
3169
3170	const SDLoc &dl(Op);
3171	const DataLayout &DL = DAG.getDataLayout();
3172	LLVMContext &Ctx = *DAG.getContext();
3173
3174	// If the load aligning is disabled or the load can be broken up into two
3175	// smaller legal loads, do the default (target-independent) expansion.
3176	bool DoDefault = false;
3177	// Handle it in the default way if this is an indexed load.
3178	if (!LN->isUnindexed())
3179	DoDefault = true;
3180
3181	if (!AlignLoads) {
3182	if (allowsMemoryAccessForAlignment(Context&: Ctx, DL, VT: LN->getMemoryVT(),
3183	MMO: *LN->getMemOperand()))
3184	return Op;
3185	DoDefault = true;
3186	}
3187	if (!DoDefault && (`2` * HaveAlign) == NeedAlign) {
3188	// The PartTy is the equivalent of "getLoadableTypeOfSize(HaveAlign)".
3189	MVT PartTy = HaveAlign <= `8` ? MVT::getIntegerVT(`8` * HaveAlign)
3190	: MVT::getVectorVT(MVT::i8, HaveAlign);
3191	DoDefault =
3192	allowsMemoryAccessForAlignment(Context&: Ctx, DL, VT: PartTy, MMO: *LN->getMemOperand());
3193	}
3194	if (DoDefault) {
3195	std::pair<SDValue, SDValue> P = expandUnalignedLoad(LD: LN, DAG);
3196	return DAG.getMergeValues(Ops: {P.first, P.second}, dl);
3197	}
3198
3199	// The code below generates two loads, both aligned as NeedAlign, and
3200	// with the distance of NeedAlign between them. For that to cover the
3201	// bits that need to be loaded (and without overlapping), the size of
3202	// the loads should be equal to NeedAlign. This is true for all loadable
3203	// types, but add an assertion in case something changes in the future.
3204	assert(LoadTy.getSizeInBits() == `8`*NeedAlign);
3205
3206	unsigned LoadLen = NeedAlign;
3207	SDValue Base = LN->getBasePtr();
3208	SDValue Chain = LN->getChain();
3209	auto BO = getBaseAndOffset(Addr: Base);
3210	unsigned BaseOpc = BO.first.getOpcode();
3211	if (BaseOpc == HexagonISD::VALIGNADDR && BO.second % LoadLen == `0`)
3212	return Op;
3213
3214	if (BO.second % LoadLen != `0`) {
3215	BO.first = DAG.getNode(ISD::ADD, dl, MVT::i32, BO.first,
3216	DAG.getConstant(BO.second % LoadLen, dl, MVT::i32));
3217	BO.second -= BO.second % LoadLen;
3218	}
3219	SDValue BaseNoOff = (BaseOpc != HexagonISD::VALIGNADDR)
3220	? DAG.getNode(HexagonISD::VALIGNADDR, dl, MVT::i32, BO.first,
3221	DAG.getConstant(NeedAlign, dl, MVT::i32))
3222	: BO.first;
3223	SDValue Base0 =
3224	DAG.getMemBasePlusOffset(Base: BaseNoOff, Offset: TypeSize::getFixed(ExactSize: BO.second), DL: dl);
3225	SDValue Base1 = DAG.getMemBasePlusOffset(
3226	Base: BaseNoOff, Offset: TypeSize::getFixed(ExactSize: BO.second + LoadLen), DL: dl);
3227
3228	MachineMemOperand WideMMO = nullptr*;
3229	if (MachineMemOperand *MMO = LN->getMemOperand()) {
3230	MachineFunction &MF = DAG.getMachineFunction();
3231	WideMMO = MF.getMachineMemOperand(
3232	PtrInfo: MMO->getPointerInfo(), F: MMO->getFlags(), Size: `2` * LoadLen, BaseAlignment: Align (LoadLen),
3233	AAInfo: MMO->getAAInfo(), Ranges: MMO->getRanges(), SSID: MMO->getSyncScopeID(),
3234	Ordering: MMO->getSuccessOrdering(), FailureOrdering: MMO->getFailureOrdering());
3235	}
3236
3237	SDValue Load0 = DAG.getLoad(VT: LoadTy, dl, Chain, Ptr: Base0, MMO: WideMMO);
3238	SDValue Load1 = DAG.getLoad(VT: LoadTy, dl, Chain, Ptr: Base1, MMO: WideMMO);
3239
3240	SDValue Aligned = DAG.getNode(Opcode: HexagonISD::VALIGN, DL: dl, VT: LoadTy,
3241	Ops: {Load1, Load0, BaseNoOff.getOperand(i: `0`)});
3242	SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
3243	Load0.getValue(`1`), Load1.getValue(`1`));
3244	SDValue M = DAG.getMergeValues(Ops: {Aligned, NewChain}, dl);
3245	return M;
3246	}
3247
3248	SDValue
3249	HexagonTargetLowering::LowerUAddSubO(SDValue Op, SelectionDAG &DAG) const {
3250	SDValue X = Op.getOperand(i: `0`), Y = Op.getOperand(i: `1`);
3251	auto *CY = dyn_cast<ConstantSDNode>(Val&: Y);
3252	if (!CY)
3253	return SDValue ();
3254
3255	const SDLoc &dl(Op);
3256	SDVTList VTs = Op.getNode()->getVTList();
3257	assert(VTs.NumVTs == `2`);
3258	assert(VTs.VTs[`1`] == MVT::i1);
3259	unsigned Opc = Op.getOpcode();
3260
3261	if (CY) {
3262	uint64_t VY = CY->getZExtValue();
3263	assert(VY != `0` && "This should have been folded");
3264	// X +/- 1
3265	if (VY != `1`)
3266	return SDValue ();
3267
3268	if (Opc == ISD::UADDO) {
3269	SDValue Op = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: VTs.VTs[`0`], Ops: {X, Y});
3270	SDValue Ov = DAG.getSetCC(dl, MVT::i1, Op, getZero(dl, ty(Op), DAG),
3271	ISD::SETEQ);
3272	return DAG.getMergeValues(Ops: {Op, Ov}, dl);
3273	}
3274	if (Opc == ISD::USUBO) {
3275	SDValue Op = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: VTs.VTs[`0`], Ops: {X, Y});
3276	SDValue Ov = DAG.getSetCC(dl, MVT::i1, Op,
3277	DAG.getConstant(-`1`, dl, ty(Op)), ISD::SETEQ);
3278	return DAG.getMergeValues(Ops: {Op, Ov}, dl);
3279	}
3280	}
3281
3282	return SDValue ();
3283	}
3284
3285	SDValue HexagonTargetLowering::LowerUAddSubOCarry(SDValue Op,
3286	SelectionDAG &DAG) const {
3287	const SDLoc &dl(Op);
3288	unsigned Opc = Op.getOpcode();
3289	SDValue X = Op.getOperand(i: `0`), Y = Op.getOperand(i: `1`), C = Op.getOperand(i: `2`);
3290
3291	if (Opc == ISD::UADDO_CARRY)
3292	return DAG.getNode(Opcode: HexagonISD::ADDC, DL: dl, VTList: Op.getNode()->getVTList(),
3293	Ops: { X, Y, C });
3294
3295	EVT CarryTy = C.getValueType();
3296	SDValue SubC = DAG.getNode(Opcode: HexagonISD::SUBC, DL: dl, VTList: Op.getNode()->getVTList(),
3297	Ops: { X, Y, DAG.getLogicalNOT(DL: dl, Val: C, VT: CarryTy) });
3298	SDValue Out[] = { SubC.getValue(R: `0`),
3299	DAG.getLogicalNOT(DL: dl, Val: SubC.getValue(R: `1`), VT: CarryTy) };
3300	return DAG.getMergeValues(Ops: Out, dl);
3301	}
3302
3303	SDValue
3304	HexagonTargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const {
3305	SDValue Chain = Op.getOperand(i: `0`);
3306	SDValue Offset = Op.getOperand(i: `1`);
3307	SDValue Handler = Op.getOperand(i: `2`);
3308	SDLoc dl(Op);
3309	auto PtrVT = getPointerTy(DL: DAG.getDataLayout());
3310
3311	// Mark function as containing a call to EH_RETURN.
3312	HexagonMachineFunctionInfo *FuncInfo =
3313	DAG.getMachineFunction().getInfo<HexagonMachineFunctionInfo>();
3314	FuncInfo->setHasEHReturn();
3315
3316	unsigned OffsetReg = Hexagon::R28;
3317
3318	SDValue StoreAddr =
3319	DAG.getNode(ISD::ADD, dl, PtrVT, DAG.getRegister(Hexagon::R30, PtrVT),
3320	DAG.getIntPtrConstant(`4`, dl));
3321	Chain = DAG.getStore(Chain, dl, Val: Handler, Ptr: StoreAddr, PtrInfo: MachinePointerInfo ());
3322	Chain = DAG.getCopyToReg(Chain, dl, Reg: OffsetReg, N: Offset);
3323
3324	// Not needed we already use it as explict input to EH_RETURN.
3325	// MF.getRegInfo().addLiveOut(OffsetReg);
3326
3327	return DAG.getNode(HexagonISD::EH_RETURN, dl, MVT::Other, Chain);
3328	}
3329
3330	SDValue
3331	HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
3332	unsigned Opc = Op.getOpcode();
3333
3334	// Handle INLINEASM first.
3335	if (Opc == ISD::INLINEASM \|\| Opc == ISD::INLINEASM_BR)
3336	return LowerINLINEASM(Op, DAG);
3337
3338	if (isHvxOperation(N: Op.getNode(), DAG)) {
3339	// If HVX lowering returns nothing, try the default lowering.
3340	if (SDValue V = LowerHvxOperation(Op, DAG))
3341	return V;
3342	}
3343
3344	switch (Opc) {
3345	default:
3346	#ifndef NDEBUG
3347	Op.getNode()->dumpr(G: &DAG);
3348	if (Opc > HexagonISD::OP_BEGIN && Opc < HexagonISD::OP_END)
3349	errs() << "Error: check for a non-legal type in this operation\n";
3350	#endif
3351	llvm_unreachable("Should not custom lower this!");
3352
3353	case ISD::FDIV:
3354	return LowerFDIV(Op, DAG);
3355	case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
3356	case ISD::INSERT_SUBVECTOR: return LowerINSERT_SUBVECTOR(Op, DAG);
3357	case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
3358	case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op, DAG);
3359	case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
3360	case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
3361	case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
3362	case ISD::BITCAST: return LowerBITCAST(Op, DAG);
3363	case ISD::LOAD: return LowerLoad(Op, DAG);
3364	case ISD::STORE: return LowerStore(Op, DAG);
3365	case ISD::UADDO:
3366	case ISD::USUBO: return LowerUAddSubO(Op, DAG);
3367	case ISD::UADDO_CARRY:
3368	case ISD::USUBO_CARRY: return LowerUAddSubOCarry(Op, DAG);
3369	case ISD::SRA:
3370	case ISD::SHL:
3371	case ISD::SRL: return LowerVECTOR_SHIFT(Op, DAG);
3372	case ISD::ROTL: return LowerROTL(Op, DAG);
3373	case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
3374	case ISD::JumpTable: return LowerJumpTable(Op, DAG);
3375	case ISD::EH_RETURN: return LowerEH_RETURN(Op, DAG);
3376	case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
3377	case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
3378	case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
3379	case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, DAG);
3380	case ISD::GlobalAddress: return LowerGLOBALADDRESS(Op, DAG);
3381	case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
3382	case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG);
3383	case ISD::VACOPY: return LowerVACOPY(Op, DAG);
3384	case ISD::VASTART: return LowerVASTART(Op, DAG);
3385	case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
3386	case ISD::SETCC: return LowerSETCC(Op, DAG);
3387	case ISD::VSELECT: return LowerVSELECT(Op, DAG);
3388	case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
3389	case ISD::INTRINSIC_VOID: return LowerINTRINSIC_VOID(Op, DAG);
3390	case ISD::PREFETCH: return LowerPREFETCH(Op, DAG);
3391	case ISD::READCYCLECOUNTER: return LowerREADCYCLECOUNTER(Op, DAG);
3392	break;
3393	}
3394
3395	return SDValue ();
3396	}
3397
3398	void
3399	HexagonTargetLowering::LowerOperationWrapper(SDNode *N,
3400	SmallVectorImpl<SDValue> &Results,
3401	SelectionDAG &DAG) const {
3402	if (isHvxOperation(N, DAG)) {
3403	LowerHvxOperationWrapper(N, Results, DAG);
3404	if (!Results.empty())
3405	return;
3406	}
3407
3408	SDValue Op(N, `0`);
3409	unsigned Opc = N->getOpcode();
3410
3411	switch (Opc) {
3412	case HexagonISD::SSAT:
3413	case HexagonISD::USAT:
3414	Results.push_back(Elt: opJoin(Ops: SplitVectorOp(Op, DAG), dl: SDLoc (Op), DAG));
3415	break;
3416	case ISD::STORE:
3417	// We are only custom-lowering stores to verify the alignment of the
3418	// address if it is a compile-time constant. Since a store can be
3419	// modified during type-legalization (the value being stored may need
3420	// legalization), return empty Results here to indicate that we don't
3421	// really make any changes in the custom lowering.
3422	return;
3423	default:
3424	TargetLowering::LowerOperationWrapper(N, Results, DAG);
3425	break;
3426	}
3427	}
3428
3429	void
3430	HexagonTargetLowering::ReplaceNodeResults(SDNode *N,
3431	SmallVectorImpl<SDValue> &Results,
3432	SelectionDAG &DAG) const {
3433	if (isHvxOperation(N, DAG)) {
3434	ReplaceHvxNodeResults(N, Results, DAG);
3435	if (!Results.empty())
3436	return;
3437	}
3438
3439	const SDLoc &dl(N);
3440	switch (N->getOpcode()) {
3441	case ISD::SRL:
3442	case ISD::SRA:
3443	case ISD::SHL:
3444	return;
3445	case ISD::BITCAST:
3446	// Handle a bitcast from v8i1 to i8.
3447	if (N->getValueType(`0`) == MVT::i8) {
3448	if (N->getOperand(`0`).getValueType() == MVT::v8i1) {
3449	SDValue P = getInstr(Hexagon::C2_tfrpr, dl, MVT::i32,
3450	N->getOperand(`0`), DAG);
3451	SDValue T = DAG.getAnyExtOrTrunc(P, dl, MVT::i8);
3452	Results.push_back(Elt: T);
3453	}
3454	}
3455	break;
3456	}
3457	}
3458
3459	SDValue
3460	HexagonTargetLowering::PerformDAGCombine(SDNode *N,
3461	DAGCombinerInfo &DCI) const {
3462	if (isHvxOperation(N, DAG&: DCI.DAG)) {
3463	if (SDValue V = PerformHvxDAGCombine(N, DCI))
3464	return V;
3465	return SDValue ();
3466	}
3467
3468	SDValue Op(N, `0`);
3469	const SDLoc &dl(Op);
3470	unsigned Opc = Op.getOpcode();
3471
3472	if (Opc == ISD::TRUNCATE) {
3473	SDValue Op0 = Op.getOperand(i: `0`);
3474	// fold (truncate (build pair x, y)) -> (truncate x) or x
3475	if (Op0.getOpcode() == ISD::BUILD_PAIR) {
3476	EVT TruncTy = Op.getValueType();
3477	SDValue Elem0 = Op0.getOperand(i: `0`);
3478	// if we match the low element of the pair, just return it.
3479	if (Elem0.getValueType() == TruncTy)
3480	return Elem0;
3481	// otherwise, if the low part is still too large, apply the truncate.
3482	if (Elem0.getValueType().bitsGT(VT: TruncTy))
3483	return DCI.DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: TruncTy, Operand: Elem0);
3484	}
3485	}
3486
3487	if (DCI.isBeforeLegalizeOps())
3488	return SDValue ();
3489
3490	if (Opc == HexagonISD::P2D) {
3491	SDValue P = Op.getOperand(i: `0`);
3492	switch (P.getOpcode()) {
3493	case HexagonISD::PTRUE:
3494	return DCI.DAG.getConstant(Val: -`1`, DL: dl, VT: ty(Op));
3495	case HexagonISD::PFALSE:
3496	return getZero(dl, Ty: ty(Op), DAG&: DCI.DAG);
3497	default:
3498	break;
3499	}
3500	} else if (Opc == ISD::VSELECT) {
3501	// This is pretty much duplicated in HexagonISelLoweringHVX...
3502	//
3503	// (vselect (xor x, ptrue), v0, v1) -> (vselect x, v1, v0)
3504	SDValue Cond = Op.getOperand(i: `0`);
3505	if (Cond ->getOpcode() == ISD::XOR) {
3506	SDValue C0 = Cond.getOperand(i: `0`), C1 = Cond.getOperand(i: `1`);
3507	if (C1 ->getOpcode() == HexagonISD::PTRUE) {
3508	SDValue VSel = DCI.DAG.getNode(Opcode: ISD::VSELECT, DL: dl, VT: ty(Op), N1: C0,
3509	N2: Op.getOperand(i: `2`), N3: Op.getOperand(i: `1`));
3510	return VSel;
3511	}
3512	}
3513	} else if (Opc == ISD::TRUNCATE) {
3514	SDValue Op0 = Op.getOperand(i: `0`);
3515	// fold (truncate (build pair x, y)) -> (truncate x) or x
3516	if (Op0.getOpcode() == ISD::BUILD_PAIR) {
3517	MVT TruncTy = ty(Op);
3518	SDValue Elem0 = Op0.getOperand(i: `0`);
3519	// if we match the low element of the pair, just return it.
3520	if (ty(Op: Elem0) == TruncTy)
3521	return Elem0;
3522	// otherwise, if the low part is still too large, apply the truncate.
3523	if (ty(Op: Elem0).bitsGT(VT: TruncTy))
3524	return DCI.DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: TruncTy, Operand: Elem0);
3525	}
3526	} else if (Opc == ISD::OR) {
3527	// fold (or (shl xx, s), (zext y)) -> (COMBINE (shl xx, s-32), y)
3528	// if s >= 32
3529	auto fold0 = [&, this](SDValue Op) {
3530	if (ty(Op) != MVT::i64)
3531	return SDValue ();
3532	SDValue Shl = Op.getOperand(i: `0`);
3533	SDValue Zxt = Op.getOperand(i: `1`);
3534	if (Shl.getOpcode() != ISD::SHL)
3535	std::swap(a&: Shl, b&: Zxt);
3536
3537	if (Shl.getOpcode() != ISD::SHL \|\| Zxt.getOpcode() != ISD::ZERO_EXTEND)
3538	return SDValue ();
3539
3540	SDValue Z = Zxt.getOperand(i: `0`);
3541	auto *Amt = dyn_cast<ConstantSDNode>(Val: Shl.getOperand(i: `1`));
3542	if (Amt && Amt->getZExtValue() >= `32` && ty(Op: Z).getSizeInBits() <= `32`) {
3543	unsigned A = Amt->getZExtValue();
3544	SDValue S = Shl.getOperand(i: `0`);
3545	SDValue T0 = DCI.DAG.getNode(ISD::SHL, dl, ty(S), S,
3546	DCI.DAG.getConstant(A - `32`, dl, MVT::i32));
3547	SDValue T1 = DCI.DAG.getZExtOrTrunc(T0, dl, MVT::i32);
3548	SDValue T2 = DCI.DAG.getZExtOrTrunc(Z, dl, MVT::i32);
3549	return DCI.DAG.getNode(HexagonISD::COMBINE, dl, MVT::i64, {T1, T2});
3550	}
3551	return SDValue ();
3552	};
3553
3554	if (SDValue R = fold0 (Op))
3555	return R;
3556	}
3557
3558	return SDValue ();
3559	}
3560
3561	/// Returns relocation base for the given PIC jumptable.
3562	SDValue
3563	HexagonTargetLowering::getPICJumpTableRelocBase(SDValue Table,
3564	SelectionDAG &DAG) const {
3565	int Idx = cast<JumpTableSDNode>(Val&: Table)->getIndex();
3566	EVT VT = Table.getValueType();
3567	SDValue T = DAG.getTargetJumpTable(JTI: Idx, VT, TargetFlags: HexagonII::MO_PCREL);
3568	return DAG.getNode(Opcode: HexagonISD::AT_PCREL, DL: SDLoc (Table), VT, Operand: T);
3569	}
3570
3571	//===----------------------------------------------------------------------===//
3572	// Inline Assembly Support
3573	//===----------------------------------------------------------------------===//
3574
3575	TargetLowering::ConstraintType
3576	HexagonTargetLowering::getConstraintType(StringRef Constraint) const {
3577	if (Constraint.size() == `1`) {
3578	switch (Constraint [`0`]) {
3579	case `'q'`:
3580	case `'v'`:
3581	if (Subtarget.useHVXOps())
3582	return C_RegisterClass;
3583	break;
3584	case `'a'`:
3585	return C_RegisterClass;
3586	default:
3587	break;
3588	}
3589	}
3590	return TargetLowering::getConstraintType(Constraint);
3591	}
3592
3593	std::pair<unsigned, const TargetRegisterClass*>
3594	HexagonTargetLowering::getRegForInlineAsmConstraint(
3595	const TargetRegisterInfo TRI, StringRef Constraint, MVT VT) const* {
3596
3597	if (Constraint.size() == `1`) {
3598	switch (Constraint [`0`]) {
3599	case `'r'`: // R0-R31
3600	switch (VT.SimpleTy) {
3601	default:
3602	return {`0u`, nullptr};
3603	case MVT::i1:
3604	case MVT::i8:
3605	case MVT::i16:
3606	case MVT::i32:
3607	case MVT::f32:
3608	return {`0u`, &Hexagon::IntRegsRegClass};
3609	case MVT::i64:
3610	case MVT::f64:
3611	return {`0u`, &Hexagon::DoubleRegsRegClass};
3612	}
3613	break;
3614	case `'a'`: // M0-M1
3615	if (VT != MVT::i32)
3616	return {`0u`, nullptr};
3617	return {`0u`, &Hexagon::ModRegsRegClass};
3618	case `'q'`: // q0-q3
3619	switch (VT.getSizeInBits()) {
3620	default:
3621	return {`0u`, nullptr};
3622	case `64`:
3623	case `128`:
3624	return {`0u`, &Hexagon::HvxQRRegClass};
3625	}
3626	break;
3627	case `'v'`: // V0-V31
3628	switch (VT.getSizeInBits()) {
3629	default:
3630	return {`0u`, nullptr};
3631	case `512`:
3632	return {`0u`, &Hexagon::HvxVRRegClass};
3633	case `1024`:
3634	if (Subtarget.hasV60Ops() && Subtarget.useHVX128BOps())
3635	return {`0u`, &Hexagon::HvxVRRegClass};
3636	return {`0u`, &Hexagon::HvxWRRegClass};
3637	case `2048`:
3638	return {`0u`, &Hexagon::HvxWRRegClass};
3639	}
3640	break;
3641	default:
3642	return {`0u`, nullptr};
3643	}
3644	}
3645
3646	return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
3647	}
3648
3649	/// isFPImmLegal - Returns true if the target can instruction select the
3650	/// specified FP immediate natively. If false, the legalizer will
3651	/// materialize the FP immediate as a load from a constant pool.
3652	bool HexagonTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
3653	bool ForCodeSize) const {
3654	return true;
3655	}
3656
3657	/// isLegalAddressingMode - Return true if the addressing mode represented by
3658	/// AM is legal for this target, for a load/store of the specified type.
3659	bool HexagonTargetLowering::isLegalAddressingMode(const DataLayout &DL,
3660	const AddrMode &AM, Type *Ty,
3661	unsigned AS, Instruction I) const* {
3662	if (Ty->isSized()) {
3663	// When LSR detects uses of the same base address to access different
3664	// types (e.g. unions), it will assume a conservative type for these
3665	// uses:
3666	// LSR Use: Kind=Address of void in addrspace(4294967295), ...
3667	// The type Ty passed here would then be "void". Skip the alignment
3668	// checks, but do not return false right away, since that confuses
3669	// LSR into crashing.
3670	Align A = DL.getABITypeAlign(Ty);
3671	// The base offset must be a multiple of the alignment.
3672	if (!isAligned(Lhs: A, SizeInBytes: AM.BaseOffs))
3673	return false;
3674	// The shifted offset must fit in 11 bits.
3675	if (!isInt<`11`>(x: AM.BaseOffs >> Log2(A)))
3676	return false;
3677	}
3678
3679	// No global is ever allowed as a base.
3680	if (AM.BaseGV)
3681	return false;
3682
3683	int Scale = AM.Scale;
3684	if (Scale < `0`)
3685	Scale = -Scale;
3686	switch (Scale) {
3687	case `0`: // No scale reg, "r+i", "r", or just "i".
3688	break;
3689	default: // No scaled addressing mode.
3690	return false;
3691	}
3692	return true;
3693	}
3694
3695	/// Return true if folding a constant offset with the given GlobalAddress is
3696	/// legal. It is frequently not legal in PIC relocation models.
3697	bool HexagonTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA)
3698	const {
3699	return HTM.getRelocationModel() == Reloc::Static;
3700	}
3701
3702	/// isLegalICmpImmediate - Return true if the specified immediate is legal
3703	/// icmp immediate, that is the target has icmp instructions which can compare
3704	/// a register against the immediate without having to materialize the
3705	/// immediate into a register.
3706	bool HexagonTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
3707	return Imm >= -`512` && Imm <= `511`;
3708	}
3709
3710	/// IsEligibleForTailCallOptimization - Check whether the call is eligible
3711	/// for tail call optimization. Targets which want to do tail call
3712	/// optimization should implement this function.
3713	bool HexagonTargetLowering::IsEligibleForTailCallOptimization(
3714	SDValue Callee,
3715	CallingConv::ID CalleeCC,
3716	bool IsVarArg,
3717	bool IsCalleeStructRet,
3718	bool IsCallerStructRet,
3719	const SmallVectorImpl<ISD::OutputArg> &Outs,
3720	const SmallVectorImpl<SDValue> &OutVals,
3721	const SmallVectorImpl<ISD::InputArg> &Ins,
3722	SelectionDAG& DAG) const {
3723	const Function &CallerF = DAG.getMachineFunction().getFunction();
3724	CallingConv::ID CallerCC = CallerF.getCallingConv();
3725	bool CCMatch = CallerCC == CalleeCC;
3726
3727	// ***************************************************************************
3728	// Look for obvious safe cases to perform tail call optimization that do not
3729	// require ABI changes.
3730	// ***************************************************************************
3731
3732	// If this is a tail call via a function pointer, then don't do it!
3733	if (!isa<GlobalAddressSDNode>(Val: Callee) &&
3734	!isa<ExternalSymbolSDNode>(Val: Callee)) {
3735	return false;
3736	}
3737
3738	// Do not optimize if the calling conventions do not match and the conventions
3739	// used are not C or Fast.
3740	if (!CCMatch) {
3741	bool R = (CallerCC == CallingConv::C \|\| CallerCC == CallingConv::Fast);
3742	bool E = (CalleeCC == CallingConv::C \|\| CalleeCC == CallingConv::Fast);
3743	// If R & E, then ok.
3744	if (!R \|\| !E)
3745	return false;
3746	}
3747
3748	// Do not tail call optimize vararg calls.
3749	if (IsVarArg)
3750	return false;
3751
3752	// Also avoid tail call optimization if either caller or callee uses struct
3753	// return semantics.
3754	if (IsCalleeStructRet \|\| IsCallerStructRet)
3755	return false;
3756
3757	// In addition to the cases above, we also disable Tail Call Optimization if
3758	// the calling convention code that at least one outgoing argument needs to
3759	// go on the stack. We cannot check that here because at this point that
3760	// information is not available.
3761	return true;
3762	}
3763
3764	/// Returns the target specific optimal type for load and store operations as
3765	/// a result of memset, memcpy, and memmove lowering.
3766	///
3767	/// If DstAlign is zero that means it's safe to destination alignment can
3768	/// satisfy any constraint. Similarly if SrcAlign is zero it means there isn't
3769	/// a need to check it against alignment requirement, probably because the
3770	/// source does not need to be loaded. If 'IsMemset' is true, that means it's
3771	/// expanding a memset. If 'ZeroMemset' is true, that means it's a memset of
3772	/// zero. 'MemcpyStrSrc' indicates whether the memcpy source is constant so it
3773	/// does not need to be loaded. It returns EVT::Other if the type should be
3774	/// determined using generic target-independent logic.
3775	EVT HexagonTargetLowering::getOptimalMemOpType(
3776	const MemOp &Op, const AttributeList &FuncAttributes) const {
3777	if (Op.size() >= `8` && Op.isAligned(Align(`8`)))
3778	return MVT::i64;
3779	if (Op.size() >= `4` && Op.isAligned(Align(`4`)))
3780	return MVT::i32;
3781	if (Op.size() >= `2` && Op.isAligned(Align(`2`)))
3782	return MVT::i16;
3783	return MVT::Other;
3784	}
3785
3786	bool HexagonTargetLowering::allowsMemoryAccess(
3787	LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace,
3788	Align Alignment, MachineMemOperand::Flags Flags, unsigned Fast) const* {
3789	MVT SVT = VT.getSimpleVT();
3790	if (Subtarget.isHVXVectorType(VecTy: SVT, IncludeBool: true))
3791	return allowsHvxMemoryAccess(VecTy: SVT, Flags, Fast);
3792	return TargetLoweringBase::allowsMemoryAccess(
3793	Context, DL, VT, AddrSpace, Alignment, Flags, Fast);
3794	}
3795
3796	bool HexagonTargetLowering::allowsMisalignedMemoryAccesses(
3797	EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
3798	unsigned Fast) const* {
3799	MVT SVT = VT.getSimpleVT();
3800	if (Subtarget.isHVXVectorType(VecTy: SVT, IncludeBool: true))
3801	return allowsHvxMisalignedMemoryAccesses(VecTy: SVT, Flags, Fast);
3802	if (Fast)
3803	*Fast = `0`;
3804	return false;
3805	}
3806
3807	std::pair<const TargetRegisterClass*, uint8_t>
3808	HexagonTargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
3809	MVT VT) const {
3810	if (Subtarget.isHVXVectorType(VecTy: VT, IncludeBool: true)) {
3811	unsigned BitWidth = VT.getSizeInBits();
3812	unsigned VecWidth = Subtarget.getVectorLength() * `8`;
3813
3814	if (VT.getVectorElementType() == MVT::i1)
3815	return std::make_pair(&Hexagon::HvxQRRegClass, `1`);
3816	if (BitWidth == VecWidth)
3817	return std::make_pair(&Hexagon::HvxVRRegClass, `1`);
3818	assert(BitWidth == `2` * VecWidth);
3819	return std::make_pair(&Hexagon::HvxWRRegClass, `1`);
3820	}
3821
3822	return TargetLowering::findRepresentativeClass(TRI, VT);
3823	}
3824
3825	bool HexagonTargetLowering::shouldReduceLoadWidth(SDNode *Load,
3826	ISD::LoadExtType ExtTy, EVT NewVT) const {
3827	// TODO: This may be worth removing. Check regression tests for diffs.
3828	if (!TargetLoweringBase::shouldReduceLoadWidth(Load, ExtTy, NewVT))
3829	return false;
3830
3831	auto *L = cast<LoadSDNode>(Val: Load);
3832	std::pair<SDValue,int> BO = getBaseAndOffset(Addr: L->getBasePtr());
3833	// Small-data object, do not shrink.
3834	if (BO.first.getOpcode() == HexagonISD::CONST32_GP)
3835	return false;
3836	if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Val&: BO.first)) {
3837	auto &HTM = static_cast<const HexagonTargetMachine&>(getTargetMachine());
3838	const auto GO = dyn_cast_or_null<const* GlobalObject>(Val: GA->getGlobal());
3839	return !GO \|\| !HTM.getObjFileLowering()->isGlobalInSmallSection(GO, HTM);
3840	}
3841	return true;
3842	}
3843
3844	void HexagonTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
3845	SDNode Node) const* {
3846	AdjustHvxInstrPostInstrSelection(MI, Node);
3847	}
3848
3849	Value *HexagonTargetLowering::emitLoadLinked(IRBuilderBase &Builder,
3850	Type ValueTy, Value Addr,
3851	AtomicOrdering Ord) const {
3852	BasicBlock *BB = Builder.GetInsertBlock();
3853	Module *M = BB->getParent()->getParent();
3854	unsigned SZ = ValueTy->getPrimitiveSizeInBits();
3855	assert((SZ == `32` \|\| SZ == `64`) && "Only 32/64-bit atomic loads supported");
3856	Intrinsic::ID IntID = (SZ == `32`) ? Intrinsic::hexagon_L2_loadw_locked
3857	: Intrinsic::hexagon_L4_loadd_locked;
3858	Function *Fn = Intrinsic::getDeclaration(M, id: IntID);
3859
3860	Value *Call = Builder.CreateCall(Callee: Fn, Args: Addr, Name: "larx");
3861
3862	return Builder.CreateBitCast(V: Call, DestTy: ValueTy);
3863	}
3864
3865	/// Perform a store-conditional operation to Addr. Return the status of the
3866	/// store. This should be 0 if the store succeeded, non-zero otherwise.
3867	Value *HexagonTargetLowering::emitStoreConditional(IRBuilderBase &Builder,
3868	Value Val, Value Addr,
3869	AtomicOrdering Ord) const {
3870	BasicBlock *BB = Builder.GetInsertBlock();
3871	Module *M = BB->getParent()->getParent();
3872	Type *Ty = Val->getType();
3873	unsigned SZ = Ty->getPrimitiveSizeInBits();
3874
3875	Type *CastTy = Builder.getIntNTy(N: SZ);
3876	assert((SZ == `32` \|\| SZ == `64`) && "Only 32/64-bit atomic stores supported");
3877	Intrinsic::ID IntID = (SZ == `32`) ? Intrinsic::hexagon_S2_storew_locked
3878	: Intrinsic::hexagon_S4_stored_locked;
3879	Function *Fn = Intrinsic::getDeclaration(M, id: IntID);
3880
3881	Val = Builder.CreateBitCast(V: Val, DestTy: CastTy);
3882
3883	Value *Call = Builder.CreateCall(Callee: Fn, Args: {Addr, Val}, Name: "stcx");
3884	Value *Cmp = Builder.CreateICmpEQ(LHS: Call, RHS: Builder.getInt32(C: `0`), Name: "");
3885	Value *Ext = Builder.CreateZExt(V: Cmp, DestTy: Type::getInt32Ty(C&: M->getContext()));
3886	return Ext;
3887	}
3888
3889	TargetLowering::AtomicExpansionKind
3890	HexagonTargetLowering::shouldExpandAtomicLoadInIR(LoadInst LI) const* {
3891	// Do not expand loads and stores that don't exceed 64 bits.
3892	return LI->getType()->getPrimitiveSizeInBits() > `64`
3893	? AtomicExpansionKind::LLOnly
3894	: AtomicExpansionKind::None;
3895	}
3896
3897	TargetLowering::AtomicExpansionKind
3898	HexagonTargetLowering::shouldExpandAtomicStoreInIR(StoreInst SI) const* {
3899	// Do not expand loads and stores that don't exceed 64 bits.
3900	return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() > `64`
3901	? AtomicExpansionKind::Expand
3902	: AtomicExpansionKind::None;
3903	}
3904
3905	TargetLowering::AtomicExpansionKind
3906	HexagonTargetLowering::shouldExpandAtomicCmpXchgInIR(
3907	AtomicCmpXchgInst AI) const* {
3908	return AtomicExpansionKind::LLSC;
3909	}
3910

source code of llvm/lib/Target/Hexagon/HexagonISelLowering.cpp