HexagonISelLoweringHVX.cpp source code [llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp]

1	//===-- HexagonISelLoweringHVX.cpp --- Lowering HVX operations ------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8
9	#include "HexagonISelLowering.h"
10	#include "HexagonRegisterInfo.h"
11	#include "HexagonSubtarget.h"
12	#include "llvm/ADT/SetVector.h"
13	#include "llvm/ADT/SmallVector.h"
14	#include "llvm/Analysis/MemoryLocation.h"
15	#include "llvm/CodeGen/MachineBasicBlock.h"
16	#include "llvm/CodeGen/MachineFunction.h"
17	#include "llvm/CodeGen/MachineInstr.h"
18	#include "llvm/CodeGen/MachineOperand.h"
19	#include "llvm/CodeGen/MachineRegisterInfo.h"
20	#include "llvm/CodeGen/TargetInstrInfo.h"
21	#include "llvm/IR/IntrinsicsHexagon.h"
22	#include "llvm/Support/CommandLine.h"
23
24	#include <algorithm>
25	#include <string>
26	#include <utility>
27
28	using namespace llvm;
29
30	static cl::opt<unsigned> HvxWidenThreshold("hexagon-hvx-widen",
31	cl::Hidden, cl::init(Val: `16`),
32	cl::desc ("Lower threshold (in bytes) for widening to HVX vectors"));
33
34	static const MVT LegalV64[] = { MVT::v64i8, MVT::v32i16, MVT::v16i32 };
35	static const MVT LegalW64[] = { MVT::v128i8, MVT::v64i16, MVT::v32i32 };
36	static const MVT LegalV128[] = { MVT::v128i8, MVT::v64i16, MVT::v32i32 };
37	static const MVT LegalW128[] = { MVT::v256i8, MVT::v128i16, MVT::v64i32 };
38
39	static std::tuple<unsigned, unsigned, unsigned> getIEEEProperties(MVT Ty) {
40	// For a float scalar type, return (exp-bits, exp-bias, fraction-bits)
41	MVT ElemTy = Ty.getScalarType();
42	switch (ElemTy.SimpleTy) {
43	case MVT::f16:
44	return std::make_tuple(args: `5`, args: `15`, args: `10`);
45	case MVT::f32:
46	return std::make_tuple(args: `8`, args: `127`, args: `23`);
47	case MVT::f64:
48	return std::make_tuple(args: `11`, args: `1023`, args: `52`);
49	default:
50	break;
51	}
52	llvm_unreachable(("Unexpected type: " + EVT (ElemTy).getEVTString()).c_str());
53	}
54
55	void
56	HexagonTargetLowering::initializeHVXLowering() {
57	if (Subtarget.useHVX64BOps()) {
58	addRegisterClass(MVT::VT: v64i8, RC: &Hexagon::HvxVRRegClass);
59	addRegisterClass(MVT::VT: v32i16, RC: &Hexagon::HvxVRRegClass);
60	addRegisterClass(MVT::VT: v16i32, RC: &Hexagon::HvxVRRegClass);
61	addRegisterClass(MVT::VT: v128i8, RC: &Hexagon::HvxWRRegClass);
62	addRegisterClass(MVT::VT: v64i16, RC: &Hexagon::HvxWRRegClass);
63	addRegisterClass(MVT::VT: v32i32, RC: &Hexagon::HvxWRRegClass);
64	// These "short" boolean vector types should be legal because
65	// they will appear as results of vector compares. If they were
66	// not legal, type legalization would try to make them legal
67	// and that would require using operations that do not use or
68	// produce such types. That, in turn, would imply using custom
69	// nodes, which would be unoptimizable by the DAG combiner.
70	// The idea is to rely on target-independent operations as much
71	// as possible.
72	addRegisterClass(MVT::VT: v16i1, RC: &Hexagon::HvxQRRegClass);
73	addRegisterClass(MVT::VT: v32i1, RC: &Hexagon::HvxQRRegClass);
74	addRegisterClass(MVT::VT: v64i1, RC: &Hexagon::HvxQRRegClass);
75	} else if (Subtarget.useHVX128BOps()) {
76	addRegisterClass(MVT::VT: v128i8, RC: &Hexagon::HvxVRRegClass);
77	addRegisterClass(MVT::VT: v64i16, RC: &Hexagon::HvxVRRegClass);
78	addRegisterClass(MVT::VT: v32i32, RC: &Hexagon::HvxVRRegClass);
79	addRegisterClass(MVT::VT: v256i8, RC: &Hexagon::HvxWRRegClass);
80	addRegisterClass(MVT::VT: v128i16, RC: &Hexagon::HvxWRRegClass);
81	addRegisterClass(MVT::v64i32, &Hexagon::HvxWRRegClass);
82	addRegisterClass(MVT::v32i1, &Hexagon::HvxQRRegClass);
83	addRegisterClass(MVT::v64i1, &Hexagon::HvxQRRegClass);
84	addRegisterClass(MVT::v128i1, &Hexagon::HvxQRRegClass);
85	if (Subtarget.useHVXV68Ops() && Subtarget.useHVXFloatingPoint()) {
86	addRegisterClass(MVT::v32f32, &Hexagon::HvxVRRegClass);
87	addRegisterClass(MVT::v64f16, &Hexagon::HvxVRRegClass);
88	addRegisterClass(MVT::v64f32, &Hexagon::HvxWRRegClass);
89	addRegisterClass(MVT::v128f16, &Hexagon::HvxWRRegClass);
90	}
91	}
92
93	// Set up operation actions.
94
95	bool Use64b = Subtarget.useHVX64BOps();
96	ArrayRef<MVT> LegalV = Use64b ? LegalV64 : LegalV128;
97	ArrayRef<MVT> LegalW = Use64b ? LegalW64 : LegalW128;
98	MVT ByteV = Use64b ? MVT::v64i8 : MVT::v128i8;
99	MVT WordV = Use64b ? MVT::v16i32 : MVT::v32i32;
100	MVT ByteW = Use64b ? MVT::v128i8 : MVT::v256i8;
101
102	auto setPromoteTo = [this] (unsigned Opc, MVT FromTy, MVT ToTy) {
103	setOperationAction(Op: Opc, VT: FromTy, Action: Promote);
104	AddPromotedToType(Opc, OrigVT: FromTy, DestVT: ToTy);
105	};
106
107	// Handle bitcasts of vector predicates to scalars (e.g. v32i1 to i32).
108	// Note: v16i1 -> i16 is handled in type legalization instead of op
109	// legalization.
110	setOperationAction(ISD::BITCAST, MVT::i16, Custom);
111	setOperationAction(ISD::BITCAST, MVT::i32, Custom);
112	setOperationAction(ISD::BITCAST, MVT::i64, Custom);
113	setOperationAction(ISD::BITCAST, MVT::v16i1, Custom);
114	setOperationAction(ISD::BITCAST, MVT::v128i1, Custom);
115	setOperationAction(ISD::BITCAST, MVT::i128, Custom);
116	setOperationAction(Op: ISD::VECTOR_SHUFFLE, VT: ByteV, Action: Legal);
117	setOperationAction(Op: ISD::VECTOR_SHUFFLE, VT: ByteW, Action: Legal);
118	setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
119
120	if (Subtarget.useHVX128BOps() && Subtarget.useHVXV68Ops() &&
121	Subtarget.useHVXFloatingPoint()) {
122
123	static const MVT FloatV[] = { MVT::v64f16, MVT::v32f32 };
124	static const MVT FloatW[] = { MVT::v128f16, MVT::v64f32 };
125
126	for (MVT T : FloatV) {
127	setOperationAction(ISD::FADD, T, Legal);
128	setOperationAction(ISD::FSUB, T, Legal);
129	setOperationAction(ISD::FMUL, T, Legal);
130	setOperationAction(ISD::FMINNUM, T, Legal);
131	setOperationAction(ISD::FMAXNUM, T, Legal);
132
133	setOperationAction(ISD::INSERT_SUBVECTOR, T, Custom);
134	setOperationAction(ISD::EXTRACT_SUBVECTOR, T, Custom);
135
136	setOperationAction(ISD::SPLAT_VECTOR, T, Legal);
137	setOperationAction(ISD::SPLAT_VECTOR, T, Legal);
138
139	setOperationAction(ISD::MLOAD, T, Custom);
140	setOperationAction(ISD::MSTORE, T, Custom);
141	// Custom-lower BUILD_VECTOR. The standard (target-independent)
142	// handling of it would convert it to a load, which is not always
143	// the optimal choice.
144	setOperationAction(ISD::BUILD_VECTOR, T, Custom);
145	}
146
147
148	// BUILD_VECTOR with f16 operands cannot be promoted without
149	// promoting the result, so lower the node to vsplat or constant pool
150	setOperationAction(ISD::BUILD_VECTOR, MVT::f16, Custom);
151	setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::f16, Custom);
152	setOperationAction(ISD::SPLAT_VECTOR, MVT::f16, Custom);
153
154	// Vector shuffle is always promoted to ByteV and a bitcast to f16 is
155	// generated.
156	setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v128f16, ByteW);
157	setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v64f16, ByteV);
158	setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v64f32, ByteW);
159	setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v32f32, ByteV);
160
161	for (MVT P : FloatW) {
162	setOperationAction(ISD::LOAD, P, Custom);
163	setOperationAction(ISD::STORE, P, Custom);
164	setOperationAction(ISD::FADD, P, Custom);
165	setOperationAction(ISD::FSUB, P, Custom);
166	setOperationAction(ISD::FMUL, P, Custom);
167	setOperationAction(ISD::FMINNUM, P, Custom);
168	setOperationAction(ISD::FMAXNUM, P, Custom);
169	setOperationAction(ISD::SETCC, P, Custom);
170	setOperationAction(ISD::VSELECT, P, Custom);
171
172	// Custom-lower BUILD_VECTOR. The standard (target-independent)
173	// handling of it would convert it to a load, which is not always
174	// the optimal choice.
175	setOperationAction(ISD::BUILD_VECTOR, P, Custom);
176	// Make concat-vectors custom to handle concats of more than 2 vectors.
177	setOperationAction(ISD::CONCAT_VECTORS, P, Custom);
178
179	setOperationAction(ISD::MLOAD, P, Custom);
180	setOperationAction(ISD::MSTORE, P, Custom);
181	}
182
183	if (Subtarget.useHVXQFloatOps()) {
184	setOperationAction(ISD::FP_EXTEND, MVT::v64f32, Custom);
185	setOperationAction(ISD::FP_ROUND, MVT::v64f16, Legal);
186	} else if (Subtarget.useHVXIEEEFPOps()) {
187	setOperationAction(ISD::FP_EXTEND, MVT::v64f32, Legal);
188	setOperationAction(ISD::FP_ROUND, MVT::v64f16, Legal);
189	}
190	}
191
192	for (MVT T : LegalV) {
193	setIndexedLoadAction(ISD::POST_INC, T, Legal);
194	setIndexedStoreAction(ISD::POST_INC, T, Legal);
195
196	setOperationAction(ISD::ABS, T, Legal);
197	setOperationAction(ISD::AND, T, Legal);
198	setOperationAction(ISD::OR, T, Legal);
199	setOperationAction(ISD::XOR, T, Legal);
200	setOperationAction(ISD::ADD, T, Legal);
201	setOperationAction(ISD::SUB, T, Legal);
202	setOperationAction(ISD::MUL, T, Legal);
203	setOperationAction(ISD::CTPOP, T, Legal);
204	setOperationAction(ISD::CTLZ, T, Legal);
205	setOperationAction(ISD::SELECT, T, Legal);
206	setOperationAction(ISD::SPLAT_VECTOR, T, Legal);
207	if (T != ByteV) {
208	setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, T, Legal);
209	setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, T, Legal);
210	setOperationAction(ISD::BSWAP, T, Legal);
211	}
212
213	setOperationAction(ISD::SMIN, T, Legal);
214	setOperationAction(ISD::SMAX, T, Legal);
215	if (T.getScalarType() != MVT::i32) {
216	setOperationAction(ISD::UMIN, T, Legal);
217	setOperationAction(ISD::UMAX, T, Legal);
218	}
219
220	setOperationAction(ISD::CTTZ, T, Custom);
221	setOperationAction(ISD::LOAD, T, Custom);
222	setOperationAction(ISD::MLOAD, T, Custom);
223	setOperationAction(ISD::MSTORE, T, Custom);
224	if (T.getScalarType() != MVT::i32) {
225	setOperationAction(ISD::MULHS, T, Legal);
226	setOperationAction(ISD::MULHU, T, Legal);
227	}
228
229	setOperationAction(ISD::BUILD_VECTOR, T, Custom);
230	// Make concat-vectors custom to handle concats of more than 2 vectors.
231	setOperationAction(ISD::CONCAT_VECTORS, T, Custom);
232	setOperationAction(ISD::INSERT_SUBVECTOR, T, Custom);
233	setOperationAction(ISD::INSERT_VECTOR_ELT, T, Custom);
234	setOperationAction(ISD::EXTRACT_SUBVECTOR, T, Custom);
235	setOperationAction(ISD::EXTRACT_VECTOR_ELT, T, Custom);
236	setOperationAction(ISD::ANY_EXTEND, T, Custom);
237	setOperationAction(ISD::SIGN_EXTEND, T, Custom);
238	setOperationAction(ISD::ZERO_EXTEND, T, Custom);
239	setOperationAction(ISD::FSHL, T, Custom);
240	setOperationAction(ISD::FSHR, T, Custom);
241	if (T != ByteV) {
242	setOperationAction(ISD::ANY_EXTEND_VECTOR_INREG, T, Custom);
243	// HVX only has shifts of words and halfwords.
244	setOperationAction(ISD::SRA, T, Custom);
245	setOperationAction(ISD::SHL, T, Custom);
246	setOperationAction(ISD::SRL, T, Custom);
247
248	// Promote all shuffles to operate on vectors of bytes.
249	setPromoteTo(ISD::VECTOR_SHUFFLE, T, ByteV);
250	}
251
252	if (Subtarget.useHVXFloatingPoint()) {
253	// Same action for both QFloat and IEEE.
254	setOperationAction(ISD::SINT_TO_FP, T, Custom);
255	setOperationAction(ISD::UINT_TO_FP, T, Custom);
256	setOperationAction(ISD::FP_TO_SINT, T, Custom);
257	setOperationAction(ISD::FP_TO_UINT, T, Custom);
258	}
259
260	setCondCodeAction(ISD::SETNE, T, Expand);
261	setCondCodeAction(ISD::SETLE, T, Expand);
262	setCondCodeAction(ISD::SETGE, T, Expand);
263	setCondCodeAction(ISD::SETLT, T, Expand);
264	setCondCodeAction(ISD::SETULE, T, Expand);
265	setCondCodeAction(ISD::SETUGE, T, Expand);
266	setCondCodeAction(ISD::SETULT, T, Expand);
267	}
268
269	for (MVT T : LegalW) {
270	// Custom-lower BUILD_VECTOR for vector pairs. The standard (target-
271	// independent) handling of it would convert it to a load, which is
272	// not always the optimal choice.
273	setOperationAction(ISD::BUILD_VECTOR, T, Custom);
274	// Make concat-vectors custom to handle concats of more than 2 vectors.
275	setOperationAction(ISD::CONCAT_VECTORS, T, Custom);
276
277	// Custom-lower these operations for pairs. Expand them into a concat
278	// of the corresponding operations on individual vectors.
279	setOperationAction(ISD::ANY_EXTEND, T, Custom);
280	setOperationAction(ISD::SIGN_EXTEND, T, Custom);
281	setOperationAction(ISD::ZERO_EXTEND, T, Custom);
282	setOperationAction(ISD::SIGN_EXTEND_INREG, T, Custom);
283	setOperationAction(ISD::ANY_EXTEND_VECTOR_INREG, T, Custom);
284	setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, T, Legal);
285	setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, T, Legal);
286	setOperationAction(ISD::SPLAT_VECTOR, T, Custom);
287
288	setOperationAction(ISD::LOAD, T, Custom);
289	setOperationAction(ISD::STORE, T, Custom);
290	setOperationAction(ISD::MLOAD, T, Custom);
291	setOperationAction(ISD::MSTORE, T, Custom);
292	setOperationAction(ISD::ABS, T, Custom);
293	setOperationAction(ISD::CTLZ, T, Custom);
294	setOperationAction(ISD::CTTZ, T, Custom);
295	setOperationAction(ISD::CTPOP, T, Custom);
296
297	setOperationAction(ISD::ADD, T, Legal);
298	setOperationAction(ISD::SUB, T, Legal);
299	setOperationAction(ISD::MUL, T, Custom);
300	setOperationAction(ISD::MULHS, T, Custom);
301	setOperationAction(ISD::MULHU, T, Custom);
302	setOperationAction(ISD::AND, T, Custom);
303	setOperationAction(ISD::OR, T, Custom);
304	setOperationAction(ISD::XOR, T, Custom);
305	setOperationAction(ISD::SETCC, T, Custom);
306	setOperationAction(ISD::VSELECT, T, Custom);
307	if (T != ByteW) {
308	setOperationAction(ISD::SRA, T, Custom);
309	setOperationAction(ISD::SHL, T, Custom);
310	setOperationAction(ISD::SRL, T, Custom);
311
312	// Promote all shuffles to operate on vectors of bytes.
313	setPromoteTo(ISD::VECTOR_SHUFFLE, T, ByteW);
314	}
315	setOperationAction(ISD::FSHL, T, Custom);
316	setOperationAction(ISD::FSHR, T, Custom);
317
318	setOperationAction(ISD::SMIN, T, Custom);
319	setOperationAction(ISD::SMAX, T, Custom);
320	if (T.getScalarType() != MVT::i32) {
321	setOperationAction(ISD::UMIN, T, Custom);
322	setOperationAction(ISD::UMAX, T, Custom);
323	}
324
325	if (Subtarget.useHVXFloatingPoint()) {
326	// Same action for both QFloat and IEEE.
327	setOperationAction(ISD::SINT_TO_FP, T, Custom);
328	setOperationAction(ISD::UINT_TO_FP, T, Custom);
329	setOperationAction(ISD::FP_TO_SINT, T, Custom);
330	setOperationAction(ISD::FP_TO_UINT, T, Custom);
331	}
332	}
333
334	// Legalize all of these to HexagonISD::[SU]MUL_LOHI.
335	setOperationAction(Op: ISD::MULHS, VT: WordV, Action: Custom); // -> _LOHI
336	setOperationAction(Op: ISD::MULHU, VT: WordV, Action: Custom); // -> _LOHI
337	setOperationAction(Op: ISD::SMUL_LOHI, VT: WordV, Action: Custom);
338	setOperationAction(Op: ISD::UMUL_LOHI, VT: WordV, Action: Custom);
339
340	setCondCodeAction(ISD::SETNE, MVT::v64f16, Expand);
341	setCondCodeAction(ISD::SETLE, MVT::v64f16, Expand);
342	setCondCodeAction(ISD::SETGE, MVT::v64f16, Expand);
343	setCondCodeAction(ISD::SETLT, MVT::v64f16, Expand);
344	setCondCodeAction(ISD::SETONE, MVT::v64f16, Expand);
345	setCondCodeAction(ISD::SETOLE, MVT::v64f16, Expand);
346	setCondCodeAction(ISD::SETOGE, MVT::v64f16, Expand);
347	setCondCodeAction(ISD::SETOLT, MVT::v64f16, Expand);
348	setCondCodeAction(ISD::SETUNE, MVT::v64f16, Expand);
349	setCondCodeAction(ISD::SETULE, MVT::v64f16, Expand);
350	setCondCodeAction(ISD::SETUGE, MVT::v64f16, Expand);
351	setCondCodeAction(ISD::SETULT, MVT::v64f16, Expand);
352
353	setCondCodeAction(ISD::SETNE, MVT::v32f32, Expand);
354	setCondCodeAction(ISD::SETLE, MVT::v32f32, Expand);
355	setCondCodeAction(ISD::SETGE, MVT::v32f32, Expand);
356	setCondCodeAction(ISD::SETLT, MVT::v32f32, Expand);
357	setCondCodeAction(ISD::SETONE, MVT::v32f32, Expand);
358	setCondCodeAction(ISD::SETOLE, MVT::v32f32, Expand);
359	setCondCodeAction(ISD::SETOGE, MVT::v32f32, Expand);
360	setCondCodeAction(ISD::SETOLT, MVT::v32f32, Expand);
361	setCondCodeAction(ISD::SETUNE, MVT::v32f32, Expand);
362	setCondCodeAction(ISD::SETULE, MVT::v32f32, Expand);
363	setCondCodeAction(ISD::SETUGE, MVT::v32f32, Expand);
364	setCondCodeAction(ISD::SETULT, MVT::v32f32, Expand);
365
366	// Boolean vectors.
367
368	for (MVT T : LegalW) {
369	// Boolean types for vector pairs will overlap with the boolean
370	// types for single vectors, e.g.
371	// v64i8 -> v64i1 (single)
372	// v64i16 -> v64i1 (pair)
373	// Set these actions first, and allow the single actions to overwrite
374	// any duplicates.
375	MVT BoolW = MVT::getVectorVT(MVT::i1, T.getVectorNumElements());
376	setOperationAction(ISD::SETCC, BoolW, Custom);
377	setOperationAction(ISD::AND, BoolW, Custom);
378	setOperationAction(ISD::OR, BoolW, Custom);
379	setOperationAction(ISD::XOR, BoolW, Custom);
380	// Masked load/store takes a mask that may need splitting.
381	setOperationAction(ISD::MLOAD, BoolW, Custom);
382	setOperationAction(ISD::MSTORE, BoolW, Custom);
383	}
384
385	for (MVT T : LegalV) {
386	MVT BoolV = MVT::getVectorVT(MVT::i1, T.getVectorNumElements());
387	setOperationAction(ISD::BUILD_VECTOR, BoolV, Custom);
388	setOperationAction(ISD::CONCAT_VECTORS, BoolV, Custom);
389	setOperationAction(ISD::INSERT_SUBVECTOR, BoolV, Custom);
390	setOperationAction(ISD::INSERT_VECTOR_ELT, BoolV, Custom);
391	setOperationAction(ISD::EXTRACT_SUBVECTOR, BoolV, Custom);
392	setOperationAction(ISD::EXTRACT_VECTOR_ELT, BoolV, Custom);
393	setOperationAction(ISD::SELECT, BoolV, Custom);
394	setOperationAction(ISD::AND, BoolV, Legal);
395	setOperationAction(ISD::OR, BoolV, Legal);
396	setOperationAction(ISD::XOR, BoolV, Legal);
397	}
398
399	if (Use64b) {
400	for (MVT T: {MVT::v32i8, MVT::v32i16, MVT::v16i8, MVT::v16i16, MVT::v16i32})
401	setOperationAction(ISD::SIGN_EXTEND_INREG, T, Legal);
402	} else {
403	for (MVT T: {MVT::v64i8, MVT::v64i16, MVT::v32i8, MVT::v32i16, MVT::v32i32})
404	setOperationAction(ISD::SIGN_EXTEND_INREG, T, Legal);
405	}
406
407	// Handle store widening for short vectors.
408	unsigned HwLen = Subtarget.getVectorLength();
409	for (MVT ElemTy : Subtarget.getHVXElementTypes()) {
410	if (ElemTy == MVT::i1)
411	continue;
412	int ElemWidth = ElemTy.getFixedSizeInBits();
413	int MaxElems = (`8`*HwLen) / ElemWidth;
414	for (int N = `2`; N < MaxElems; N *= `2`) {
415	MVT VecTy = MVT::getVectorVT(VT: ElemTy, NumElements: N);
416	auto Action = getPreferredVectorAction(VT: VecTy);
417	if (Action == TargetLoweringBase::TypeWidenVector) {
418	setOperationAction(Op: ISD::LOAD, VT: VecTy, Action: Custom);
419	setOperationAction(Op: ISD::STORE, VT: VecTy, Action: Custom);
420	setOperationAction(Op: ISD::SETCC, VT: VecTy, Action: Custom);
421	setOperationAction(Op: ISD::TRUNCATE, VT: VecTy, Action: Custom);
422	setOperationAction(Op: ISD::ANY_EXTEND, VT: VecTy, Action: Custom);
423	setOperationAction(Op: ISD::SIGN_EXTEND, VT: VecTy, Action: Custom);
424	setOperationAction(Op: ISD::ZERO_EXTEND, VT: VecTy, Action: Custom);
425	if (Subtarget.useHVXFloatingPoint()) {
426	setOperationAction(Op: ISD::FP_TO_SINT, VT: VecTy, Action: Custom);
427	setOperationAction(Op: ISD::FP_TO_UINT, VT: VecTy, Action: Custom);
428	setOperationAction(Op: ISD::SINT_TO_FP, VT: VecTy, Action: Custom);
429	setOperationAction(Op: ISD::UINT_TO_FP, VT: VecTy, Action: Custom);
430	}
431
432	MVT BoolTy = MVT::getVectorVT(MVT::i1, N);
433	if (!isTypeLegal(VT: BoolTy))
434	setOperationAction(Op: ISD::SETCC, VT: BoolTy, Action: Custom);
435	}
436	}
437	}
438
439	setTargetDAGCombine({ISD::CONCAT_VECTORS, ISD::TRUNCATE, ISD::VSELECT});
440	}
441
442	unsigned
443	HexagonTargetLowering::getPreferredHvxVectorAction(MVT VecTy) const {
444	MVT ElemTy = VecTy.getVectorElementType();
445	unsigned VecLen = VecTy.getVectorNumElements();
446	unsigned HwLen = Subtarget.getVectorLength();
447
448	// Split vectors of i1 that exceed byte vector length.
449	if (ElemTy == MVT::i1 && VecLen > HwLen)
450	return TargetLoweringBase::TypeSplitVector;
451
452	ArrayRef<MVT> Tys = Subtarget.getHVXElementTypes();
453	// For shorter vectors of i1, widen them if any of the corresponding
454	// vectors of integers needs to be widened.
455	if (ElemTy == MVT::i1) {
456	for (MVT T : Tys) {
457	assert(T != MVT::i1);
458	auto A = getPreferredHvxVectorAction(VecTy: MVT::getVectorVT(VT: T, NumElements: VecLen));
459	if (A != ~`0u`)
460	return A;
461	}
462	return ~`0u`;
463	}
464
465	// If the size of VecTy is at least half of the vector length,
466	// widen the vector. Note: the threshold was not selected in
467	// any scientific way.
468	if (llvm::is_contained(Range&: Tys, Element: ElemTy)) {
469	unsigned VecWidth = VecTy.getSizeInBits();
470	unsigned HwWidth = `8`*HwLen;
471	if (VecWidth > `2`*HwWidth)
472	return TargetLoweringBase::TypeSplitVector;
473
474	bool HaveThreshold = HvxWidenThreshold.getNumOccurrences() > `0`;
475	if (HaveThreshold && `8`*HvxWidenThreshold <= VecWidth)
476	return TargetLoweringBase::TypeWidenVector;
477	if (VecWidth >= HwWidth/`2` && VecWidth < HwWidth)
478	return TargetLoweringBase::TypeWidenVector;
479	}
480
481	// Defer to default.
482	return ~`0u`;
483	}
484
485	unsigned
486	HexagonTargetLowering::getCustomHvxOperationAction(SDNode &Op) const {
487	unsigned Opc = Op.getOpcode();
488	switch (Opc) {
489	case HexagonISD::SMUL_LOHI:
490	case HexagonISD::UMUL_LOHI:
491	case HexagonISD::USMUL_LOHI:
492	return TargetLoweringBase::Custom;
493	}
494	return TargetLoweringBase::Legal;
495	}
496
497	SDValue
498	HexagonTargetLowering::getInt(unsigned IntId, MVT ResTy, ArrayRef<SDValue> Ops,
499	const SDLoc &dl, SelectionDAG &DAG) const {
500	SmallVector<SDValue,`4`> IntOps;
501	IntOps.push_back(DAG.getConstant(IntId, dl, MVT::i32));
502	append_range(C&: IntOps, R&: Ops);
503	return DAG.getNode(Opcode: ISD::INTRINSIC_WO_CHAIN, DL: dl, VT: ResTy, Ops: IntOps);
504	}
505
506	MVT
507	HexagonTargetLowering::typeJoin(const TypePair &Tys) const {
508	assert(Tys.first.getVectorElementType() == Tys.second.getVectorElementType());
509
510	MVT ElemTy = Tys.first.getVectorElementType();
511	return MVT::getVectorVT(VT: ElemTy, NumElements: Tys.first.getVectorNumElements() +
512	Tys.second.getVectorNumElements());
513	}
514
515	HexagonTargetLowering::TypePair
516	HexagonTargetLowering::typeSplit(MVT VecTy) const {
517	assert(VecTy.isVector());
518	unsigned NumElem = VecTy.getVectorNumElements();
519	assert((NumElem % `2`) == `0` && "Expecting even-sized vector type");
520	MVT HalfTy = MVT::getVectorVT(VT: VecTy.getVectorElementType(), NumElements: NumElem/`2`);
521	return { HalfTy, HalfTy };
522	}
523
524	MVT
525	HexagonTargetLowering::typeExtElem(MVT VecTy, unsigned Factor) const {
526	MVT ElemTy = VecTy.getVectorElementType();
527	MVT NewElemTy = MVT::getIntegerVT(BitWidth: ElemTy.getSizeInBits() * Factor);
528	return MVT::getVectorVT(VT: NewElemTy, NumElements: VecTy.getVectorNumElements());
529	}
530
531	MVT
532	HexagonTargetLowering::typeTruncElem(MVT VecTy, unsigned Factor) const {
533	MVT ElemTy = VecTy.getVectorElementType();
534	MVT NewElemTy = MVT::getIntegerVT(BitWidth: ElemTy.getSizeInBits() / Factor);
535	return MVT::getVectorVT(VT: NewElemTy, NumElements: VecTy.getVectorNumElements());
536	}
537
538	SDValue
539	HexagonTargetLowering::opCastElem(SDValue Vec, MVT ElemTy,
540	SelectionDAG &DAG) const {
541	if (ty(Op: Vec).getVectorElementType() == ElemTy)
542	return Vec;
543	MVT CastTy = tyVector(Ty: Vec.getValueType().getSimpleVT(), ElemTy);
544	return DAG.getBitcast(VT: CastTy, V: Vec);
545	}
546
547	SDValue
548	HexagonTargetLowering::opJoin(const VectorPair &Ops, const SDLoc &dl,
549	SelectionDAG &DAG) const {
550	return DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL: dl, VT: typeJoin(Tys: ty(Ops)),
551	N1: Ops.first, N2: Ops.second);
552	}
553
554	HexagonTargetLowering::VectorPair
555	HexagonTargetLowering::opSplit(SDValue Vec, const SDLoc &dl,
556	SelectionDAG &DAG) const {
557	TypePair Tys = typeSplit(VecTy: ty(Op: Vec));
558	if (Vec.getOpcode() == HexagonISD::QCAT)
559	return VectorPair (Vec.getOperand(i: `0`), Vec.getOperand(i: `1`));
560	return DAG.SplitVector(N: Vec, DL: dl, LoVT: Tys.first, HiVT: Tys.second);
561	}
562
563	bool
564	HexagonTargetLowering::isHvxSingleTy(MVT Ty) const {
565	return Subtarget.isHVXVectorType(VecTy: Ty) &&
566	Ty.getSizeInBits() == `8` * Subtarget.getVectorLength();
567	}
568
569	bool
570	HexagonTargetLowering::isHvxPairTy(MVT Ty) const {
571	return Subtarget.isHVXVectorType(VecTy: Ty) &&
572	Ty.getSizeInBits() == `16` * Subtarget.getVectorLength();
573	}
574
575	bool
576	HexagonTargetLowering::isHvxBoolTy(MVT Ty) const {
577	return Subtarget.isHVXVectorType(Ty, true) &&
578	Ty.getVectorElementType() == MVT::i1;
579	}
580
581	bool HexagonTargetLowering::allowsHvxMemoryAccess(
582	MVT VecTy, MachineMemOperand::Flags Flags, unsigned Fast) const* {
583	// Bool vectors are excluded by default, but make it explicit to
584	// emphasize that bool vectors cannot be loaded or stored.
585	// Also, disallow double vector stores (to prevent unnecessary
586	// store widening in DAG combiner).
587	if (VecTy.getSizeInBits() > `8`*Subtarget.getVectorLength())
588	return false;
589	if (!Subtarget.isHVXVectorType(VecTy, /IncludeBool=/false))
590	return false;
591	if (Fast)
592	*Fast = `1`;
593	return true;
594	}
595
596	bool HexagonTargetLowering::allowsHvxMisalignedMemoryAccesses(
597	MVT VecTy, MachineMemOperand::Flags Flags, unsigned Fast) const* {
598	if (!Subtarget.isHVXVectorType(VecTy))
599	return false;
600	// XXX Should this be false? vmemu are a bit slower than vmem.
601	if (Fast)
602	*Fast = `1`;
603	return true;
604	}
605
606	void HexagonTargetLowering::AdjustHvxInstrPostInstrSelection(
607	MachineInstr &MI, SDNode Node) const* {
608	unsigned Opc = MI.getOpcode();
609	const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
610	MachineBasicBlock &MB = *MI.getParent();
611	MachineFunction &MF = *MB.getParent();
612	MachineRegisterInfo &MRI = MF.getRegInfo();
613	DebugLoc DL = MI.getDebugLoc();
614	auto At = MI.getIterator();
615
616	switch (Opc) {
617	case Hexagon::PS_vsplatib:
618	if (Subtarget.useHVXV62Ops()) {
619	// SplatV = A2_tfrsi #imm
620	// OutV = V6_lvsplatb SplatV
621	Register SplatV = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
622	BuildMI(MB, At, DL, TII.get(Hexagon::A2_tfrsi), SplatV)
623	.add(MI.getOperand(`1`));
624	Register OutV = MI.getOperand(i: `0`).getReg();
625	BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplatb), OutV)
626	.addReg(SplatV);
627	} else {
628	// SplatV = A2_tfrsi #imm:#imm:#imm:#imm
629	// OutV = V6_lvsplatw SplatV
630	Register SplatV = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
631	const MachineOperand &InpOp = MI.getOperand(i: `1`);
632	assert(InpOp.isImm());
633	uint32_t V = InpOp.getImm() & `0xFF`;
634	BuildMI(MB, At, DL, TII.get(Hexagon::A2_tfrsi), SplatV)
635	.addImm(V << `24` \| V << `16` \| V << `8` \| V);
636	Register OutV = MI.getOperand(i: `0`).getReg();
637	BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplatw), OutV).addReg(SplatV);
638	}
639	MB.erase(I: At);
640	break;
641	case Hexagon::PS_vsplatrb:
642	if (Subtarget.useHVXV62Ops()) {
643	// OutV = V6_lvsplatb Inp
644	Register OutV = MI.getOperand(i: `0`).getReg();
645	BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplatb), OutV)
646	.add(MI.getOperand(`1`));
647	} else {
648	Register SplatV = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
649	const MachineOperand &InpOp = MI.getOperand(i: `1`);
650	BuildMI(MB, At, DL, TII.get(Hexagon::S2_vsplatrb), SplatV)
651	.addReg(InpOp.getReg(), `0`, InpOp.getSubReg());
652	Register OutV = MI.getOperand(i: `0`).getReg();
653	BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplatw), OutV)
654	.addReg(SplatV);
655	}
656	MB.erase(I: At);
657	break;
658	case Hexagon::PS_vsplatih:
659	if (Subtarget.useHVXV62Ops()) {
660	// SplatV = A2_tfrsi #imm
661	// OutV = V6_lvsplath SplatV
662	Register SplatV = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
663	BuildMI(MB, At, DL, TII.get(Hexagon::A2_tfrsi), SplatV)
664	.add(MI.getOperand(`1`));
665	Register OutV = MI.getOperand(i: `0`).getReg();
666	BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplath), OutV)
667	.addReg(SplatV);
668	} else {
669	// SplatV = A2_tfrsi #imm:#imm
670	// OutV = V6_lvsplatw SplatV
671	Register SplatV = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
672	const MachineOperand &InpOp = MI.getOperand(i: `1`);
673	assert(InpOp.isImm());
674	uint32_t V = InpOp.getImm() & `0xFFFF`;
675	BuildMI(MB, At, DL, TII.get(Hexagon::A2_tfrsi), SplatV)
676	.addImm(V << `16` \| V);
677	Register OutV = MI.getOperand(i: `0`).getReg();
678	BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplatw), OutV).addReg(SplatV);
679	}
680	MB.erase(I: At);
681	break;
682	case Hexagon::PS_vsplatrh:
683	if (Subtarget.useHVXV62Ops()) {
684	// OutV = V6_lvsplath Inp
685	Register OutV = MI.getOperand(i: `0`).getReg();
686	BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplath), OutV)
687	.add(MI.getOperand(`1`));
688	} else {
689	// SplatV = A2_combine_ll Inp, Inp
690	// OutV = V6_lvsplatw SplatV
691	Register SplatV = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
692	const MachineOperand &InpOp = MI.getOperand(i: `1`);
693	BuildMI(MB, At, DL, TII.get(Hexagon::A2_combine_ll), SplatV)
694	.addReg(InpOp.getReg(), `0`, InpOp.getSubReg())
695	.addReg(InpOp.getReg(), `0`, InpOp.getSubReg());
696	Register OutV = MI.getOperand(i: `0`).getReg();
697	BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplatw), OutV).addReg(SplatV);
698	}
699	MB.erase(I: At);
700	break;
701	case Hexagon::PS_vsplatiw:
702	case Hexagon::PS_vsplatrw:
703	if (Opc == Hexagon::PS_vsplatiw) {
704	// SplatV = A2_tfrsi #imm
705	Register SplatV = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
706	BuildMI(MB, At, DL, TII.get(Hexagon::A2_tfrsi), SplatV)
707	.add(MI.getOperand(`1`));
708	MI.getOperand(i: `1`).ChangeToRegister(Reg: SplatV, isDef: false);
709	}
710	// OutV = V6_lvsplatw SplatV/Inp
711	MI.setDesc(TII.get(Hexagon::V6_lvsplatw));
712	break;
713	}
714	}
715
716	SDValue
717	HexagonTargetLowering::convertToByteIndex(SDValue ElemIdx, MVT ElemTy,
718	SelectionDAG &DAG) const {
719	if (ElemIdx.getValueType().getSimpleVT() != MVT::i32)
720	ElemIdx = DAG.getBitcast(MVT::i32, ElemIdx);
721
722	unsigned ElemWidth = ElemTy.getSizeInBits();
723	if (ElemWidth == `8`)
724	return ElemIdx;
725
726	unsigned L = Log2_32(Value: ElemWidth/`8`);
727	const SDLoc &dl(ElemIdx);
728	return DAG.getNode(ISD::SHL, dl, MVT::i32,
729	{ElemIdx, DAG.getConstant(L, dl, MVT::i32)});
730	}
731
732	SDValue
733	HexagonTargetLowering::getIndexInWord32(SDValue Idx, MVT ElemTy,
734	SelectionDAG &DAG) const {
735	unsigned ElemWidth = ElemTy.getSizeInBits();
736	assert(ElemWidth >= `8` && ElemWidth <= `32`);
737	if (ElemWidth == `32`)
738	return Idx;
739
740	if (ty(Idx) != MVT::i32)
741	Idx = DAG.getBitcast(MVT::i32, Idx);
742	const SDLoc &dl(Idx);
743	SDValue Mask = DAG.getConstant(`32`/ElemWidth - `1`, dl, MVT::i32);
744	SDValue SubIdx = DAG.getNode(ISD::AND, dl, MVT::i32, {Idx, Mask});
745	return SubIdx;
746	}
747
748	SDValue
749	HexagonTargetLowering::getByteShuffle(const SDLoc &dl, SDValue Op0,
750	SDValue Op1, ArrayRef<int> Mask,
751	SelectionDAG &DAG) const {
752	MVT OpTy = ty(Op: Op0);
753	assert(OpTy == ty(Op1));
754
755	MVT ElemTy = OpTy.getVectorElementType();
756	if (ElemTy == MVT::i8)
757	return DAG.getVectorShuffle(VT: OpTy, dl, N1: Op0, N2: Op1, Mask);
758	assert(ElemTy.getSizeInBits() >= `8`);
759
760	MVT ResTy = tyVector(OpTy, MVT::i8);
761	unsigned ElemSize = ElemTy.getSizeInBits() / `8`;
762
763	SmallVector<int,`128`> ByteMask;
764	for (int M : Mask) {
765	if (M < `0`) {
766	for (unsigned I = `0`; I != ElemSize; ++I)
767	ByteMask.push_back(Elt: -`1`);
768	} else {
769	int NewM = M*ElemSize;
770	for (unsigned I = `0`; I != ElemSize; ++I)
771	ByteMask.push_back(Elt: NewM+I);
772	}
773	}
774	assert(ResTy.getVectorNumElements() == ByteMask.size());
775	return DAG.getVectorShuffle(ResTy, dl, opCastElem(Op0, MVT::i8, DAG),
776	opCastElem(Op1, MVT::i8, DAG), ByteMask);
777	}
778
779	SDValue
780	HexagonTargetLowering::buildHvxVectorReg(ArrayRef<SDValue> Values,
781	const SDLoc &dl, MVT VecTy,
782	SelectionDAG &DAG) const {
783	unsigned VecLen = Values.size();
784	MachineFunction &MF = DAG.getMachineFunction();
785	MVT ElemTy = VecTy.getVectorElementType();
786	unsigned ElemWidth = ElemTy.getSizeInBits();
787	unsigned HwLen = Subtarget.getVectorLength();
788
789	unsigned ElemSize = ElemWidth / `8`;
790	assert(ElemSize*VecLen == HwLen);
791	SmallVector<SDValue,`32`> Words;
792
793	if (VecTy.getVectorElementType() != MVT::i32 &&
794	!(Subtarget.useHVXFloatingPoint() &&
795	VecTy.getVectorElementType() == MVT::f32)) {
796	assert((ElemSize == `1` \|\| ElemSize == `2`) && "Invalid element size");
797	unsigned OpsPerWord = (ElemSize == `1`) ? `4` : `2`;
798	MVT PartVT = MVT::getVectorVT(VT: VecTy.getVectorElementType(), NumElements: OpsPerWord);
799	for (unsigned i = `0`; i != VecLen; i += OpsPerWord) {
800	SDValue W = buildVector32(Elem: Values.slice(N: i, M: OpsPerWord), dl, VecTy: PartVT, DAG);
801	Words.push_back(DAG.getBitcast(MVT::i32, W));
802	}
803	} else {
804	for (SDValue V : Values)
805	Words.push_back(DAG.getBitcast(MVT::i32, V));
806	}
807	auto isSplat = [] (ArrayRef<SDValue> Values, SDValue &SplatV) {
808	unsigned NumValues = Values.size();
809	assert(NumValues > `0`);
810	bool IsUndef = true;
811	for (unsigned i = `0`; i != NumValues; ++i) {
812	if (Values [i].isUndef())
813	continue;
814	IsUndef = false;
815	if (!SplatV.getNode())
816	SplatV = Values [i];
817	else if (SplatV != Values [i])
818	return false;
819	}
820	if (IsUndef)
821	SplatV = Values [`0`];
822	return true;
823	};
824
825	unsigned NumWords = Words.size();
826	SDValue SplatV;
827	bool IsSplat = isSplat (Words, SplatV);
828	if (IsSplat && isUndef(Op: SplatV))
829	return DAG.getUNDEF(VT: VecTy);
830	if (IsSplat) {
831	assert(SplatV.getNode());
832	if (isNullConstant(V: SplatV))
833	return getZero(dl, Ty: VecTy, DAG);
834	MVT WordTy = MVT::getVectorVT(MVT::i32, HwLen/`4`);
835	SDValue S = DAG.getNode(Opcode: ISD::SPLAT_VECTOR, DL: dl, VT: WordTy, Operand: SplatV);
836	return DAG.getBitcast(VT: VecTy, V: S);
837	}
838
839	// Delay recognizing constant vectors until here, so that we can generate
840	// a vsplat.
841	SmallVector<ConstantInt*, `128`> Consts(VecLen);
842	bool AllConst = getBuildVectorConstInts(Values, VecTy, DAG, Consts);
843	if (AllConst) {
844	ArrayRef<Constant> Tmp((Constant*)Consts.begin(),
845	(Constant**)Consts.end());
846	Constant *CV = ConstantVector::get(V: Tmp);
847	Align Alignment(HwLen);
848	SDValue CP =
849	LowerConstantPool(Op: DAG.getConstantPool(C: CV, VT: VecTy, Align: Alignment), DAG);
850	return DAG.getLoad(VT: VecTy, dl, Chain: DAG.getEntryNode(), Ptr: CP,
851	PtrInfo: MachinePointerInfo::getConstantPool(MF), Alignment);
852	}
853
854	// A special case is a situation where the vector is built entirely from
855	// elements extracted from another vector. This could be done via a shuffle
856	// more efficiently, but typically, the size of the source vector will not
857	// match the size of the vector being built (which precludes the use of a
858	// shuffle directly).
859	// This only handles a single source vector, and the vector being built
860	// should be of a sub-vector type of the source vector type.
861	auto IsBuildFromExtracts = [this,&Values] (SDValue &SrcVec,
862	SmallVectorImpl<int> &SrcIdx) {
863	SDValue Vec;
864	for (SDValue V : Values) {
865	if (isUndef(Op: V)) {
866	SrcIdx.push_back(Elt: -`1`);
867	continue;
868	}
869	if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
870	return false;
871	// All extracts should come from the same vector.
872	SDValue T = V.getOperand(i: `0`);
873	if (Vec.getNode() != nullptr && T.getNode() != Vec.getNode())
874	return false;
875	Vec = T;
876	ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val: V.getOperand(i: `1`));
877	if (C == nullptr)
878	return false;
879	int I = C->getSExtValue();
880	assert(I >= `0` && "Negative element index");
881	SrcIdx.push_back(Elt: I);
882	}
883	SrcVec = Vec;
884	return true;
885	};
886
887	SmallVector<int,`128`> ExtIdx;
888	SDValue ExtVec;
889	if (IsBuildFromExtracts (ExtVec, ExtIdx)) {
890	MVT ExtTy = ty(Op: ExtVec);
891	unsigned ExtLen = ExtTy.getVectorNumElements();
892	if (ExtLen == VecLen \|\| ExtLen == `2`*VecLen) {
893	// Construct a new shuffle mask that will produce a vector with the same
894	// number of elements as the input vector, and such that the vector we
895	// want will be the initial subvector of it.
896	SmallVector<int,`128`> Mask;
897	BitVector Used(ExtLen);
898
899	for (int M : ExtIdx) {
900	Mask.push_back(Elt: M);
901	if (M >= `0`)
902	Used.set(M);
903	}
904	// Fill the rest of the mask with the unused elements of ExtVec in hopes
905	// that it will result in a permutation of ExtVec's elements. It's still
906	// fine if it doesn't (e.g. if undefs are present, or elements are
907	// repeated), but permutations can always be done efficiently via vdelta
908	// and vrdelta.
909	for (unsigned I = `0`; I != ExtLen; ++I) {
910	if (Mask.size() == ExtLen)
911	break;
912	if (!Used.test(Idx: I))
913	Mask.push_back(Elt: I);
914	}
915
916	SDValue S = DAG.getVectorShuffle(VT: ExtTy, dl, N1: ExtVec,
917	N2: DAG.getUNDEF(VT: ExtTy), Mask);
918	return ExtLen == VecLen ? S : LoHalf(V: S, DAG);
919	}
920	}
921
922	// Find most common element to initialize vector with. This is to avoid
923	// unnecessary vinsert/valign for cases where the same value is present
924	// many times. Creates a histogram of the vector's elements to find the
925	// most common element n.
926	assert(`4`*Words.size() == Subtarget.getVectorLength());
927	int VecHist[`32`];
928	int n = `0`;
929	for (unsigned i = `0`; i != NumWords; ++i) {
930	VecHist[i] = `0`;
931	if (Words [i].isUndef())
932	continue;
933	for (unsigned j = i; j != NumWords; ++j)
934	if (Words [i] == Words [j])
935	VecHist[i]++;
936
937	if (VecHist[i] > VecHist[n])
938	n = i;
939	}
940
941	SDValue HalfV = getZero(dl, Ty: VecTy, DAG);
942	if (VecHist[n] > `1`) {
943	SDValue SplatV = DAG.getNode(Opcode: ISD::SPLAT_VECTOR, DL: dl, VT: VecTy, Operand: Words [n]);
944	HalfV = DAG.getNode(HexagonISD::VALIGN, dl, VecTy,
945	{HalfV, SplatV, DAG.getConstant(HwLen/`2`, dl, MVT::i32)});
946	}
947	SDValue HalfV0 = HalfV;
948	SDValue HalfV1 = HalfV;
949
950	// Construct two halves in parallel, then or them together. Rn and Rm count
951	// number of rotations needed before the next element. One last rotation is
952	// performed post-loop to position the last element.
953	int Rn = `0`, Rm = `0`;
954	SDValue Sn, Sm;
955	SDValue N = HalfV0;
956	SDValue M = HalfV1;
957	for (unsigned i = `0`; i != NumWords/`2`; ++i) {
958	// Rotate by element count since last insertion.
959	if (Words [i] != Words [n] \|\| VecHist[n] <= `1`) {
960	Sn = DAG.getConstant(Rn, dl, MVT::i32);
961	HalfV0 = DAG.getNode(Opcode: HexagonISD::VROR, DL: dl, VT: VecTy, Ops: {N, Sn});
962	N = DAG.getNode(Opcode: HexagonISD::VINSERTW0, DL: dl, VT: VecTy,
963	Ops: {HalfV0, Words [i]});
964	Rn = `0`;
965	}
966	if (Words [i+NumWords/`2`] != Words [n] \|\| VecHist[n] <= `1`) {
967	Sm = DAG.getConstant(Rm, dl, MVT::i32);
968	HalfV1 = DAG.getNode(Opcode: HexagonISD::VROR, DL: dl, VT: VecTy, Ops: {M, Sm});
969	M = DAG.getNode(Opcode: HexagonISD::VINSERTW0, DL: dl, VT: VecTy,
970	Ops: {HalfV1, Words [i+NumWords/`2`]});
971	Rm = `0`;
972	}
973	Rn += `4`;
974	Rm += `4`;
975	}
976	// Perform last rotation.
977	Sn = DAG.getConstant(Rn+HwLen/`2`, dl, MVT::i32);
978	Sm = DAG.getConstant(Rm, dl, MVT::i32);
979	HalfV0 = DAG.getNode(Opcode: HexagonISD::VROR, DL: dl, VT: VecTy, Ops: {N, Sn});
980	HalfV1 = DAG.getNode(Opcode: HexagonISD::VROR, DL: dl, VT: VecTy, Ops: {M, Sm});
981
982	SDValue T0 = DAG.getBitcast(tyVector(VecTy, MVT::i32), HalfV0);
983	SDValue T1 = DAG.getBitcast(tyVector(VecTy, MVT::i32), HalfV1);
984
985	SDValue DstV = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: ty(Op: T0), Ops: {T0, T1});
986
987	SDValue OutV =
988	DAG.getBitcast(VT: tyVector(Ty: ty(Op: DstV), ElemTy: VecTy.getVectorElementType()), V: DstV);
989	return OutV;
990	}
991
992	SDValue
993	HexagonTargetLowering::createHvxPrefixPred(SDValue PredV, const SDLoc &dl,
994	unsigned BitBytes, bool ZeroFill, SelectionDAG &DAG) const {
995	MVT PredTy = ty(Op: PredV);
996	unsigned HwLen = Subtarget.getVectorLength();
997	MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
998
999	if (Subtarget.isHVXVectorType(VecTy: PredTy, IncludeBool: true)) {
1000	// Move the vector predicate SubV to a vector register, and scale it
1001	// down to match the representation (bytes per type element) that VecV
1002	// uses. The scaling down will pick every 2nd or 4th (every Scale-th
1003	// in general) element and put them at the front of the resulting
1004	// vector. This subvector will then be inserted into the Q2V of VecV.
1005	// To avoid having an operation that generates an illegal type (short
1006	// vector), generate a full size vector.
1007	//
1008	SDValue T = DAG.getNode(Opcode: HexagonISD::Q2V, DL: dl, VT: ByteTy, Operand: PredV);
1009	SmallVector<int,`128`> Mask(HwLen);
1010	// Scale = BitBytes(PredV) / Given BitBytes.
1011	unsigned Scale = HwLen / (PredTy.getVectorNumElements() * BitBytes);
1012	unsigned BlockLen = PredTy.getVectorNumElements() * BitBytes;
1013
1014	for (unsigned i = `0`; i != HwLen; ++i) {
1015	unsigned Num = i % Scale;
1016	unsigned Off = i / Scale;
1017	Mask [BlockLen*Num + Off] = i;
1018	}
1019	SDValue S = DAG.getVectorShuffle(VT: ByteTy, dl, N1: T, N2: DAG.getUNDEF(VT: ByteTy), Mask);
1020	if (!ZeroFill)
1021	return S;
1022	// Fill the bytes beyond BlockLen with 0s.
1023	// V6_pred_scalar2 cannot fill the entire predicate, so it only works
1024	// when BlockLen < HwLen.
1025	assert(BlockLen < HwLen && "vsetq(v1) prerequisite");
1026	MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
1027	SDValue Q = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy,
1028	{DAG.getConstant(BlockLen, dl, MVT::i32)}, DAG);
1029	SDValue M = DAG.getNode(Opcode: HexagonISD::Q2V, DL: dl, VT: ByteTy, Operand: Q);
1030	return DAG.getNode(Opcode: ISD::AND, DL: dl, VT: ByteTy, N1: S, N2: M);
1031	}
1032
1033	// Make sure that this is a valid scalar predicate.
1034	assert(PredTy == MVT::v2i1 \|\| PredTy == MVT::v4i1 \|\| PredTy == MVT::v8i1);
1035
1036	unsigned Bytes = `8` / PredTy.getVectorNumElements();
1037	SmallVector<SDValue,`4`> Words[`2`];
1038	unsigned IdxW = `0`;
1039
1040	SDValue W0 = isUndef(PredV)
1041	? DAG.getUNDEF(MVT::i64)
1042	: DAG.getNode(HexagonISD::P2D, dl, MVT::i64, PredV);
1043	Words[IdxW].push_back(Elt: HiHalf(V: W0, DAG));
1044	Words[IdxW].push_back(Elt: LoHalf(V: W0, DAG));
1045
1046	while (Bytes < BitBytes) {
1047	IdxW ^= `1`;
1048	Words[IdxW].clear();
1049
1050	if (Bytes < `4`) {
1051	for (const SDValue &W : Words[IdxW ^ `1`]) {
1052	SDValue T = expandPredicate(Vec32: W, dl, DAG);
1053	Words[IdxW].push_back(Elt: HiHalf(V: T, DAG));
1054	Words[IdxW].push_back(Elt: LoHalf(V: T, DAG));
1055	}
1056	} else {
1057	for (const SDValue &W : Words[IdxW ^ `1`]) {
1058	Words[IdxW].push_back(Elt: W);
1059	Words[IdxW].push_back(Elt: W);
1060	}
1061	}
1062	Bytes *= `2`;
1063	}
1064
1065	assert(Bytes == BitBytes);
1066
1067	SDValue Vec = ZeroFill ? getZero(dl, Ty: ByteTy, DAG) : DAG.getUNDEF(VT: ByteTy);
1068	SDValue S4 = DAG.getConstant(HwLen-`4`, dl, MVT::i32);
1069	for (const SDValue &W : Words[IdxW]) {
1070	Vec = DAG.getNode(Opcode: HexagonISD::VROR, DL: dl, VT: ByteTy, N1: Vec, N2: S4);
1071	Vec = DAG.getNode(Opcode: HexagonISD::VINSERTW0, DL: dl, VT: ByteTy, N1: Vec, N2: W);
1072	}
1073
1074	return Vec;
1075	}
1076
1077	SDValue
1078	HexagonTargetLowering::buildHvxVectorPred(ArrayRef<SDValue> Values,
1079	const SDLoc &dl, MVT VecTy,
1080	SelectionDAG &DAG) const {
1081	// Construct a vector V of bytes, such that a comparison V >u 0 would
1082	// produce the required vector predicate.
1083	unsigned VecLen = Values.size();
1084	unsigned HwLen = Subtarget.getVectorLength();
1085	assert(VecLen <= HwLen \|\| VecLen == `8`*HwLen);
1086	SmallVector<SDValue,`128`> Bytes;
1087	bool AllT = true, AllF = true;
1088
1089	auto IsTrue = [] (SDValue V) {
1090	if (const auto *N = dyn_cast<ConstantSDNode>(Val: V.getNode()))
1091	return !N->isZero();
1092	return false;
1093	};
1094	auto IsFalse = [] (SDValue V) {
1095	if (const auto *N = dyn_cast<ConstantSDNode>(Val: V.getNode()))
1096	return N->isZero();
1097	return false;
1098	};
1099
1100	if (VecLen <= HwLen) {
1101	// In the hardware, each bit of a vector predicate corresponds to a byte
1102	// of a vector register. Calculate how many bytes does a bit of VecTy
1103	// correspond to.
1104	assert(HwLen % VecLen == `0`);
1105	unsigned BitBytes = HwLen / VecLen;
1106	for (SDValue V : Values) {
1107	AllT &= IsTrue (V);
1108	AllF &= IsFalse (V);
1109
1110	SDValue Ext = !V.isUndef() ? DAG.getZExtOrTrunc(V, dl, MVT::i8)
1111	: DAG.getUNDEF(MVT::i8);
1112	for (unsigned B = `0`; B != BitBytes; ++B)
1113	Bytes.push_back(Elt: Ext);
1114	}
1115	} else {
1116	// There are as many i1 values, as there are bits in a vector register.
1117	// Divide the values into groups of 8 and check that each group consists
1118	// of the same value (ignoring undefs).
1119	for (unsigned I = `0`; I != VecLen; I += `8`) {
1120	unsigned B = `0`;
1121	// Find the first non-undef value in this group.
1122	for (; B != `8`; ++B) {
1123	if (!Values [I+B].isUndef())
1124	break;
1125	}
1126	SDValue F = Values [I+B];
1127	AllT &= IsTrue (F);
1128	AllF &= IsFalse (F);
1129
1130	SDValue Ext = (B < `8`) ? DAG.getZExtOrTrunc(F, dl, MVT::i8)
1131	: DAG.getUNDEF(MVT::i8);
1132	Bytes.push_back(Elt: Ext);
1133	// Verify that the rest of values in the group are the same as the
1134	// first.
1135	for (; B != `8`; ++B)
1136	assert(Values[I+B].isUndef() \|\| Values[I+B] == F);
1137	}
1138	}
1139
1140	if (AllT)
1141	return DAG.getNode(Opcode: HexagonISD::QTRUE, DL: dl, VT: VecTy);
1142	if (AllF)
1143	return DAG.getNode(Opcode: HexagonISD::QFALSE, DL: dl, VT: VecTy);
1144
1145	MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
1146	SDValue ByteVec = buildHvxVectorReg(Values: Bytes, dl, VecTy: ByteTy, DAG);
1147	return DAG.getNode(Opcode: HexagonISD::V2Q, DL: dl, VT: VecTy, Operand: ByteVec);
1148	}
1149
1150	SDValue
1151	HexagonTargetLowering::extractHvxElementReg(SDValue VecV, SDValue IdxV,
1152	const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const {
1153	MVT ElemTy = ty(Op: VecV).getVectorElementType();
1154
1155	unsigned ElemWidth = ElemTy.getSizeInBits();
1156	assert(ElemWidth >= `8` && ElemWidth <= `32`);
1157	(void)ElemWidth;
1158
1159	SDValue ByteIdx = convertToByteIndex(ElemIdx: IdxV, ElemTy, DAG);
1160	SDValue ExWord = DAG.getNode(HexagonISD::VEXTRACTW, dl, MVT::i32,
1161	{VecV, ByteIdx});
1162	if (ElemTy == MVT::i32)
1163	return ExWord;
1164
1165	// Have an extracted word, need to extract the smaller element out of it.
1166	// 1. Extract the bits of (the original) IdxV that correspond to the index
1167	// of the desired element in the 32-bit word.
1168	SDValue SubIdx = getIndexInWord32(Idx: IdxV, ElemTy, DAG);
1169	// 2. Extract the element from the word.
1170	SDValue ExVec = DAG.getBitcast(VT: tyVector(Ty: ty(Op: ExWord), ElemTy), V: ExWord);
1171	return extractVector(ExVec, SubIdx, dl, ElemTy, MVT::i32, DAG);
1172	}
1173
1174	SDValue
1175	HexagonTargetLowering::extractHvxElementPred(SDValue VecV, SDValue IdxV,
1176	const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const {
1177	// Implement other return types if necessary.
1178	assert(ResTy == MVT::i1);
1179
1180	unsigned HwLen = Subtarget.getVectorLength();
1181	MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
1182	SDValue ByteVec = DAG.getNode(Opcode: HexagonISD::Q2V, DL: dl, VT: ByteTy, Operand: VecV);
1183
1184	unsigned Scale = HwLen / ty(Op: VecV).getVectorNumElements();
1185	SDValue ScV = DAG.getConstant(Scale, dl, MVT::i32);
1186	IdxV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV, ScV);
1187
1188	SDValue ExtB = extractHvxElementReg(ByteVec, IdxV, dl, MVT::i32, DAG);
1189	SDValue Zero = DAG.getTargetConstant(`0`, dl, MVT::i32);
1190	return getInstr(Hexagon::C2_cmpgtui, dl, MVT::i1, {ExtB, Zero}, DAG);
1191	}
1192
1193	SDValue
1194	HexagonTargetLowering::insertHvxElementReg(SDValue VecV, SDValue IdxV,
1195	SDValue ValV, const SDLoc &dl, SelectionDAG &DAG) const {
1196	MVT ElemTy = ty(Op: VecV).getVectorElementType();
1197
1198	unsigned ElemWidth = ElemTy.getSizeInBits();
1199	assert(ElemWidth >= `8` && ElemWidth <= `32`);
1200	(void)ElemWidth;
1201
1202	auto InsertWord = [&DAG,&dl,this] (SDValue VecV, SDValue ValV,
1203	SDValue ByteIdxV) {
1204	MVT VecTy = ty(Op: VecV);
1205	unsigned HwLen = Subtarget.getVectorLength();
1206	SDValue MaskV = DAG.getNode(ISD::AND, dl, MVT::i32,
1207	{ByteIdxV, DAG.getConstant(-`4`, dl, MVT::i32)});
1208	SDValue RotV = DAG.getNode(Opcode: HexagonISD::VROR, DL: dl, VT: VecTy, Ops: {VecV, MaskV});
1209	SDValue InsV = DAG.getNode(Opcode: HexagonISD::VINSERTW0, DL: dl, VT: VecTy, Ops: {RotV, ValV});
1210	SDValue SubV = DAG.getNode(ISD::SUB, dl, MVT::i32,
1211	{DAG.getConstant(HwLen, dl, MVT::i32), MaskV});
1212	SDValue TorV = DAG.getNode(Opcode: HexagonISD::VROR, DL: dl, VT: VecTy, Ops: {InsV, SubV});
1213	return TorV;
1214	};
1215
1216	SDValue ByteIdx = convertToByteIndex(ElemIdx: IdxV, ElemTy, DAG);
1217	if (ElemTy == MVT::i32)
1218	return InsertWord (VecV, ValV, ByteIdx);
1219
1220	// If this is not inserting a 32-bit word, convert it into such a thing.
1221	// 1. Extract the existing word from the target vector.
1222	SDValue WordIdx = DAG.getNode(ISD::SRL, dl, MVT::i32,
1223	{ByteIdx, DAG.getConstant(`2`, dl, MVT::i32)});
1224	SDValue Ext = extractHvxElementReg(opCastElem(VecV, MVT::i32, DAG), WordIdx,
1225	dl, MVT::i32, DAG);
1226
1227	// 2. Treating the extracted word as a 32-bit vector, insert the given
1228	// value into it.
1229	SDValue SubIdx = getIndexInWord32(Idx: IdxV, ElemTy, DAG);
1230	MVT SubVecTy = tyVector(Ty: ty(Op: Ext), ElemTy);
1231	SDValue Ins = insertVector(VecV: DAG.getBitcast(VT: SubVecTy, V: Ext),
1232	ValV, IdxV: SubIdx, dl, ValTy: ElemTy, DAG);
1233
1234	// 3. Insert the 32-bit word back into the original vector.
1235	return InsertWord (VecV, Ins, ByteIdx);
1236	}
1237
1238	SDValue
1239	HexagonTargetLowering::insertHvxElementPred(SDValue VecV, SDValue IdxV,
1240	SDValue ValV, const SDLoc &dl, SelectionDAG &DAG) const {
1241	unsigned HwLen = Subtarget.getVectorLength();
1242	MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
1243	SDValue ByteVec = DAG.getNode(Opcode: HexagonISD::Q2V, DL: dl, VT: ByteTy, Operand: VecV);
1244
1245	unsigned Scale = HwLen / ty(Op: VecV).getVectorNumElements();
1246	SDValue ScV = DAG.getConstant(Scale, dl, MVT::i32);
1247	IdxV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV, ScV);
1248	ValV = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, ValV);
1249
1250	SDValue InsV = insertHvxElementReg(VecV: ByteVec, IdxV, ValV, dl, DAG);
1251	return DAG.getNode(Opcode: HexagonISD::V2Q, DL: dl, VT: ty(Op: VecV), Operand: InsV);
1252	}
1253
1254	SDValue
1255	HexagonTargetLowering::extractHvxSubvectorReg(SDValue OrigOp, SDValue VecV,
1256	SDValue IdxV, const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const {
1257	MVT VecTy = ty(Op: VecV);
1258	unsigned HwLen = Subtarget.getVectorLength();
1259	unsigned Idx = IdxV.getNode()->getAsZExtVal();
1260	MVT ElemTy = VecTy.getVectorElementType();
1261	unsigned ElemWidth = ElemTy.getSizeInBits();
1262
1263	// If the source vector is a vector pair, get the single vector containing
1264	// the subvector of interest. The subvector will never overlap two single
1265	// vectors.
1266	if (isHvxPairTy(Ty: VecTy)) {
1267	if (Idx * ElemWidth >= `8`*HwLen)
1268	Idx -= VecTy.getVectorNumElements() / `2`;
1269
1270	VecV = OrigOp;
1271	if (typeSplit(VecTy).first == ResTy)
1272	return VecV;
1273	}
1274
1275	// The only meaningful subvectors of a single HVX vector are those that
1276	// fit in a scalar register.
1277	assert(ResTy.getSizeInBits() == `32` \|\| ResTy.getSizeInBits() == `64`);
1278
1279	MVT WordTy = tyVector(VecTy, MVT::i32);
1280	SDValue WordVec = DAG.getBitcast(VT: WordTy, V: VecV);
1281	unsigned WordIdx = (Idx*ElemWidth) / `32`;
1282
1283	SDValue W0Idx = DAG.getConstant(WordIdx, dl, MVT::i32);
1284	SDValue W0 = extractHvxElementReg(WordVec, W0Idx, dl, MVT::i32, DAG);
1285	if (ResTy.getSizeInBits() == `32`)
1286	return DAG.getBitcast(VT: ResTy, V: W0);
1287
1288	SDValue W1Idx = DAG.getConstant(WordIdx+`1`, dl, MVT::i32);
1289	SDValue W1 = extractHvxElementReg(WordVec, W1Idx, dl, MVT::i32, DAG);
1290	SDValue WW = getCombine(W1, W0, dl, MVT::i64, DAG);
1291	return DAG.getBitcast(VT: ResTy, V: WW);
1292	}
1293
1294	SDValue
1295	HexagonTargetLowering::extractHvxSubvectorPred(SDValue VecV, SDValue IdxV,
1296	const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const {
1297	MVT VecTy = ty(Op: VecV);
1298	unsigned HwLen = Subtarget.getVectorLength();
1299	MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
1300	SDValue ByteVec = DAG.getNode(Opcode: HexagonISD::Q2V, DL: dl, VT: ByteTy, Operand: VecV);
1301	// IdxV is required to be a constant.
1302	unsigned Idx = IdxV.getNode()->getAsZExtVal();
1303
1304	unsigned ResLen = ResTy.getVectorNumElements();
1305	unsigned BitBytes = HwLen / VecTy.getVectorNumElements();
1306	unsigned Offset = Idx * BitBytes;
1307	SDValue Undef = DAG.getUNDEF(VT: ByteTy);
1308	SmallVector<int,`128`> Mask;
1309
1310	if (Subtarget.isHVXVectorType(VecTy: ResTy, IncludeBool: true)) {
1311	// Converting between two vector predicates. Since the result is shorter
1312	// than the source, it will correspond to a vector predicate with the
1313	// relevant bits replicated. The replication count is the ratio of the
1314	// source and target vector lengths.
1315	unsigned Rep = VecTy.getVectorNumElements() / ResLen;
1316	assert(isPowerOf2_32(Rep) && HwLen % Rep == `0`);
1317	for (unsigned i = `0`; i != HwLen/Rep; ++i) {
1318	for (unsigned j = `0`; j != Rep; ++j)
1319	Mask.push_back(Elt: i + Offset);
1320	}
1321	SDValue ShuffV = DAG.getVectorShuffle(VT: ByteTy, dl, N1: ByteVec, N2: Undef, Mask);
1322	return DAG.getNode(Opcode: HexagonISD::V2Q, DL: dl, VT: ResTy, Operand: ShuffV);
1323	}
1324
1325	// Converting between a vector predicate and a scalar predicate. In the
1326	// vector predicate, a group of BitBytes bits will correspond to a single
1327	// i1 element of the source vector type. Those bits will all have the same
1328	// value. The same will be true for ByteVec, where each byte corresponds
1329	// to a bit in the vector predicate.
1330	// The algorithm is to traverse the ByteVec, going over the i1 values from
1331	// the source vector, and generate the corresponding representation in an
1332	// 8-byte vector. To avoid repeated extracts from ByteVec, shuffle the
1333	// elements so that the interesting 8 bytes will be in the low end of the
1334	// vector.
1335	unsigned Rep = `8` / ResLen;
1336	// Make sure the output fill the entire vector register, so repeat the
1337	// 8-byte groups as many times as necessary.
1338	for (unsigned r = `0`; r != HwLen/ResLen; ++r) {
1339	// This will generate the indexes of the 8 interesting bytes.
1340	for (unsigned i = `0`; i != ResLen; ++i) {
1341	for (unsigned j = `0`; j != Rep; ++j)
1342	Mask.push_back(Elt: Offset + i*BitBytes);
1343	}
1344	}
1345
1346	SDValue Zero = getZero(dl, MVT::i32, DAG);
1347	SDValue ShuffV = DAG.getVectorShuffle(VT: ByteTy, dl, N1: ByteVec, N2: Undef, Mask);
1348	// Combine the two low words from ShuffV into a v8i8, and byte-compare
1349	// them against 0.
1350	SDValue W0 = DAG.getNode(HexagonISD::VEXTRACTW, dl, MVT::i32, {ShuffV, Zero});
1351	SDValue W1 = DAG.getNode(HexagonISD::VEXTRACTW, dl, MVT::i32,
1352	{ShuffV, DAG.getConstant(`4`, dl, MVT::i32)});
1353	SDValue Vec64 = getCombine(W1, W0, dl, MVT::v8i8, DAG);
1354	return getInstr(Hexagon::A4_vcmpbgtui, dl, ResTy,
1355	{Vec64, DAG.getTargetConstant(`0`, dl, MVT::i32)}, DAG);
1356	}
1357
1358	SDValue
1359	HexagonTargetLowering::insertHvxSubvectorReg(SDValue VecV, SDValue SubV,
1360	SDValue IdxV, const SDLoc &dl, SelectionDAG &DAG) const {
1361	MVT VecTy = ty(Op: VecV);
1362	MVT SubTy = ty(Op: SubV);
1363	unsigned HwLen = Subtarget.getVectorLength();
1364	MVT ElemTy = VecTy.getVectorElementType();
1365	unsigned ElemWidth = ElemTy.getSizeInBits();
1366
1367	bool IsPair = isHvxPairTy(Ty: VecTy);
1368	MVT SingleTy = MVT::getVectorVT(VT: ElemTy, NumElements: (`8`*HwLen)/ElemWidth);
1369	// The two single vectors that VecV consists of, if it's a pair.
1370	SDValue V0, V1;
1371	SDValue SingleV = VecV;
1372	SDValue PickHi;
1373
1374	if (IsPair) {
1375	V0 = LoHalf(V: VecV, DAG);
1376	V1 = HiHalf(V: VecV, DAG);
1377
1378	SDValue HalfV = DAG.getConstant(SingleTy.getVectorNumElements(),
1379	dl, MVT::i32);
1380	PickHi = DAG.getSetCC(dl, MVT::i1, IdxV, HalfV, ISD::SETUGT);
1381	if (isHvxSingleTy(Ty: SubTy)) {
1382	if (const auto CN = dyn_cast<const* ConstantSDNode>(Val: IdxV.getNode())) {
1383	unsigned Idx = CN->getZExtValue();
1384	assert(Idx == `0` \|\| Idx == VecTy.getVectorNumElements()/`2`);
1385	unsigned SubIdx = (Idx == `0`) ? Hexagon::vsub_lo : Hexagon::vsub_hi;
1386	return DAG.getTargetInsertSubreg(SRIdx: SubIdx, DL: dl, VT: VecTy, Operand: VecV, Subreg: SubV);
1387	}
1388	// If IdxV is not a constant, generate the two variants: with the
1389	// SubV as the high and as the low subregister, and select the right
1390	// pair based on the IdxV.
1391	SDValue InLo = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL: dl, VT: VecTy, Ops: {SubV, V1});
1392	SDValue InHi = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL: dl, VT: VecTy, Ops: {V0, SubV});
1393	return DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT: VecTy, N1: PickHi, N2: InHi, N3: InLo);
1394	}
1395	// The subvector being inserted must be entirely contained in one of
1396	// the vectors V0 or V1. Set SingleV to the correct one, and update
1397	// IdxV to be the index relative to the beginning of that vector.
1398	SDValue S = DAG.getNode(ISD::SUB, dl, MVT::i32, IdxV, HalfV);
1399	IdxV = DAG.getNode(ISD::SELECT, dl, MVT::i32, PickHi, S, IdxV);
1400	SingleV = DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT: SingleTy, N1: PickHi, N2: V1, N3: V0);
1401	}
1402
1403	// The only meaningful subvectors of a single HVX vector are those that
1404	// fit in a scalar register.
1405	assert(SubTy.getSizeInBits() == `32` \|\| SubTy.getSizeInBits() == `64`);
1406	// Convert IdxV to be index in bytes.
1407	auto *IdxN = dyn_cast<ConstantSDNode>(Val: IdxV.getNode());
1408	if (!IdxN \|\| !IdxN->isZero()) {
1409	IdxV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV,
1410	DAG.getConstant(ElemWidth/`8`, dl, MVT::i32));
1411	SingleV = DAG.getNode(Opcode: HexagonISD::VROR, DL: dl, VT: SingleTy, N1: SingleV, N2: IdxV);
1412	}
1413	// When inserting a single word, the rotation back to the original position
1414	// would be by HwLen-Idx, but if two words are inserted, it will need to be
1415	// by (HwLen-4)-Idx.
1416	unsigned RolBase = HwLen;
1417	if (SubTy.getSizeInBits() == `32`) {
1418	SDValue V = DAG.getBitcast(MVT::i32, SubV);
1419	SingleV = DAG.getNode(Opcode: HexagonISD::VINSERTW0, DL: dl, VT: SingleTy, N1: SingleV, N2: V);
1420	} else {
1421	SDValue V = DAG.getBitcast(MVT::i64, SubV);
1422	SDValue R0 = LoHalf(V, DAG);
1423	SDValue R1 = HiHalf(V, DAG);
1424	SingleV = DAG.getNode(Opcode: HexagonISD::VINSERTW0, DL: dl, VT: SingleTy, N1: SingleV, N2: R0);
1425	SingleV = DAG.getNode(HexagonISD::VROR, dl, SingleTy, SingleV,
1426	DAG.getConstant(`4`, dl, MVT::i32));
1427	SingleV = DAG.getNode(Opcode: HexagonISD::VINSERTW0, DL: dl, VT: SingleTy, N1: SingleV, N2: R1);
1428	RolBase = HwLen-`4`;
1429	}
1430	// If the vector wasn't ror'ed, don't ror it back.
1431	if (RolBase != `4` \|\| !IdxN \|\| !IdxN->isZero()) {
1432	SDValue RolV = DAG.getNode(ISD::SUB, dl, MVT::i32,
1433	DAG.getConstant(RolBase, dl, MVT::i32), IdxV);
1434	SingleV = DAG.getNode(Opcode: HexagonISD::VROR, DL: dl, VT: SingleTy, N1: SingleV, N2: RolV);
1435	}
1436
1437	if (IsPair) {
1438	SDValue InLo = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL: dl, VT: VecTy, Ops: {SingleV, V1});
1439	SDValue InHi = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL: dl, VT: VecTy, Ops: {V0, SingleV});
1440	return DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT: VecTy, N1: PickHi, N2: InHi, N3: InLo);
1441	}
1442	return SingleV;
1443	}
1444
1445	SDValue
1446	HexagonTargetLowering::insertHvxSubvectorPred(SDValue VecV, SDValue SubV,
1447	SDValue IdxV, const SDLoc &dl, SelectionDAG &DAG) const {
1448	MVT VecTy = ty(Op: VecV);
1449	MVT SubTy = ty(Op: SubV);
1450	assert(Subtarget.isHVXVectorType(VecTy, true));
1451	// VecV is an HVX vector predicate. SubV may be either an HVX vector
1452	// predicate as well, or it can be a scalar predicate.
1453
1454	unsigned VecLen = VecTy.getVectorNumElements();
1455	unsigned HwLen = Subtarget.getVectorLength();
1456	assert(HwLen % VecLen == `0` && "Unexpected vector type");
1457
1458	unsigned Scale = VecLen / SubTy.getVectorNumElements();
1459	unsigned BitBytes = HwLen / VecLen;
1460	unsigned BlockLen = HwLen / Scale;
1461
1462	MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
1463	SDValue ByteVec = DAG.getNode(Opcode: HexagonISD::Q2V, DL: dl, VT: ByteTy, Operand: VecV);
1464	SDValue ByteSub = createHvxPrefixPred(PredV: SubV, dl, BitBytes, ZeroFill: false, DAG);
1465	SDValue ByteIdx;
1466
1467	auto *IdxN = dyn_cast<ConstantSDNode>(Val: IdxV.getNode());
1468	if (!IdxN \|\| !IdxN->isZero()) {
1469	ByteIdx = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV,
1470	DAG.getConstant(BitBytes, dl, MVT::i32));
1471	ByteVec = DAG.getNode(Opcode: HexagonISD::VROR, DL: dl, VT: ByteTy, N1: ByteVec, N2: ByteIdx);
1472	}
1473
1474	// ByteVec is the target vector VecV rotated in such a way that the
1475	// subvector should be inserted at index 0. Generate a predicate mask
1476	// and use vmux to do the insertion.
1477	assert(BlockLen < HwLen && "vsetq(v1) prerequisite");
1478	MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
1479	SDValue Q = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy,
1480	{DAG.getConstant(BlockLen, dl, MVT::i32)}, DAG);
1481	ByteVec = getInstr(Hexagon::V6_vmux, dl, ByteTy, {Q, ByteSub, ByteVec}, DAG);
1482	// Rotate ByteVec back, and convert to a vector predicate.
1483	if (!IdxN \|\| !IdxN->isZero()) {
1484	SDValue HwLenV = DAG.getConstant(HwLen, dl, MVT::i32);
1485	SDValue ByteXdi = DAG.getNode(ISD::SUB, dl, MVT::i32, HwLenV, ByteIdx);
1486	ByteVec = DAG.getNode(Opcode: HexagonISD::VROR, DL: dl, VT: ByteTy, N1: ByteVec, N2: ByteXdi);
1487	}
1488	return DAG.getNode(Opcode: HexagonISD::V2Q, DL: dl, VT: VecTy, Operand: ByteVec);
1489	}
1490
1491	SDValue
1492	HexagonTargetLowering::extendHvxVectorPred(SDValue VecV, const SDLoc &dl,
1493	MVT ResTy, bool ZeroExt, SelectionDAG &DAG) const {
1494	// Sign- and any-extending of a vector predicate to a vector register is
1495	// equivalent to Q2V. For zero-extensions, generate a vmux between 0 and
1496	// a vector of 1s (where the 1s are of type matching the vector type).
1497	assert(Subtarget.isHVXVectorType(ResTy));
1498	if (!ZeroExt)
1499	return DAG.getNode(Opcode: HexagonISD::Q2V, DL: dl, VT: ResTy, Operand: VecV);
1500
1501	assert(ty(VecV).getVectorNumElements() == ResTy.getVectorNumElements());
1502	SDValue True = DAG.getNode(ISD::SPLAT_VECTOR, dl, ResTy,
1503	DAG.getConstant(`1`, dl, MVT::i32));
1504	SDValue False = getZero(dl, Ty: ResTy, DAG);
1505	return DAG.getSelect(DL: dl, VT: ResTy, Cond: VecV, LHS: True, RHS: False);
1506	}
1507
1508	SDValue
1509	HexagonTargetLowering::compressHvxPred(SDValue VecQ, const SDLoc &dl,
1510	MVT ResTy, SelectionDAG &DAG) const {
1511	// Given a predicate register VecQ, transfer bits VecQ[0..HwLen-1]
1512	// (i.e. the entire predicate register) to bits [0..HwLen-1] of a
1513	// vector register. The remaining bits of the vector register are
1514	// unspecified.
1515
1516	MachineFunction &MF = DAG.getMachineFunction();
1517	unsigned HwLen = Subtarget.getVectorLength();
1518	MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
1519	MVT PredTy = ty(Op: VecQ);
1520	unsigned PredLen = PredTy.getVectorNumElements();
1521	assert(HwLen % PredLen == `0`);
1522	MVT VecTy = MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: `8`*HwLen/PredLen), NumElements: PredLen);
1523
1524	Type Int8Ty = Type::getInt8Ty(C&: DAG.getContext());
1525	SmallVector<Constant*, `128`> Tmp;
1526	// Create an array of bytes (hex): 01,02,04,08,10,20,40,80, 01,02,04,08,...
1527	// These are bytes with the LSB rotated left with respect to their index.
1528	for (unsigned i = `0`; i != HwLen/`8`; ++i) {
1529	for (unsigned j = `0`; j != `8`; ++j)
1530	Tmp.push_back(Elt: ConstantInt::get(Ty: Int8Ty, V: `1ull` << j));
1531	}
1532	Constant *CV = ConstantVector::get(V: Tmp);
1533	Align Alignment(HwLen);
1534	SDValue CP =
1535	LowerConstantPool(Op: DAG.getConstantPool(C: CV, VT: ByteTy, Align: Alignment), DAG);
1536	SDValue Bytes =
1537	DAG.getLoad(VT: ByteTy, dl, Chain: DAG.getEntryNode(), Ptr: CP,
1538	PtrInfo: MachinePointerInfo::getConstantPool(MF), Alignment);
1539
1540	// Select the bytes that correspond to true bits in the vector predicate.
1541	SDValue Sel = DAG.getSelect(DL: dl, VT: VecTy, Cond: VecQ, LHS: DAG.getBitcast(VT: VecTy, V: Bytes),
1542	RHS: getZero(dl, Ty: VecTy, DAG));
1543	// Calculate the OR of all bytes in each group of 8. That will compress
1544	// all the individual bits into a single byte.
1545	// First, OR groups of 4, via vrmpy with 0x01010101.
1546	SDValue All1 =
1547	DAG.getSplatBuildVector(MVT::v4i8, dl, DAG.getConstant(`1`, dl, MVT::i32));
1548	SDValue Vrmpy = getInstr(Hexagon::V6_vrmpyub, dl, ByteTy, {Sel, All1}, DAG);
1549	// Then rotate the accumulated vector by 4 bytes, and do the final OR.
1550	SDValue Rot = getInstr(Hexagon::V6_valignbi, dl, ByteTy,
1551	{Vrmpy, Vrmpy, DAG.getTargetConstant(`4`, dl, MVT::i32)}, DAG);
1552	SDValue Vor = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: ByteTy, Ops: {Vrmpy, Rot});
1553
1554	// Pick every 8th byte and coalesce them at the beginning of the output.
1555	// For symmetry, coalesce every 1+8th byte after that, then every 2+8th
1556	// byte and so on.
1557	SmallVector<int,`128`> Mask;
1558	for (unsigned i = `0`; i != HwLen; ++i)
1559	Mask.push_back(Elt: (`8`*i) % HwLen + i/(HwLen/`8`));
1560	SDValue Collect =
1561	DAG.getVectorShuffle(VT: ByteTy, dl, N1: Vor, N2: DAG.getUNDEF(VT: ByteTy), Mask);
1562	return DAG.getBitcast(VT: ResTy, V: Collect);
1563	}
1564
1565	SDValue
1566	HexagonTargetLowering::resizeToWidth(SDValue VecV, MVT ResTy, bool Signed,
1567	const SDLoc &dl, SelectionDAG &DAG) const {
1568	// Take a vector and resize the element type to match the given type.
1569	MVT InpTy = ty(Op: VecV);
1570	if (InpTy == ResTy)
1571	return VecV;
1572
1573	unsigned InpWidth = InpTy.getSizeInBits();
1574	unsigned ResWidth = ResTy.getSizeInBits();
1575
1576	if (InpTy.isFloatingPoint()) {
1577	return InpWidth < ResWidth ? DAG.getNode(ISD::FP_EXTEND, dl, ResTy, VecV)
1578	: DAG.getNode(ISD::FP_ROUND, dl, ResTy, VecV,
1579	getZero(dl, MVT::i32, DAG));
1580	}
1581
1582	assert(InpTy.isInteger());
1583
1584	if (InpWidth < ResWidth) {
1585	unsigned ExtOpc = Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1586	return DAG.getNode(Opcode: ExtOpc, DL: dl, VT: ResTy, Operand: VecV);
1587	} else {
1588	unsigned NarOpc = Signed ? HexagonISD::SSAT : HexagonISD::USAT;
1589	return DAG.getNode(Opcode: NarOpc, DL: dl, VT: ResTy, N1: VecV, N2: DAG.getValueType(ResTy));
1590	}
1591	}
1592
1593	SDValue
1594	HexagonTargetLowering::extractSubvector(SDValue Vec, MVT SubTy, unsigned SubIdx,
1595	SelectionDAG &DAG) const {
1596	assert(ty(Vec).getSizeInBits() % SubTy.getSizeInBits() == `0`);
1597
1598	const SDLoc &dl(Vec);
1599	unsigned ElemIdx = SubIdx * SubTy.getVectorNumElements();
1600	return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubTy,
1601	{Vec, DAG.getConstant(ElemIdx, dl, MVT::i32)});
1602	}
1603
1604	SDValue
1605	HexagonTargetLowering::LowerHvxBuildVector(SDValue Op, SelectionDAG &DAG)
1606	const {
1607	const SDLoc &dl(Op);
1608	MVT VecTy = ty(Op);
1609
1610	unsigned Size = Op.getNumOperands();
1611	SmallVector<SDValue,`128`> Ops;
1612	for (unsigned i = `0`; i != Size; ++i)
1613	Ops.push_back(Elt: Op.getOperand(i));
1614
1615	// First, split the BUILD_VECTOR for vector pairs. We could generate
1616	// some pairs directly (via splat), but splats should be generated
1617	// by the combiner prior to getting here.
1618	if (VecTy.getSizeInBits() == `16`*Subtarget.getVectorLength()) {
1619	ArrayRef<SDValue> A(Ops);
1620	MVT SingleTy = typeSplit(VecTy).first;
1621	SDValue V0 = buildHvxVectorReg(Values: A.take_front(N: Size/`2`), dl, VecTy: SingleTy, DAG);
1622	SDValue V1 = buildHvxVectorReg(Values: A.drop_front(N: Size/`2`), dl, VecTy: SingleTy, DAG);
1623	return DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL: dl, VT: VecTy, N1: V0, N2: V1);
1624	}
1625
1626	if (VecTy.getVectorElementType() == MVT::i1)
1627	return buildHvxVectorPred(Values: Ops, dl, VecTy, DAG);
1628
1629	// In case of MVT::f16 BUILD_VECTOR, since MVT::f16 is
1630	// not a legal type, just bitcast the node to use i16
1631	// types and bitcast the result back to f16
1632	if (VecTy.getVectorElementType() == MVT::f16) {
1633	SmallVector<SDValue,`64`> NewOps;
1634	for (unsigned i = `0`; i != Size; i++)
1635	NewOps.push_back(DAG.getBitcast(MVT::i16, Ops[i]));
1636
1637	SDValue T0 = DAG.getNode(ISD::BUILD_VECTOR, dl,
1638	tyVector(VecTy, MVT::i16), NewOps);
1639	return DAG.getBitcast(tyVector(VecTy, MVT::f16), T0);
1640	}
1641
1642	return buildHvxVectorReg(Values: Ops, dl, VecTy, DAG);
1643	}
1644
1645	SDValue
1646	HexagonTargetLowering::LowerHvxSplatVector(SDValue Op, SelectionDAG &DAG)
1647	const {
1648	const SDLoc &dl(Op);
1649	MVT VecTy = ty(Op);
1650	MVT ArgTy = ty(Op: Op.getOperand(i: `0`));
1651
1652	if (ArgTy == MVT::f16) {
1653	MVT SplatTy = MVT::getVectorVT(MVT::i16, VecTy.getVectorNumElements());
1654	SDValue ToInt16 = DAG.getBitcast(MVT::i16, Op.getOperand(`0`));
1655	SDValue ToInt32 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, ToInt16);
1656	SDValue Splat = DAG.getNode(Opcode: ISD::SPLAT_VECTOR, DL: dl, VT: SplatTy, Operand: ToInt32);
1657	return DAG.getBitcast(VT: VecTy, V: Splat);
1658	}
1659
1660	return SDValue ();
1661	}
1662
1663	SDValue
1664	HexagonTargetLowering::LowerHvxConcatVectors(SDValue Op, SelectionDAG &DAG)
1665	const {
1666	// Vector concatenation of two integer (non-bool) vectors does not need
1667	// special lowering. Custom-lower concats of bool vectors and expand
1668	// concats of more than 2 vectors.
1669	MVT VecTy = ty(Op);
1670	const SDLoc &dl(Op);
1671	unsigned NumOp = Op.getNumOperands();
1672	if (VecTy.getVectorElementType() != MVT::i1) {
1673	if (NumOp == `2`)
1674	return Op;
1675	// Expand the other cases into a build-vector.
1676	SmallVector<SDValue,`8`> Elems;
1677	for (SDValue V : Op.getNode()->ops())
1678	DAG.ExtractVectorElements(Op: V, Args&: Elems);
1679	// A vector of i16 will be broken up into a build_vector of i16's.
1680	// This is a problem, since at the time of operation legalization,
1681	// all operations are expected to be type-legalized, and i16 is not
1682	// a legal type. If any of the extracted elements is not of a valid
1683	// type, sign-extend it to a valid one.
1684	for (unsigned i = `0`, e = Elems.size(); i != e; ++i) {
1685	SDValue V = Elems [i];
1686	MVT Ty = ty(Op: V);
1687	if (!isTypeLegal(VT: Ty)) {
1688	MVT NTy = typeLegalize(Ty, DAG);
1689	if (V.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1690	Elems [i] = DAG.getNode(Opcode: ISD::SIGN_EXTEND_INREG, DL: dl, VT: NTy,
1691	N1: DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL: dl, VT: NTy,
1692	N1: V.getOperand(i: `0`), N2: V.getOperand(i: `1`)),
1693	N2: DAG.getValueType(Ty));
1694	continue;
1695	}
1696	// A few less complicated cases.
1697	switch (V.getOpcode()) {
1698	case ISD::Constant:
1699	Elems [i] = DAG.getSExtOrTrunc(Op: V, DL: dl, VT: NTy);
1700	break;
1701	case ISD::UNDEF:
1702	Elems [i] = DAG.getUNDEF(VT: NTy);
1703	break;
1704	case ISD::TRUNCATE:
1705	Elems [i] = V.getOperand(i: `0`);
1706	break;
1707	default:
1708	llvm_unreachable("Unexpected vector element");
1709	}
1710	}
1711	}
1712	return DAG.getBuildVector(VT: VecTy, DL: dl, Ops: Elems);
1713	}
1714
1715	assert(VecTy.getVectorElementType() == MVT::i1);
1716	unsigned HwLen = Subtarget.getVectorLength();
1717	assert(isPowerOf2_32(NumOp) && HwLen % NumOp == `0`);
1718
1719	SDValue Op0 = Op.getOperand(i: `0`);
1720
1721	// If the operands are HVX types (i.e. not scalar predicates), then
1722	// defer the concatenation, and create QCAT instead.
1723	if (Subtarget.isHVXVectorType(VecTy: ty(Op: Op0), IncludeBool: true)) {
1724	if (NumOp == `2`)
1725	return DAG.getNode(Opcode: HexagonISD::QCAT, DL: dl, VT: VecTy, N1: Op0, N2: Op.getOperand(i: `1`));
1726
1727	ArrayRef<SDUse> U(Op.getNode()->ops());
1728	SmallVector<SDValue,`4`> SV(U.begin(), U.end());
1729	ArrayRef<SDValue> Ops(SV);
1730
1731	MVT HalfTy = typeSplit(VecTy).first;
1732	SDValue V0 = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL: dl, VT: HalfTy,
1733	Ops: Ops.take_front(N: NumOp/`2`));
1734	SDValue V1 = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL: dl, VT: HalfTy,
1735	Ops: Ops.take_back(N: NumOp/`2`));
1736	return DAG.getNode(Opcode: HexagonISD::QCAT, DL: dl, VT: VecTy, N1: V0, N2: V1);
1737	}
1738
1739	// Count how many bytes (in a vector register) each bit in VecTy
1740	// corresponds to.
1741	unsigned BitBytes = HwLen / VecTy.getVectorNumElements();
1742
1743	SmallVector<SDValue,`8`> Prefixes;
1744	for (SDValue V : Op.getNode()->op_values()) {
1745	SDValue P = createHvxPrefixPred(PredV: V, dl, BitBytes, ZeroFill: true, DAG);
1746	Prefixes.push_back(Elt: P);
1747	}
1748
1749	unsigned InpLen = ty(Op: Op.getOperand(i: `0`)).getVectorNumElements();
1750	MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
1751	SDValue S = DAG.getConstant(InpLen*BitBytes, dl, MVT::i32);
1752	SDValue Res = getZero(dl, Ty: ByteTy, DAG);
1753	for (unsigned i = `0`, e = Prefixes.size(); i != e; ++i) {
1754	Res = DAG.getNode(Opcode: HexagonISD::VROR, DL: dl, VT: ByteTy, N1: Res, N2: S);
1755	Res = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: ByteTy, N1: Res, N2: Prefixes [e-i-`1`]);
1756	}
1757	return DAG.getNode(Opcode: HexagonISD::V2Q, DL: dl, VT: VecTy, Operand: Res);
1758	}
1759
1760	SDValue
1761	HexagonTargetLowering::LowerHvxExtractElement(SDValue Op, SelectionDAG &DAG)
1762	const {
1763	// Change the type of the extracted element to i32.
1764	SDValue VecV = Op.getOperand(i: `0`);
1765	MVT ElemTy = ty(Op: VecV).getVectorElementType();
1766	const SDLoc &dl(Op);
1767	SDValue IdxV = Op.getOperand(i: `1`);
1768	if (ElemTy == MVT::i1)
1769	return extractHvxElementPred(VecV, IdxV, dl, ResTy: ty(Op), DAG);
1770
1771	return extractHvxElementReg(VecV, IdxV, dl, ResTy: ty(Op), DAG);
1772	}
1773
1774	SDValue
1775	HexagonTargetLowering::LowerHvxInsertElement(SDValue Op, SelectionDAG &DAG)
1776	const {
1777	const SDLoc &dl(Op);
1778	MVT VecTy = ty(Op);
1779	SDValue VecV = Op.getOperand(i: `0`);
1780	SDValue ValV = Op.getOperand(i: `1`);
1781	SDValue IdxV = Op.getOperand(i: `2`);
1782	MVT ElemTy = ty(Op: VecV).getVectorElementType();
1783	if (ElemTy == MVT::i1)
1784	return insertHvxElementPred(VecV, IdxV, ValV, dl, DAG);
1785
1786	if (ElemTy == MVT::f16) {
1787	SDValue T0 = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl,
1788	tyVector(VecTy, MVT::i16),
1789	DAG.getBitcast(tyVector(VecTy, MVT::i16), VecV),
1790	DAG.getBitcast(MVT::i16, ValV), IdxV);
1791	return DAG.getBitcast(tyVector(VecTy, MVT::f16), T0);
1792	}
1793
1794	return insertHvxElementReg(VecV, IdxV, ValV, dl, DAG);
1795	}
1796
1797	SDValue
1798	HexagonTargetLowering::LowerHvxExtractSubvector(SDValue Op, SelectionDAG &DAG)
1799	const {
1800	SDValue SrcV = Op.getOperand(i: `0`);
1801	MVT SrcTy = ty(Op: SrcV);
1802	MVT DstTy = ty(Op);
1803	SDValue IdxV = Op.getOperand(i: `1`);
1804	unsigned Idx = IdxV.getNode()->getAsZExtVal();
1805	assert(Idx % DstTy.getVectorNumElements() == `0`);
1806	(void)Idx;
1807	const SDLoc &dl(Op);
1808
1809	MVT ElemTy = SrcTy.getVectorElementType();
1810	if (ElemTy == MVT::i1)
1811	return extractHvxSubvectorPred(VecV: SrcV, IdxV, dl, ResTy: DstTy, DAG);
1812
1813	return extractHvxSubvectorReg(OrigOp: Op, VecV: SrcV, IdxV, dl, ResTy: DstTy, DAG);
1814	}
1815
1816	SDValue
1817	HexagonTargetLowering::LowerHvxInsertSubvector(SDValue Op, SelectionDAG &DAG)
1818	const {
1819	// Idx does not need to be a constant.
1820	SDValue VecV = Op.getOperand(i: `0`);
1821	SDValue ValV = Op.getOperand(i: `1`);
1822	SDValue IdxV = Op.getOperand(i: `2`);
1823
1824	const SDLoc &dl(Op);
1825	MVT VecTy = ty(Op: VecV);
1826	MVT ElemTy = VecTy.getVectorElementType();
1827	if (ElemTy == MVT::i1)
1828	return insertHvxSubvectorPred(VecV, SubV: ValV, IdxV, dl, DAG);
1829
1830	return insertHvxSubvectorReg(VecV, SubV: ValV, IdxV, dl, DAG);
1831	}
1832
1833	SDValue
1834	HexagonTargetLowering::LowerHvxAnyExt(SDValue Op, SelectionDAG &DAG) const {
1835	// Lower any-extends of boolean vectors to sign-extends, since they
1836	// translate directly to Q2V. Zero-extending could also be done equally
1837	// fast, but Q2V is used/recognized in more places.
1838	// For all other vectors, use zero-extend.
1839	MVT ResTy = ty(Op);
1840	SDValue InpV = Op.getOperand(i: `0`);
1841	MVT ElemTy = ty(Op: InpV).getVectorElementType();
1842	if (ElemTy == MVT::i1 && Subtarget.isHVXVectorType(ResTy))
1843	return LowerHvxSignExt(Op, DAG);
1844	return DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: SDLoc (Op), VT: ResTy, Operand: InpV);
1845	}
1846
1847	SDValue
1848	HexagonTargetLowering::LowerHvxSignExt(SDValue Op, SelectionDAG &DAG) const {
1849	MVT ResTy = ty(Op);
1850	SDValue InpV = Op.getOperand(i: `0`);
1851	MVT ElemTy = ty(Op: InpV).getVectorElementType();
1852	if (ElemTy == MVT::i1 && Subtarget.isHVXVectorType(ResTy))
1853	return extendHvxVectorPred(VecV: InpV, dl: SDLoc (Op), ResTy: ty(Op), ZeroExt: false, DAG);
1854	return Op;
1855	}
1856
1857	SDValue
1858	HexagonTargetLowering::LowerHvxZeroExt(SDValue Op, SelectionDAG &DAG) const {
1859	MVT ResTy = ty(Op);
1860	SDValue InpV = Op.getOperand(i: `0`);
1861	MVT ElemTy = ty(Op: InpV).getVectorElementType();
1862	if (ElemTy == MVT::i1 && Subtarget.isHVXVectorType(ResTy))
1863	return extendHvxVectorPred(VecV: InpV, dl: SDLoc (Op), ResTy: ty(Op), ZeroExt: true, DAG);
1864	return Op;
1865	}
1866
1867	SDValue
1868	HexagonTargetLowering::LowerHvxCttz(SDValue Op, SelectionDAG &DAG) const {
1869	// Lower vector CTTZ into a computation using CTLZ (Hacker's Delight):
1870	// cttz(x) = bitwidth(x) - ctlz(~x & (x-1))
1871	const SDLoc &dl(Op);
1872	MVT ResTy = ty(Op);
1873	SDValue InpV = Op.getOperand(i: `0`);
1874	assert(ResTy == ty(InpV));
1875
1876	// Calculate the vectors of 1 and bitwidth(x).
1877	MVT ElemTy = ty(Op: InpV).getVectorElementType();
1878	unsigned ElemWidth = ElemTy.getSizeInBits();
1879
1880	SDValue Vec1 = DAG.getNode(ISD::SPLAT_VECTOR, dl, ResTy,
1881	DAG.getConstant(`1`, dl, MVT::i32));
1882	SDValue VecW = DAG.getNode(ISD::SPLAT_VECTOR, dl, ResTy,
1883	DAG.getConstant(ElemWidth, dl, MVT::i32));
1884	SDValue VecN1 = DAG.getNode(ISD::SPLAT_VECTOR, dl, ResTy,
1885	DAG.getConstant(-`1`, dl, MVT::i32));
1886
1887	// Do not use DAG.getNOT, because that would create BUILD_VECTOR with
1888	// a BITCAST. Here we can skip the BITCAST (so we don't have to handle
1889	// it separately in custom combine or selection).
1890	SDValue A = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: ResTy,
1891	Ops: {DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: ResTy, Ops: {InpV, VecN1}),
1892	DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: ResTy, Ops: {InpV, Vec1})});
1893	return DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: ResTy,
1894	Ops: {VecW, DAG.getNode(Opcode: ISD::CTLZ, DL: dl, VT: ResTy, Operand: A)});
1895	}
1896
1897	SDValue
1898	HexagonTargetLowering::LowerHvxMulh(SDValue Op, SelectionDAG &DAG) const {
1899	const SDLoc &dl(Op);
1900	MVT ResTy = ty(Op);
1901	assert(ResTy.getVectorElementType() == MVT::i32);
1902
1903	SDValue Vs = Op.getOperand(i: `0`);
1904	SDValue Vt = Op.getOperand(i: `1`);
1905
1906	SDVTList ResTys = DAG.getVTList(VT1: ResTy, VT2: ResTy);
1907	unsigned Opc = Op.getOpcode();
1908
1909	// On HVX v62+ producing the full product is cheap, so legalize MULH to LOHI.
1910	if (Opc == ISD::MULHU)
1911	return DAG.getNode(Opcode: HexagonISD::UMUL_LOHI, DL: dl, VTList: ResTys, Ops: {Vs, Vt}).getValue(R: `1`);
1912	if (Opc == ISD::MULHS)
1913	return DAG.getNode(Opcode: HexagonISD::SMUL_LOHI, DL: dl, VTList: ResTys, Ops: {Vs, Vt}).getValue(R: `1`);
1914
1915	#ifndef NDEBUG
1916	Op.dump(G: &DAG);
1917	#endif
1918	llvm_unreachable("Unexpected mulh operation");
1919	}
1920
1921	SDValue
1922	HexagonTargetLowering::LowerHvxMulLoHi(SDValue Op, SelectionDAG &DAG) const {
1923	const SDLoc &dl(Op);
1924	unsigned Opc = Op.getOpcode();
1925	SDValue Vu = Op.getOperand(i: `0`);
1926	SDValue Vv = Op.getOperand(i: `1`);
1927
1928	// If the HI part is not used, convert it to a regular MUL.
1929	if (auto HiVal = Op.getValue(R: `1`); HiVal.use_empty()) {
1930	// Need to preserve the types and the number of values.
1931	SDValue Hi = DAG.getUNDEF(VT: ty(Op: HiVal));
1932	SDValue Lo = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: ty(Op), Ops: {Vu, Vv});
1933	return DAG.getMergeValues(Ops: {Lo, Hi}, dl);
1934	}
1935
1936	bool SignedVu = Opc == HexagonISD::SMUL_LOHI;
1937	bool SignedVv = Opc == HexagonISD::SMUL_LOHI \|\| Opc == HexagonISD::USMUL_LOHI;
1938
1939	// Legal on HVX v62+, but lower it here because patterns can't handle multi-
1940	// valued nodes.
1941	if (Subtarget.useHVXV62Ops())
1942	return emitHvxMulLoHiV62(A: Vu, SignedA: SignedVu, B: Vv, SignedB: SignedVv, dl, DAG);
1943
1944	if (Opc == HexagonISD::SMUL_LOHI) {
1945	// Direct MULHS expansion is cheaper than doing the whole SMUL_LOHI,
1946	// for other signedness LOHI is cheaper.
1947	if (auto LoVal = Op.getValue(R: `0`); LoVal.use_empty()) {
1948	SDValue Hi = emitHvxMulHsV60(A: Vu, B: Vv, dl, DAG);
1949	SDValue Lo = DAG.getUNDEF(VT: ty(Op: LoVal));
1950	return DAG.getMergeValues(Ops: {Lo, Hi}, dl);
1951	}
1952	}
1953
1954	return emitHvxMulLoHiV60(A: Vu, SignedA: SignedVu, B: Vv, SignedB: SignedVv, dl, DAG);
1955	}
1956
1957	SDValue
1958	HexagonTargetLowering::LowerHvxBitcast(SDValue Op, SelectionDAG &DAG) const {
1959	SDValue Val = Op.getOperand(i: `0`);
1960	MVT ResTy = ty(Op);
1961	MVT ValTy = ty(Op: Val);
1962	const SDLoc &dl(Op);
1963
1964	if (isHvxBoolTy(Ty: ValTy) && ResTy.isScalarInteger()) {
1965	unsigned HwLen = Subtarget.getVectorLength();
1966	MVT WordTy = MVT::getVectorVT(MVT::i32, HwLen/`4`);
1967	SDValue VQ = compressHvxPred(VecQ: Val, dl, ResTy: WordTy, DAG);
1968	unsigned BitWidth = ResTy.getSizeInBits();
1969
1970	if (BitWidth < `64`) {
1971	SDValue W0 = extractHvxElementReg(VQ, DAG.getConstant(`0`, dl, MVT::i32),
1972	dl, MVT::i32, DAG);
1973	if (BitWidth == `32`)
1974	return W0;
1975	assert(BitWidth < `32u`);
1976	return DAG.getZExtOrTrunc(Op: W0, DL: dl, VT: ResTy);
1977	}
1978
1979	// The result is >= 64 bits. The only options are 64 or 128.
1980	assert(BitWidth == `64` \|\| BitWidth == `128`);
1981	SmallVector<SDValue,`4`> Words;
1982	for (unsigned i = `0`; i != BitWidth/`32`; ++i) {
1983	SDValue W = extractHvxElementReg(
1984	VQ, DAG.getConstant(i, dl, MVT::i32), dl, MVT::i32, DAG);
1985	Words.push_back(Elt: W);
1986	}
1987	SmallVector<SDValue,`2`> Combines;
1988	assert(Words.size() % `2` == `0`);
1989	for (unsigned i = `0`, e = Words.size(); i < e; i += `2`) {
1990	SDValue C = getCombine(Words[i+`1`], Words[i], dl, MVT::i64, DAG);
1991	Combines.push_back(Elt: C);
1992	}
1993
1994	if (BitWidth == `64`)
1995	return Combines [`0`];
1996
1997	return DAG.getNode(Opcode: ISD::BUILD_PAIR, DL: dl, VT: ResTy, Ops: Combines);
1998	}
1999	if (isHvxBoolTy(Ty: ResTy) && ValTy.isScalarInteger()) {
2000	// Handle bitcast from i128 -> v128i1 and i64 -> v64i1.
2001	unsigned BitWidth = ValTy.getSizeInBits();
2002	unsigned HwLen = Subtarget.getVectorLength();
2003	assert(BitWidth == HwLen);
2004
2005	MVT ValAsVecTy = MVT::getVectorVT(MVT::i8, BitWidth / `8`);
2006	SDValue ValAsVec = DAG.getBitcast(VT: ValAsVecTy, V: Val);
2007	// Splat each byte of Val 8 times.
2008	// Bytes = [(b0)x8, (b1)x8, ...., (b15)x8]
2009	// where b0, b1,..., b15 are least to most significant bytes of I.
2010	SmallVector<SDValue, `128`> Bytes;
2011	// Tmp: 0x01,0x02,0x04,0x08,0x10,0x20,0x40,0x80, 0x01,0x02,0x04,0x08,...
2012	// These are bytes with the LSB rotated left with respect to their index.
2013	SmallVector<SDValue, `128`> Tmp;
2014	for (unsigned I = `0`; I != HwLen / `8`; ++I) {
2015	SDValue Idx = DAG.getConstant(I, dl, MVT::i32);
2016	SDValue Byte =
2017	DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i8, ValAsVec, Idx);
2018	for (unsigned J = `0`; J != `8`; ++J) {
2019	Bytes.push_back(Elt: Byte);
2020	Tmp.push_back(DAG.getConstant(`1ull` << J, dl, MVT::i8));
2021	}
2022	}
2023
2024	MVT ConstantVecTy = MVT::getVectorVT(MVT::i8, HwLen);
2025	SDValue ConstantVec = DAG.getBuildVector(VT: ConstantVecTy, DL: dl, Ops: Tmp);
2026	SDValue I2V = buildHvxVectorReg(Values: Bytes, dl, VecTy: ConstantVecTy, DAG);
2027
2028	// Each Byte in the I2V will be set iff corresponding bit is set in Val.
2029	I2V = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: ConstantVecTy, Ops: {I2V, ConstantVec});
2030	return DAG.getNode(Opcode: HexagonISD::V2Q, DL: dl, VT: ResTy, Operand: I2V);
2031	}
2032
2033	return Op;
2034	}
2035
2036	SDValue
2037	HexagonTargetLowering::LowerHvxExtend(SDValue Op, SelectionDAG &DAG) const {
2038	// Sign- and zero-extends are legal.
2039	assert(Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG);
2040	return DAG.getNode(Opcode: ISD::ZERO_EXTEND_VECTOR_INREG, DL: SDLoc (Op), VT: ty(Op),
2041	Operand: Op.getOperand(i: `0`));
2042	}
2043
2044	SDValue
2045	HexagonTargetLowering::LowerHvxSelect(SDValue Op, SelectionDAG &DAG) const {
2046	MVT ResTy = ty(Op);
2047	if (ResTy.getVectorElementType() != MVT::i1)
2048	return Op;
2049
2050	const SDLoc &dl(Op);
2051	unsigned HwLen = Subtarget.getVectorLength();
2052	unsigned VecLen = ResTy.getVectorNumElements();
2053	assert(HwLen % VecLen == `0`);
2054	unsigned ElemSize = HwLen / VecLen;
2055
2056	MVT VecTy = MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: ElemSize * `8`), NumElements: VecLen);
2057	SDValue S =
2058	DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT: VecTy, N1: Op.getOperand(i: `0`),
2059	N2: DAG.getNode(Opcode: HexagonISD::Q2V, DL: dl, VT: VecTy, Operand: Op.getOperand(i: `1`)),
2060	N3: DAG.getNode(Opcode: HexagonISD::Q2V, DL: dl, VT: VecTy, Operand: Op.getOperand(i: `2`)));
2061	return DAG.getNode(Opcode: HexagonISD::V2Q, DL: dl, VT: ResTy, Operand: S);
2062	}
2063
2064	SDValue
2065	HexagonTargetLowering::LowerHvxShift(SDValue Op, SelectionDAG &DAG) const {
2066	if (SDValue S = getVectorShiftByInt(Op, DAG))
2067	return S;
2068	return Op;
2069	}
2070
2071	SDValue
2072	HexagonTargetLowering::LowerHvxFunnelShift(SDValue Op,
2073	SelectionDAG &DAG) const {
2074	unsigned Opc = Op.getOpcode();
2075	assert(Opc == ISD::FSHL \|\| Opc == ISD::FSHR);
2076
2077	// Make sure the shift amount is within the range of the bitwidth
2078	// of the element type.
2079	SDValue A = Op.getOperand(i: `0`);
2080	SDValue B = Op.getOperand(i: `1`);
2081	SDValue S = Op.getOperand(i: `2`);
2082
2083	MVT InpTy = ty(Op: A);
2084	MVT ElemTy = InpTy.getVectorElementType();
2085
2086	const SDLoc &dl(Op);
2087	unsigned ElemWidth = ElemTy.getSizeInBits();
2088	bool IsLeft = Opc == ISD::FSHL;
2089
2090	// The expansion into regular shifts produces worse code for i8 and for
2091	// right shift of i32 on v65+.
2092	bool UseShifts = ElemTy != MVT::i8;
2093	if (Subtarget.useHVXV65Ops() && ElemTy == MVT::i32)
2094	UseShifts = false;
2095
2096	if (SDValue SplatV = getSplatValue(Op: S, DAG); SplatV && UseShifts) {
2097	// If this is a funnel shift by a scalar, lower it into regular shifts.
2098	SDValue Mask = DAG.getConstant(ElemWidth - `1`, dl, MVT::i32);
2099	SDValue ModS =
2100	DAG.getNode(ISD::AND, dl, MVT::i32,
2101	{DAG.getZExtOrTrunc(SplatV, dl, MVT::i32), Mask});
2102	SDValue NegS =
2103	DAG.getNode(ISD::SUB, dl, MVT::i32,
2104	{DAG.getConstant(ElemWidth, dl, MVT::i32), ModS});
2105	SDValue IsZero =
2106	DAG.getSetCC(dl, MVT::i1, ModS, getZero(dl, MVT::i32, DAG), ISD::SETEQ);
2107	// FSHL A, B => A << \| B >>n
2108	// FSHR A, B => A <<n \| B >>
2109	SDValue Part1 =
2110	DAG.getNode(Opcode: HexagonISD::VASL, DL: dl, VT: InpTy, Ops: {A, IsLeft ? ModS : NegS});
2111	SDValue Part2 =
2112	DAG.getNode(Opcode: HexagonISD::VLSR, DL: dl, VT: InpTy, Ops: {B, IsLeft ? NegS : ModS});
2113	SDValue Or = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: InpTy, Ops: {Part1, Part2});
2114	// If the shift amount was 0, pick A or B, depending on the direction.
2115	// The opposite shift will also be by 0, so the "Or" will be incorrect.
2116	return DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT: InpTy, Ops: {IsZero, (IsLeft ? A : B), Or});
2117	}
2118
2119	SDValue Mask = DAG.getSplatBuildVector(
2120	VT: InpTy, DL: dl, Op: DAG.getConstant(Val: ElemWidth - `1`, DL: dl, VT: ElemTy));
2121
2122	unsigned MOpc = Opc == ISD::FSHL ? HexagonISD::MFSHL : HexagonISD::MFSHR;
2123	return DAG.getNode(Opcode: MOpc, DL: dl, VT: ty(Op),
2124	Ops: {A, B, DAG.getNode(Opcode: ISD::AND, DL: dl, VT: InpTy, Ops: {S, Mask})});
2125	}
2126
2127	SDValue
2128	HexagonTargetLowering::LowerHvxIntrinsic(SDValue Op, SelectionDAG &DAG) const {
2129	const SDLoc &dl(Op);
2130	unsigned IntNo = Op.getConstantOperandVal(i: `0`);
2131	SmallVector<SDValue> Ops(Op ->ops().begin(), Op ->ops().end());
2132
2133	auto Swap = [&](SDValue P) {
2134	return DAG.getMergeValues(Ops: {P.getValue(R: `1`), P.getValue(R: `0`)}, dl);
2135	};
2136
2137	switch (IntNo) {
2138	case Intrinsic::hexagon_V6_pred_typecast:
2139	case Intrinsic::hexagon_V6_pred_typecast_128B: {
2140	MVT ResTy = ty(Op), InpTy = ty(Op: Ops [`1`]);
2141	if (isHvxBoolTy(Ty: ResTy) && isHvxBoolTy(Ty: InpTy)) {
2142	if (ResTy == InpTy)
2143	return Ops [`1`];
2144	return DAG.getNode(Opcode: HexagonISD::TYPECAST, DL: dl, VT: ResTy, Operand: Ops [`1`]);
2145	}
2146	break;
2147	}
2148	case Intrinsic::hexagon_V6_vmpyss_parts:
2149	case Intrinsic::hexagon_V6_vmpyss_parts_128B:
2150	return Swap (DAG.getNode(Opcode: HexagonISD::SMUL_LOHI, DL: dl, VTList: Op ->getVTList(),
2151	Ops: {Ops [`1`], Ops [`2`]}));
2152	case Intrinsic::hexagon_V6_vmpyuu_parts:
2153	case Intrinsic::hexagon_V6_vmpyuu_parts_128B:
2154	return Swap (DAG.getNode(Opcode: HexagonISD::UMUL_LOHI, DL: dl, VTList: Op ->getVTList(),
2155	Ops: {Ops [`1`], Ops [`2`]}));
2156	case Intrinsic::hexagon_V6_vmpyus_parts:
2157	case Intrinsic::hexagon_V6_vmpyus_parts_128B: {
2158	return Swap (DAG.getNode(Opcode: HexagonISD::USMUL_LOHI, DL: dl, VTList: Op ->getVTList(),
2159	Ops: {Ops [`1`], Ops [`2`]}));
2160	}
2161	} // switch
2162
2163	return Op;
2164	}
2165
2166	SDValue
2167	HexagonTargetLowering::LowerHvxMaskedOp(SDValue Op, SelectionDAG &DAG) const {
2168	const SDLoc &dl(Op);
2169	unsigned HwLen = Subtarget.getVectorLength();
2170	MachineFunction &MF = DAG.getMachineFunction();
2171	auto *MaskN = cast<MaskedLoadStoreSDNode>(Val: Op.getNode());
2172	SDValue Mask = MaskN->getMask();
2173	SDValue Chain = MaskN->getChain();
2174	SDValue Base = MaskN->getBasePtr();
2175	auto *MemOp = MF.getMachineMemOperand(MMO: MaskN->getMemOperand(), Offset: `0`, Size: HwLen);
2176
2177	unsigned Opc = Op ->getOpcode();
2178	assert(Opc == ISD::MLOAD \|\| Opc == ISD::MSTORE);
2179
2180	if (Opc == ISD::MLOAD) {
2181	MVT ValTy = ty(Op);
2182	SDValue Load = DAG.getLoad(VT: ValTy, dl, Chain, Ptr: Base, MMO: MemOp);
2183	SDValue Thru = cast<MaskedLoadSDNode>(Val: MaskN)->getPassThru();
2184	if (isUndef(Op: Thru))
2185	return Load;
2186	SDValue VSel = DAG.getNode(Opcode: ISD::VSELECT, DL: dl, VT: ValTy, N1: Mask, N2: Load, N3: Thru);
2187	return DAG.getMergeValues(Ops: {VSel, Load.getValue(R: `1`)}, dl);
2188	}
2189
2190	// MSTORE
2191	// HVX only has aligned masked stores.
2192
2193	// TODO: Fold negations of the mask into the store.
2194	unsigned StoreOpc = Hexagon::V6_vS32b_qpred_ai;
2195	SDValue Value = cast<MaskedStoreSDNode>(Val: MaskN)->getValue();
2196	SDValue Offset0 = DAG.getTargetConstant(Val: `0`, DL: dl, VT: ty(Op: Base));
2197
2198	if (MaskN->getAlign().value() % HwLen == `0`) {
2199	SDValue Store = getInstr(StoreOpc, dl, MVT::Other,
2200	{Mask, Base, Offset0, Value, Chain}, DAG);
2201	DAG.setNodeMemRefs(N: cast<MachineSDNode>(Val: Store.getNode()), NewMemRefs: {MemOp});
2202	return Store;
2203	}
2204
2205	// Unaligned case.
2206	auto StoreAlign = [&](SDValue V, SDValue A) {
2207	SDValue Z = getZero(dl, Ty: ty(Op: V), DAG);
2208	// TODO: use funnel shifts?
2209	// vlalign(Vu,Vv,Rt) rotates the pair Vu:Vv left by Rt and takes the
2210	// upper half.
2211	SDValue LoV = getInstr(Hexagon::V6_vlalignb, dl, ty(V), {V, Z, A}, DAG);
2212	SDValue HiV = getInstr(Hexagon::V6_vlalignb, dl, ty(V), {Z, V, A}, DAG);
2213	return std::make_pair(x&: LoV, y&: HiV);
2214	};
2215
2216	MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
2217	MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
2218	SDValue MaskV = DAG.getNode(Opcode: HexagonISD::Q2V, DL: dl, VT: ByteTy, Operand: Mask);
2219	VectorPair Tmp = StoreAlign (MaskV, Base);
2220	VectorPair MaskU = {DAG.getNode(Opcode: HexagonISD::V2Q, DL: dl, VT: BoolTy, Operand: Tmp.first),
2221	DAG.getNode(Opcode: HexagonISD::V2Q, DL: dl, VT: BoolTy, Operand: Tmp.second)};
2222	VectorPair ValueU = StoreAlign (Value, Base);
2223
2224	SDValue Offset1 = DAG.getTargetConstant(HwLen, dl, MVT::i32);
2225	SDValue StoreLo =
2226	getInstr(StoreOpc, dl, MVT::Other,
2227	{MaskU.first, Base, Offset0, ValueU.first, Chain}, DAG);
2228	SDValue StoreHi =
2229	getInstr(StoreOpc, dl, MVT::Other,
2230	{MaskU.second, Base, Offset1, ValueU.second, Chain}, DAG);
2231	DAG.setNodeMemRefs(N: cast<MachineSDNode>(Val: StoreLo.getNode()), NewMemRefs: {MemOp});
2232	DAG.setNodeMemRefs(N: cast<MachineSDNode>(Val: StoreHi.getNode()), NewMemRefs: {MemOp});
2233	return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, {StoreLo, StoreHi});
2234	}
2235
2236	SDValue HexagonTargetLowering::LowerHvxFpExtend(SDValue Op,
2237	SelectionDAG &DAG) const {
2238	// This conversion only applies to QFloat. IEEE extension from f16 to f32
2239	// is legal (done via a pattern).
2240	assert(Subtarget.useHVXQFloatOps());
2241
2242	assert(Op ->getOpcode() == ISD::FP_EXTEND);
2243
2244	MVT VecTy = ty(Op);
2245	MVT ArgTy = ty(Op: Op.getOperand(i: `0`));
2246	const SDLoc &dl(Op);
2247	assert(VecTy == MVT::v64f32 && ArgTy == MVT::v64f16);
2248
2249	SDValue F16Vec = Op.getOperand(i: `0`);
2250
2251	APFloat FloatVal = APFloat (`1.0f`);
2252	bool Ignored;
2253	FloatVal.convert(ToSemantics: APFloat::IEEEhalf(), RM: APFloat::rmNearestTiesToEven, losesInfo: &Ignored);
2254	SDValue Fp16Ones = DAG.getConstantFP(Val: FloatVal, DL: dl, VT: ArgTy);
2255	SDValue VmpyVec =
2256	getInstr(Hexagon::V6_vmpy_qf32_hf, dl, VecTy, {F16Vec, Fp16Ones}, DAG);
2257
2258	MVT HalfTy = typeSplit(VecTy).first;
2259	VectorPair Pair = opSplit(Vec: VmpyVec, dl, DAG);
2260	SDValue LoVec =
2261	getInstr(Hexagon::V6_vconv_sf_qf32, dl, HalfTy, {Pair.first}, DAG);
2262	SDValue HiVec =
2263	getInstr(Hexagon::V6_vconv_sf_qf32, dl, HalfTy, {Pair.second}, DAG);
2264
2265	SDValue ShuffVec =
2266	getInstr(Hexagon::V6_vshuffvdd, dl, VecTy,
2267	{HiVec, LoVec, DAG.getConstant(-`4`, dl, MVT::i32)}, DAG);
2268
2269	return ShuffVec;
2270	}
2271
2272	SDValue
2273	HexagonTargetLowering::LowerHvxFpToInt(SDValue Op, SelectionDAG &DAG) const {
2274	// Catch invalid conversion ops (just in case).
2275	assert(Op.getOpcode() == ISD::FP_TO_SINT \|\|
2276	Op.getOpcode() == ISD::FP_TO_UINT);
2277
2278	MVT ResTy = ty(Op);
2279	MVT FpTy = ty(Op: Op.getOperand(i: `0`)).getVectorElementType();
2280	MVT IntTy = ResTy.getVectorElementType();
2281
2282	if (Subtarget.useHVXIEEEFPOps()) {
2283	// There are only conversions from f16.
2284	if (FpTy == MVT::f16) {
2285	// Other int types aren't legal in HVX, so we shouldn't see them here.
2286	assert(IntTy == MVT::i8 \|\| IntTy == MVT::i16 \|\| IntTy == MVT::i32);
2287	// Conversions to i8 and i16 are legal.
2288	if (IntTy == MVT::i8 \|\| IntTy == MVT::i16)
2289	return Op;
2290	}
2291	}
2292
2293	if (IntTy.getSizeInBits() != FpTy.getSizeInBits())
2294	return EqualizeFpIntConversion(Op, DAG);
2295
2296	return ExpandHvxFpToInt(Op, DAG);
2297	}
2298
2299	SDValue
2300	HexagonTargetLowering::LowerHvxIntToFp(SDValue Op, SelectionDAG &DAG) const {
2301	// Catch invalid conversion ops (just in case).
2302	assert(Op.getOpcode() == ISD::SINT_TO_FP \|\|
2303	Op.getOpcode() == ISD::UINT_TO_FP);
2304
2305	MVT ResTy = ty(Op);
2306	MVT IntTy = ty(Op: Op.getOperand(i: `0`)).getVectorElementType();
2307	MVT FpTy = ResTy.getVectorElementType();
2308
2309	if (Subtarget.useHVXIEEEFPOps()) {
2310	// There are only conversions to f16.
2311	if (FpTy == MVT::f16) {
2312	// Other int types aren't legal in HVX, so we shouldn't see them here.
2313	assert(IntTy == MVT::i8 \|\| IntTy == MVT::i16 \|\| IntTy == MVT::i32);
2314	// i8, i16 -> f16 is legal.
2315	if (IntTy == MVT::i8 \|\| IntTy == MVT::i16)
2316	return Op;
2317	}
2318	}
2319
2320	if (IntTy.getSizeInBits() != FpTy.getSizeInBits())
2321	return EqualizeFpIntConversion(Op, DAG);
2322
2323	return ExpandHvxIntToFp(Op, DAG);
2324	}
2325
2326	HexagonTargetLowering::TypePair
2327	HexagonTargetLowering::typeExtendToWider(MVT Ty0, MVT Ty1) const {
2328	// Compare the widths of elements of the two types, and extend the narrower
2329	// type to match the with of the wider type. For vector types, apply this
2330	// to the element type.
2331	assert(Ty0.isVector() == Ty1.isVector());
2332
2333	MVT ElemTy0 = Ty0.getScalarType();
2334	MVT ElemTy1 = Ty1.getScalarType();
2335
2336	unsigned Width0 = ElemTy0.getSizeInBits();
2337	unsigned Width1 = ElemTy1.getSizeInBits();
2338	unsigned MaxWidth = std::max(a: Width0, b: Width1);
2339
2340	auto getScalarWithWidth = [](MVT ScalarTy, unsigned Width) {
2341	if (ScalarTy.isInteger())
2342	return MVT::getIntegerVT(BitWidth: Width);
2343	assert(ScalarTy.isFloatingPoint());
2344	return MVT::getFloatingPointVT(BitWidth: Width);
2345	};
2346
2347	MVT WideETy0 = getScalarWithWidth (ElemTy0, MaxWidth);
2348	MVT WideETy1 = getScalarWithWidth (ElemTy1, MaxWidth);
2349
2350	if (!Ty0.isVector()) {
2351	// Both types are scalars.
2352	return {WideETy0, WideETy1};
2353	}
2354
2355	// Vector types.
2356	unsigned NumElem = Ty0.getVectorNumElements();
2357	assert(NumElem == Ty1.getVectorNumElements());
2358
2359	return {MVT::getVectorVT(VT: WideETy0, NumElements: NumElem),
2360	MVT::getVectorVT(VT: WideETy1, NumElements: NumElem)};
2361	}
2362
2363	HexagonTargetLowering::TypePair
2364	HexagonTargetLowering::typeWidenToWider(MVT Ty0, MVT Ty1) const {
2365	// Compare the numbers of elements of two vector types, and widen the
2366	// narrower one to match the number of elements in the wider one.
2367	assert(Ty0.isVector() && Ty1.isVector());
2368
2369	unsigned Len0 = Ty0.getVectorNumElements();
2370	unsigned Len1 = Ty1.getVectorNumElements();
2371	if (Len0 == Len1)
2372	return {Ty0, Ty1};
2373
2374	unsigned MaxLen = std::max(a: Len0, b: Len1);
2375	return {MVT::getVectorVT(VT: Ty0.getVectorElementType(), NumElements: MaxLen),
2376	MVT::getVectorVT(VT: Ty1.getVectorElementType(), NumElements: MaxLen)};
2377	}
2378
2379	MVT
2380	HexagonTargetLowering::typeLegalize(MVT Ty, SelectionDAG &DAG) const {
2381	EVT LegalTy = getTypeToTransformTo(Context&: *DAG.getContext(), VT: Ty);
2382	assert(LegalTy.isSimple());
2383	return LegalTy.getSimpleVT();
2384	}
2385
2386	MVT
2387	HexagonTargetLowering::typeWidenToHvx(MVT Ty) const {
2388	unsigned HwWidth = `8` * Subtarget.getVectorLength();
2389	assert(Ty.getSizeInBits() <= HwWidth);
2390	if (Ty.getSizeInBits() == HwWidth)
2391	return Ty;
2392
2393	MVT ElemTy = Ty.getScalarType();
2394	return MVT::getVectorVT(VT: ElemTy, NumElements: HwWidth / ElemTy.getSizeInBits());
2395	}
2396
2397	HexagonTargetLowering::VectorPair
2398	HexagonTargetLowering::emitHvxAddWithOverflow(SDValue A, SDValue B,
2399	const SDLoc &dl, bool Signed, SelectionDAG &DAG) const {
2400	// Compute A+B, return {A+B, O}, where O = vector predicate indicating
2401	// whether an overflow has occured.
2402	MVT ResTy = ty(Op: A);
2403	assert(ResTy == ty(B));
2404	MVT PredTy = MVT::getVectorVT(MVT::i1, ResTy.getVectorNumElements());
2405
2406	if (!Signed) {
2407	// V62+ has V6_vaddcarry, but it requires input predicate, so it doesn't
2408	// save any instructions.
2409	SDValue Add = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: ResTy, Ops: {A, B});
2410	SDValue Ovf = DAG.getSetCC(DL: dl, VT: PredTy, LHS: Add, RHS: A, Cond: ISD::SETULT);
2411	return {Add, Ovf};
2412	}
2413
2414	// Signed overflow has happened, if:
2415	// (A, B have the same sign) and (A+B has a different sign from either)
2416	// i.e. (~A xor B) & ((A+B) xor B), then check the sign bit
2417	SDValue Add = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: ResTy, Ops: {A, B});
2418	SDValue NotA =
2419	DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: ResTy, Ops: {A, DAG.getConstant(Val: -`1`, DL: dl, VT: ResTy)});
2420	SDValue Xor0 = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: ResTy, Ops: {NotA, B});
2421	SDValue Xor1 = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: ResTy, Ops: {Add, B});
2422	SDValue And = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: ResTy, Ops: {Xor0, Xor1});
2423	SDValue MSB =
2424	DAG.getSetCC(DL: dl, VT: PredTy, LHS: And, RHS: getZero(dl, Ty: ResTy, DAG), Cond: ISD::SETLT);
2425	return {Add, MSB};
2426	}
2427
2428	HexagonTargetLowering::VectorPair
2429	HexagonTargetLowering::emitHvxShiftRightRnd(SDValue Val, unsigned Amt,
2430	bool Signed, SelectionDAG &DAG) const {
2431	// Shift Val right by Amt bits, round the result to the nearest integer,
2432	// tie-break by rounding halves to even integer.
2433
2434	const SDLoc &dl(Val);
2435	MVT ValTy = ty(Op: Val);
2436
2437	// This should also work for signed integers.
2438	//
2439	// uint tmp0 = inp + ((1 << (Amt-1)) - 1);
2440	// bool ovf = (inp > tmp0);
2441	// uint rup = inp & (1 << (Amt+1));
2442	//
2443	// uint tmp1 = inp >> (Amt-1); // tmp1 == tmp2 iff
2444	// uint tmp2 = tmp0 >> (Amt-1); // the Amt-1 lower bits were all 0
2445	// uint tmp3 = tmp2 + rup;
2446	// uint frac = (tmp1 != tmp2) ? tmp2 >> 1 : tmp3 >> 1;
2447	unsigned ElemWidth = ValTy.getVectorElementType().getSizeInBits();
2448	MVT ElemTy = MVT::getIntegerVT(BitWidth: ElemWidth);
2449	MVT IntTy = tyVector(Ty: ValTy, ElemTy);
2450	MVT PredTy = MVT::getVectorVT(MVT::i1, IntTy.getVectorNumElements());
2451	unsigned ShRight = Signed ? ISD::SRA : ISD::SRL;
2452
2453	SDValue Inp = DAG.getBitcast(VT: IntTy, V: Val);
2454	SDValue LowBits = DAG.getConstant(Val: (`1ull` << (Amt - `1`)) - `1`, DL: dl, VT: IntTy);
2455
2456	SDValue AmtP1 = DAG.getConstant(Val: `1ull` << Amt, DL: dl, VT: IntTy);
2457	SDValue And = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: IntTy, Ops: {Inp, AmtP1});
2458	SDValue Zero = getZero(dl, Ty: IntTy, DAG);
2459	SDValue Bit = DAG.getSetCC(DL: dl, VT: PredTy, LHS: And, RHS: Zero, Cond: ISD::SETNE);
2460	SDValue Rup = DAG.getZExtOrTrunc(Op: Bit, DL: dl, VT: IntTy);
2461	auto [Tmp0, Ovf] = emitHvxAddWithOverflow(A: Inp, B: LowBits, dl, Signed, DAG);
2462
2463	SDValue AmtM1 = DAG.getConstant(Val: Amt - `1`, DL: dl, VT: IntTy);
2464	SDValue Tmp1 = DAG.getNode(Opcode: ShRight, DL: dl, VT: IntTy, N1: Inp, N2: AmtM1);
2465	SDValue Tmp2 = DAG.getNode(Opcode: ShRight, DL: dl, VT: IntTy, N1: Tmp0, N2: AmtM1);
2466	SDValue Tmp3 = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: IntTy, N1: Tmp2, N2: Rup);
2467
2468	SDValue Eq = DAG.getSetCC(DL: dl, VT: PredTy, LHS: Tmp1, RHS: Tmp2, Cond: ISD::SETEQ);
2469	SDValue One = DAG.getConstant(Val: `1`, DL: dl, VT: IntTy);
2470	SDValue Tmp4 = DAG.getNode(Opcode: ShRight, DL: dl, VT: IntTy, Ops: {Tmp2, One});
2471	SDValue Tmp5 = DAG.getNode(Opcode: ShRight, DL: dl, VT: IntTy, Ops: {Tmp3, One});
2472	SDValue Mux = DAG.getNode(Opcode: ISD::VSELECT, DL: dl, VT: IntTy, Ops: {Eq, Tmp5, Tmp4});
2473	return {Mux, Ovf};
2474	}
2475
2476	SDValue
2477	HexagonTargetLowering::emitHvxMulHsV60(SDValue A, SDValue B, const SDLoc &dl,
2478	SelectionDAG &DAG) const {
2479	MVT VecTy = ty(Op: A);
2480	MVT PairTy = typeJoin(Tys: {VecTy, VecTy});
2481	assert(VecTy.getVectorElementType() == MVT::i32);
2482
2483	SDValue S16 = DAG.getConstant(`16`, dl, MVT::i32);
2484
2485	// mulhs(A,B) =
2486	// = [(Hi(A)2^16 + Lo(A)) s (Hi(B)2^16 + Lo(B))] >> 32*
2487	// = [Hi(A)2^16 s Hi(B)2^16 + Hi(A) su Lo(B)2^16*
2488	// + Lo(A) us (Hi(B)2^16 + Lo(B))] >> 32
2489	// = [Hi(A) s Hi(B)2^32 + Hi(A) su Lo(B)2^16 + Lo(A) us B] >> 32*
2490	// The low half of Lo(A)Lo(B) will be discarded (it's not added to*
2491	// anything, so it cannot produce any carry over to higher bits),
2492	// so everything in [] can be shifted by 16 without loss of precision.
2493	// = [Hi(A) s Hi(B)2^16 + Hi(A)su Lo(B) + Lo(A)B >> 16] >> 16
2494	// = [Hi(A) s Hi(B)2^16 + Hi(A)su Lo(B) + V6_vmpyewuh(A,B)] >> 16*
2495	// The final additions need to make sure to properly maintain any carry-
2496	// out bits.
2497	//
2498	// Hi(B) Lo(B)
2499	// Hi(A) Lo(A)
2500	// --------------
2501	// Lo(B)Lo(A) \| T0 = V6_vmpyewuh(B,A) does this,*
2502	// Hi(B)Lo(A) \| + dropping the low 16 bits*
2503	// Hi(A)Lo(B) \| T2*
2504	// Hi(B)Hi(A)*
2505
2506	SDValue T0 = getInstr(Hexagon::V6_vmpyewuh, dl, VecTy, {B, A}, DAG);
2507	// T1 = get Hi(A) into low halves.
2508	SDValue T1 = getInstr(Hexagon::V6_vasrw, dl, VecTy, {A, S16}, DAG);
2509	// P0 = interleaved T1.hB.uh (full precision product)*
2510	SDValue P0 = getInstr(Hexagon::V6_vmpyhus, dl, PairTy, {T1, B}, DAG);
2511	// T2 = T1.even(h) B.even(uh), i.e. Hi(A)Lo(B)
2512	SDValue T2 = LoHalf(V: P0, DAG);
2513	// We need to add T0+T2, recording the carry-out, which will be 1<<16
2514	// added to the final sum.
2515	// P1 = interleaved even/odd 32-bit (unsigned) sums of 16-bit halves
2516	SDValue P1 = getInstr(Hexagon::V6_vadduhw, dl, PairTy, {T0, T2}, DAG);
2517	// P2 = interleaved even/odd 32-bit (signed) sums of 16-bit halves
2518	SDValue P2 = getInstr(Hexagon::V6_vaddhw, dl, PairTy, {T0, T2}, DAG);
2519	// T3 = full-precision(T0+T2) >> 16
2520	// The low halves are added-unsigned, the high ones are added-signed.
2521	SDValue T3 = getInstr(Hexagon::V6_vasrw_acc, dl, VecTy,
2522	{HiHalf(P2, DAG), LoHalf(P1, DAG), S16}, DAG);
2523	SDValue T4 = getInstr(Hexagon::V6_vasrw, dl, VecTy, {B, S16}, DAG);
2524	// P3 = interleaved Hi(B)Hi(A) (full precision),*
2525	// which is now Lo(T1)Lo(T4), so we want to keep the even product.*
2526	SDValue P3 = getInstr(Hexagon::V6_vmpyhv, dl, PairTy, {T1, T4}, DAG);
2527	SDValue T5 = LoHalf(V: P3, DAG);
2528	// Add:
2529	SDValue T6 = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: VecTy, Ops: {T3, T5});
2530	return T6;
2531	}
2532
2533	SDValue
2534	HexagonTargetLowering::emitHvxMulLoHiV60(SDValue A, bool SignedA, SDValue B,
2535	bool SignedB, const SDLoc &dl,
2536	SelectionDAG &DAG) const {
2537	MVT VecTy = ty(Op: A);
2538	MVT PairTy = typeJoin(Tys: {VecTy, VecTy});
2539	assert(VecTy.getVectorElementType() == MVT::i32);
2540
2541	SDValue S16 = DAG.getConstant(`16`, dl, MVT::i32);
2542
2543	if (SignedA && !SignedB) {
2544	// Make A:unsigned, B:signed.
2545	std::swap(a&: A, b&: B);
2546	std::swap(a&: SignedA, b&: SignedB);
2547	}
2548
2549	// Do halfword-wise multiplications for unsignedunsigned product, then*
2550	// add corrections for signed and unsignedsigned.*
2551
2552	SDValue Lo, Hi;
2553
2554	// P0:lo = (uu) products of low halves of A and B,
2555	// P0:hi = (uu) products of high halves.
2556	SDValue P0 = getInstr(Hexagon::V6_vmpyuhv, dl, PairTy, {A, B}, DAG);
2557
2558	// Swap low/high halves in B
2559	SDValue T0 = getInstr(Hexagon::V6_lvsplatw, dl, VecTy,
2560	{DAG.getConstant(`0x02020202`, dl, MVT::i32)}, DAG);
2561	SDValue T1 = getInstr(Hexagon::V6_vdelta, dl, VecTy, {B, T0}, DAG);
2562	// P1 = products of even/odd halfwords.
2563	// P1:lo = (uu) products of even(A.uh) odd(B.uh)*
2564	// P1:hi = (uu) products of odd(A.uh) even(B.uh)*
2565	SDValue P1 = getInstr(Hexagon::V6_vmpyuhv, dl, PairTy, {A, T1}, DAG);
2566
2567	// P2:lo = low halves of P1:lo + P1:hi,
2568	// P2:hi = high halves of P1:lo + P1:hi.
2569	SDValue P2 = getInstr(Hexagon::V6_vadduhw, dl, PairTy,
2570	{HiHalf(P1, DAG), LoHalf(P1, DAG)}, DAG);
2571	// Still need to add the high halves of P0:lo to P2:lo
2572	SDValue T2 =
2573	getInstr(Hexagon::V6_vlsrw, dl, VecTy, {LoHalf(P0, DAG), S16}, DAG);
2574	SDValue T3 = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: VecTy, Ops: {LoHalf(V: P2, DAG), T2});
2575
2576	// The high halves of T3 will contribute to the HI part of LOHI.
2577	SDValue T4 = getInstr(Hexagon::V6_vasrw_acc, dl, VecTy,
2578	{HiHalf(P2, DAG), T3, S16}, DAG);
2579
2580	// The low halves of P2 need to be added to high halves of the LO part.
2581	Lo = getInstr(Hexagon::V6_vaslw_acc, dl, VecTy,
2582	{LoHalf(P0, DAG), LoHalf(P2, DAG), S16}, DAG);
2583	Hi = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: VecTy, Ops: {HiHalf(V: P0, DAG), T4});
2584
2585	if (SignedA) {
2586	assert(SignedB && "Signed A and unsigned B should have been inverted");
2587
2588	MVT PredTy = MVT::getVectorVT(MVT::i1, VecTy.getVectorNumElements());
2589	SDValue Zero = getZero(dl, Ty: VecTy, DAG);
2590	SDValue Q0 = DAG.getSetCC(DL: dl, VT: PredTy, LHS: A, RHS: Zero, Cond: ISD::SETLT);
2591	SDValue Q1 = DAG.getSetCC(DL: dl, VT: PredTy, LHS: B, RHS: Zero, Cond: ISD::SETLT);
2592	SDValue X0 = DAG.getNode(Opcode: ISD::VSELECT, DL: dl, VT: VecTy, Ops: {Q0, B, Zero});
2593	SDValue X1 = getInstr(Hexagon::V6_vaddwq, dl, VecTy, {Q1, X0, A}, DAG);
2594	Hi = getInstr(Hexagon::V6_vsubw, dl, VecTy, {Hi, X1}, DAG);
2595	} else if (SignedB) {
2596	// Same correction as for mulhus:
2597	// mulhus(A.uw,B.w) = mulhu(A.uw,B.uw) - (A.w if B < 0)
2598	MVT PredTy = MVT::getVectorVT(MVT::i1, VecTy.getVectorNumElements());
2599	SDValue Zero = getZero(dl, Ty: VecTy, DAG);
2600	SDValue Q1 = DAG.getSetCC(DL: dl, VT: PredTy, LHS: B, RHS: Zero, Cond: ISD::SETLT);
2601	Hi = getInstr(Hexagon::V6_vsubwq, dl, VecTy, {Q1, Hi, A}, DAG);
2602	} else {
2603	assert(!SignedA && !SignedB);
2604	}
2605
2606	return DAG.getMergeValues(Ops: {Lo, Hi}, dl);
2607	}
2608
2609	SDValue
2610	HexagonTargetLowering::emitHvxMulLoHiV62(SDValue A, bool SignedA,
2611	SDValue B, bool SignedB,
2612	const SDLoc &dl,
2613	SelectionDAG &DAG) const {
2614	MVT VecTy = ty(Op: A);
2615	MVT PairTy = typeJoin(Tys: {VecTy, VecTy});
2616	assert(VecTy.getVectorElementType() == MVT::i32);
2617
2618	if (SignedA && !SignedB) {
2619	// Make A:unsigned, B:signed.
2620	std::swap(a&: A, b&: B);
2621	std::swap(a&: SignedA, b&: SignedB);
2622	}
2623
2624	// Do SS first, then make corrections for US or UU if needed.*
2625	SDValue P0 = getInstr(Hexagon::V6_vmpyewuh_64, dl, PairTy, {A, B}, DAG);
2626	SDValue P1 =
2627	getInstr(Hexagon::V6_vmpyowh_64_acc, dl, PairTy, {P0, A, B}, DAG);
2628	SDValue Lo = LoHalf(V: P1, DAG);
2629	SDValue Hi = HiHalf(V: P1, DAG);
2630
2631	if (!SignedB) {
2632	assert(!SignedA && "Signed A and unsigned B should have been inverted");
2633	SDValue Zero = getZero(dl, Ty: VecTy, DAG);
2634	MVT PredTy = MVT::getVectorVT(MVT::i1, VecTy.getVectorNumElements());
2635
2636	// Mulhu(X, Y) = Mulhs(X, Y) + (X, if Y < 0) + (Y, if X < 0).
2637	// def: Pat<(VecI32 (mulhu HVI32:$A, HVI32:$B)),
2638	// (V6_vaddw (HiHalf (Muls64O $A, $B)),
2639	// (V6_vaddwq (V6_vgtw (V6_vd0), $B),
2640	// (V6_vandvqv (V6_vgtw (V6_vd0), $A), $B),
2641	// $A))>;
2642	SDValue Q0 = DAG.getSetCC(DL: dl, VT: PredTy, LHS: A, RHS: Zero, Cond: ISD::SETLT);
2643	SDValue Q1 = DAG.getSetCC(DL: dl, VT: PredTy, LHS: B, RHS: Zero, Cond: ISD::SETLT);
2644	SDValue T0 = getInstr(Hexagon::V6_vandvqv, dl, VecTy, {Q0, B}, DAG);
2645	SDValue T1 = getInstr(Hexagon::V6_vaddwq, dl, VecTy, {Q1, T0, A}, DAG);
2646	Hi = getInstr(Hexagon::V6_vaddw, dl, VecTy, {Hi, T1}, DAG);
2647	} else if (!SignedA) {
2648	SDValue Zero = getZero(dl, Ty: VecTy, DAG);
2649	MVT PredTy = MVT::getVectorVT(MVT::i1, VecTy.getVectorNumElements());
2650
2651	// Mulhus(unsigned X, signed Y) = Mulhs(X, Y) + (Y, if X < 0).
2652	// def: Pat<(VecI32 (HexagonMULHUS HVI32:$A, HVI32:$B)),
2653	// (V6_vaddwq (V6_vgtw (V6_vd0), $A),
2654	// (HiHalf (Muls64O $A, $B)),
2655	// $B)>;
2656	SDValue Q0 = DAG.getSetCC(DL: dl, VT: PredTy, LHS: A, RHS: Zero, Cond: ISD::SETLT);
2657	Hi = getInstr(Hexagon::V6_vaddwq, dl, VecTy, {Q0, Hi, B}, DAG);
2658	}
2659
2660	return DAG.getMergeValues(Ops: {Lo, Hi}, dl);
2661	}
2662
2663	SDValue
2664	HexagonTargetLowering::EqualizeFpIntConversion(SDValue Op, SelectionDAG &DAG)
2665	const {
2666	// Rewrite conversion between integer and floating-point in such a way that
2667	// the integer type is extended/narrowed to match the bitwidth of the
2668	// floating-point type, combined with additional integer-integer extensions
2669	// or narrowings to match the original input/result types.
2670	// E.g. f32 -> i8 ==> f32 -> i32 -> i8
2671	//
2672	// The input/result types are not required to be legal, but if they are
2673	// legal, this function should not introduce illegal types.
2674
2675	unsigned Opc = Op.getOpcode();
2676	assert(Opc == ISD::FP_TO_SINT \|\| Opc == ISD::FP_TO_UINT \|\|
2677	Opc == ISD::SINT_TO_FP \|\| Opc == ISD::UINT_TO_FP);
2678
2679	SDValue Inp = Op.getOperand(i: `0`);
2680	MVT InpTy = ty(Op: Inp);
2681	MVT ResTy = ty(Op);
2682
2683	if (InpTy == ResTy)
2684	return Op;
2685
2686	const SDLoc &dl(Op);
2687	bool Signed = Opc == ISD::FP_TO_SINT \|\| Opc == ISD::SINT_TO_FP;
2688
2689	auto [WInpTy, WResTy] = typeExtendToWider(Ty0: InpTy, Ty1: ResTy);
2690	SDValue WInp = resizeToWidth(VecV: Inp, ResTy: WInpTy, Signed, dl, DAG);
2691	SDValue Conv = DAG.getNode(Opcode: Opc, DL: dl, VT: WResTy, Operand: WInp);
2692	SDValue Res = resizeToWidth(VecV: Conv, ResTy, Signed, dl, DAG);
2693	return Res;
2694	}
2695
2696	SDValue
2697	HexagonTargetLowering::ExpandHvxFpToInt(SDValue Op, SelectionDAG &DAG) const {
2698	unsigned Opc = Op.getOpcode();
2699	assert(Opc == ISD::FP_TO_SINT \|\| Opc == ISD::FP_TO_UINT);
2700
2701	const SDLoc &dl(Op);
2702	SDValue Op0 = Op.getOperand(i: `0`);
2703	MVT InpTy = ty(Op: Op0);
2704	MVT ResTy = ty(Op);
2705	assert(InpTy.changeTypeToInteger() == ResTy);
2706
2707	// int32_t conv_f32_to_i32(uint32_t inp) {
2708	// // s \| exp8 \| frac23
2709	//
2710	// int neg = (int32_t)inp < 0;
2711	//
2712	// // "expm1" is the actual exponent minus 1: instead of "bias", subtract
2713	// // "bias+1". When the encoded exp is "all-1" (i.e. inf/nan), this will
2714	// // produce a large positive "expm1", which will result in max u/int.
2715	// // In all IEEE formats, bias is the largest positive number that can be
2716	// // represented in bias-width bits (i.e. 011..1).
2717	// int32_t expm1 = (inp << 1) - 0x80000000;
2718	// expm1 >>= 24;
2719	//
2720	// // Always insert the "implicit 1". Subnormal numbers will become 0
2721	// // regardless.
2722	// uint32_t frac = (inp << 8) \| 0x80000000;
2723	//
2724	// // "frac" is the fraction part represented as Q1.31. If it was
2725	// // interpreted as uint32_t, it would be the fraction part multiplied
2726	// // by 2^31.
2727	//
2728	// // Calculate the amount of right shift, since shifting further to the
2729	// // left would lose significant bits. Limit it to 32, because we want
2730	// // shifts by 32+ to produce 0, whereas V6_vlsrwv treats the shift
2731	// // amount as a 6-bit signed value (so 33 is same as -31, i.e. shift
2732	// // left by 31). "rsh" can be negative.
2733	// int32_t rsh = min(31 - (expm1 + 1), 32);
2734	//
2735	// frac >>= rsh; // rsh == 32 will produce 0
2736	//
2737	// // Everything up to this point is the same for conversion to signed
2738	// // unsigned integer.
2739	//
2740	// if (neg) // Only for signed int
2741	// frac = -frac; //
2742	// if (rsh <= 0 && neg) // bound = neg ? 0x80000000 : 0x7fffffff
2743	// frac = 0x80000000; // frac = rsh <= 0 ? bound : frac
2744	// if (rsh <= 0 && !neg) //
2745	// frac = 0x7fffffff; //
2746	//
2747	// if (neg) // Only for unsigned int
2748	// frac = 0; //
2749	// if (rsh < 0 && !neg) // frac = rsh < 0 ? 0x7fffffff : frac;
2750	// frac = 0x7fffffff; // frac = neg ? 0 : frac;
2751	//
2752	// return frac;
2753	// }
2754
2755	MVT PredTy = MVT::getVectorVT(MVT::i1, ResTy.getVectorElementCount());
2756
2757	// Zero = V6_vd0();
2758	// Neg = V6_vgtw(Zero, Inp);
2759	// One = V6_lvsplatw(1);
2760	// M80 = V6_lvsplatw(0x80000000);
2761	// Exp00 = V6_vaslwv(Inp, One);
2762	// Exp01 = V6_vsubw(Exp00, M80);
2763	// ExpM1 = V6_vasrw(Exp01, 24);
2764	// Frc00 = V6_vaslw(Inp, 8);
2765	// Frc01 = V6_vor(Frc00, M80);
2766	// Rsh00 = V6_vsubw(V6_lvsplatw(30), ExpM1);
2767	// Rsh01 = V6_vminw(Rsh00, V6_lvsplatw(32));
2768	// Frc02 = V6_vlsrwv(Frc01, Rsh01);
2769
2770	// if signed int:
2771	// Bnd = V6_vmux(Neg, M80, V6_lvsplatw(0x7fffffff))
2772	// Pos = V6_vgtw(Rsh01, Zero);
2773	// Frc13 = V6_vsubw(Zero, Frc02);
2774	// Frc14 = V6_vmux(Neg, Frc13, Frc02);
2775	// Int = V6_vmux(Pos, Frc14, Bnd);
2776	//
2777	// if unsigned int:
2778	// Rsn = V6_vgtw(Zero, Rsh01)
2779	// Frc23 = V6_vmux(Rsn, V6_lvsplatw(0x7fffffff), Frc02)
2780	// Int = V6_vmux(Neg, Zero, Frc23)
2781
2782	auto [ExpWidth, ExpBias, FracWidth] = getIEEEProperties(Ty: InpTy);
2783	unsigned ElemWidth = `1` + ExpWidth + FracWidth;
2784	assert((`1ull` << (ExpWidth - `1`)) == (`1` + ExpBias));
2785
2786	SDValue Inp = DAG.getBitcast(VT: ResTy, V: Op0);
2787	SDValue Zero = getZero(dl, Ty: ResTy, DAG);
2788	SDValue Neg = DAG.getSetCC(DL: dl, VT: PredTy, LHS: Inp, RHS: Zero, Cond: ISD::SETLT);
2789	SDValue M80 = DAG.getConstant(Val: `1ull` << (ElemWidth - `1`), DL: dl, VT: ResTy);
2790	SDValue M7F = DAG.getConstant(Val: (`1ull` << (ElemWidth - `1`)) - `1`, DL: dl, VT: ResTy);
2791	SDValue One = DAG.getConstant(Val: `1`, DL: dl, VT: ResTy);
2792	SDValue Exp00 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: ResTy, Ops: {Inp, One});
2793	SDValue Exp01 = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: ResTy, Ops: {Exp00, M80});
2794	SDValue MNE = DAG.getConstant(Val: ElemWidth - ExpWidth, DL: dl, VT: ResTy);
2795	SDValue ExpM1 = DAG.getNode(Opcode: ISD::SRA, DL: dl, VT: ResTy, Ops: {Exp01, MNE});
2796
2797	SDValue ExpW = DAG.getConstant(Val: ExpWidth, DL: dl, VT: ResTy);
2798	SDValue Frc00 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: ResTy, Ops: {Inp, ExpW});
2799	SDValue Frc01 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: ResTy, Ops: {Frc00, M80});
2800
2801	SDValue MN2 = DAG.getConstant(Val: ElemWidth - `2`, DL: dl, VT: ResTy);
2802	SDValue Rsh00 = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: ResTy, Ops: {MN2, ExpM1});
2803	SDValue MW = DAG.getConstant(Val: ElemWidth, DL: dl, VT: ResTy);
2804	SDValue Rsh01 = DAG.getNode(Opcode: ISD::SMIN, DL: dl, VT: ResTy, Ops: {Rsh00, MW});
2805	SDValue Frc02 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: ResTy, Ops: {Frc01, Rsh01});
2806
2807	SDValue Int;
2808
2809	if (Opc == ISD::FP_TO_SINT) {
2810	SDValue Bnd = DAG.getNode(Opcode: ISD::VSELECT, DL: dl, VT: ResTy, Ops: {Neg, M80, M7F});
2811	SDValue Pos = DAG.getSetCC(DL: dl, VT: PredTy, LHS: Rsh01, RHS: Zero, Cond: ISD::SETGT);
2812	SDValue Frc13 = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: ResTy, Ops: {Zero, Frc02});
2813	SDValue Frc14 = DAG.getNode(Opcode: ISD::VSELECT, DL: dl, VT: ResTy, Ops: {Neg, Frc13, Frc02});
2814	Int = DAG.getNode(Opcode: ISD::VSELECT, DL: dl, VT: ResTy, Ops: {Pos, Frc14, Bnd});
2815	} else {
2816	assert(Opc == ISD::FP_TO_UINT);
2817	SDValue Rsn = DAG.getSetCC(DL: dl, VT: PredTy, LHS: Rsh01, RHS: Zero, Cond: ISD::SETLT);
2818	SDValue Frc23 = DAG.getNode(Opcode: ISD::VSELECT, DL: dl, VT: ResTy, N1: Rsn, N2: M7F, N3: Frc02);
2819	Int = DAG.getNode(Opcode: ISD::VSELECT, DL: dl, VT: ResTy, N1: Neg, N2: Zero, N3: Frc23);
2820	}
2821
2822	return Int;
2823	}
2824
2825	SDValue
2826	HexagonTargetLowering::ExpandHvxIntToFp(SDValue Op, SelectionDAG &DAG) const {
2827	unsigned Opc = Op.getOpcode();
2828	assert(Opc == ISD::SINT_TO_FP \|\| Opc == ISD::UINT_TO_FP);
2829
2830	const SDLoc &dl(Op);
2831	SDValue Op0 = Op.getOperand(i: `0`);
2832	MVT InpTy = ty(Op: Op0);
2833	MVT ResTy = ty(Op);
2834	assert(ResTy.changeTypeToInteger() == InpTy);
2835
2836	// uint32_t vnoc1_rnd(int32_t w) {
2837	// int32_t iszero = w == 0;
2838	// int32_t isneg = w < 0;
2839	// uint32_t u = __builtin_HEXAGON_A2_abs(w);
2840	//
2841	// uint32_t norm_left = __builtin_HEXAGON_S2_cl0(u) + 1;
2842	// uint32_t frac0 = (uint64_t)u << norm_left;
2843	//
2844	// // Rounding:
2845	// uint32_t frac1 = frac0 + ((1 << 8) - 1);
2846	// uint32_t renorm = (frac0 > frac1);
2847	// uint32_t rup = (int)(frac0 << 22) < 0;
2848	//
2849	// uint32_t frac2 = frac0 >> 8;
2850	// uint32_t frac3 = frac1 >> 8;
2851	// uint32_t frac = (frac2 != frac3) ? frac3 >> 1 : (frac3 + rup) >> 1;
2852	//
2853	// int32_t exp = 32 - norm_left + renorm + 127;
2854	// exp <<= 23;
2855	//
2856	// uint32_t sign = 0x80000000 isneg;*
2857	// uint32_t f = sign \| exp \| frac;
2858	// return iszero ? 0 : f;
2859	// }
2860
2861	MVT PredTy = MVT::getVectorVT(MVT::i1, InpTy.getVectorElementCount());
2862	bool Signed = Opc == ISD::SINT_TO_FP;
2863
2864	auto [ExpWidth, ExpBias, FracWidth] = getIEEEProperties(Ty: ResTy);
2865	unsigned ElemWidth = `1` + ExpWidth + FracWidth;
2866
2867	SDValue Zero = getZero(dl, Ty: InpTy, DAG);
2868	SDValue One = DAG.getConstant(Val: `1`, DL: dl, VT: InpTy);
2869	SDValue IsZero = DAG.getSetCC(DL: dl, VT: PredTy, LHS: Op0, RHS: Zero, Cond: ISD::SETEQ);
2870	SDValue Abs = Signed ? DAG.getNode(Opcode: ISD::ABS, DL: dl, VT: InpTy, Operand: Op0) : Op0;
2871	SDValue Clz = DAG.getNode(Opcode: ISD::CTLZ, DL: dl, VT: InpTy, Operand: Abs);
2872	SDValue NLeft = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: InpTy, Ops: {Clz, One});
2873	SDValue Frac0 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: InpTy, Ops: {Abs, NLeft});
2874
2875	auto [Frac, Ovf] = emitHvxShiftRightRnd(Val: Frac0, Amt: ExpWidth + `1`, Signed: false, DAG);
2876	if (Signed) {
2877	SDValue IsNeg = DAG.getSetCC(DL: dl, VT: PredTy, LHS: Op0, RHS: Zero, Cond: ISD::SETLT);
2878	SDValue M80 = DAG.getConstant(Val: `1ull` << (ElemWidth - `1`), DL: dl, VT: InpTy);
2879	SDValue Sign = DAG.getNode(Opcode: ISD::VSELECT, DL: dl, VT: InpTy, Ops: {IsNeg, M80, Zero});
2880	Frac = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: InpTy, Ops: {Sign, Frac});
2881	}
2882
2883	SDValue Rnrm = DAG.getZExtOrTrunc(Op: Ovf, DL: dl, VT: InpTy);
2884	SDValue Exp0 = DAG.getConstant(Val: ElemWidth + ExpBias, DL: dl, VT: InpTy);
2885	SDValue Exp1 = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: InpTy, Ops: {Rnrm, Exp0});
2886	SDValue Exp2 = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: InpTy, Ops: {Exp1, NLeft});
2887	SDValue Exp3 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: InpTy,
2888	Ops: {Exp2, DAG.getConstant(Val: FracWidth, DL: dl, VT: InpTy)});
2889	SDValue Flt0 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: InpTy, Ops: {Frac, Exp3});
2890	SDValue Flt1 = DAG.getNode(Opcode: ISD::VSELECT, DL: dl, VT: InpTy, Ops: {IsZero, Zero, Flt0});
2891	SDValue Flt = DAG.getBitcast(VT: ResTy, V: Flt1);
2892
2893	return Flt;
2894	}
2895
2896	SDValue
2897	HexagonTargetLowering::CreateTLWrapper(SDValue Op, SelectionDAG &DAG) const {
2898	unsigned Opc = Op.getOpcode();
2899	unsigned TLOpc;
2900	switch (Opc) {
2901	case ISD::ANY_EXTEND:
2902	case ISD::SIGN_EXTEND:
2903	case ISD::ZERO_EXTEND:
2904	TLOpc = HexagonISD::TL_EXTEND;
2905	break;
2906	case ISD::TRUNCATE:
2907	TLOpc = HexagonISD::TL_TRUNCATE;
2908	break;
2909	#ifndef NDEBUG
2910	Op.dump(G: &DAG);
2911	#endif
2912	llvm_unreachable("Unepected operator");
2913	}
2914
2915	const SDLoc &dl(Op);
2916	return DAG.getNode(TLOpc, dl, ty(Op), Op.getOperand(`0`),
2917	DAG.getUNDEF(MVT::i128), // illegal type
2918	DAG.getConstant(Opc, dl, MVT::i32));
2919	}
2920
2921	SDValue
2922	HexagonTargetLowering::RemoveTLWrapper(SDValue Op, SelectionDAG &DAG) const {
2923	assert(Op.getOpcode() == HexagonISD::TL_EXTEND \|\|
2924	Op.getOpcode() == HexagonISD::TL_TRUNCATE);
2925	unsigned Opc = Op.getConstantOperandVal(i: `2`);
2926	return DAG.getNode(Opcode: Opc, DL: SDLoc (Op), VT: ty(Op), Operand: Op.getOperand(i: `0`));
2927	}
2928
2929	HexagonTargetLowering::VectorPair
2930	HexagonTargetLowering::SplitVectorOp(SDValue Op, SelectionDAG &DAG) const {
2931	assert(!Op.isMachineOpcode());
2932	SmallVector<SDValue, `2`> OpsL, OpsH;
2933	const SDLoc &dl(Op);
2934
2935	auto SplitVTNode = [&DAG, this](const VTSDNode *N) {
2936	MVT Ty = typeSplit(VecTy: N->getVT().getSimpleVT()).first;
2937	SDValue TV = DAG.getValueType(Ty);
2938	return std::make_pair(x&: TV, y&: TV);
2939	};
2940
2941	for (SDValue A : Op.getNode()->ops()) {
2942	auto [Lo, Hi] =
2943	ty(Op: A).isVector() ? opSplit(Vec: A, dl, DAG) : std::make_pair(x&: A, y&: A);
2944	// Special case for type operand.
2945	switch (Op.getOpcode()) {
2946	case ISD::SIGN_EXTEND_INREG:
2947	case HexagonISD::SSAT:
2948	case HexagonISD::USAT:
2949	if (const auto N = dyn_cast<const* VTSDNode>(Val: A.getNode()))
2950	std::tie(args&: Lo, args&: Hi) = SplitVTNode (N);
2951	break;
2952	}
2953	OpsL.push_back(Elt: Lo);
2954	OpsH.push_back(Elt: Hi);
2955	}
2956
2957	MVT ResTy = ty(Op);
2958	MVT HalfTy = typeSplit(VecTy: ResTy).first;
2959	SDValue L = DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT: HalfTy, Ops: OpsL);
2960	SDValue H = DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT: HalfTy, Ops: OpsH);
2961	return {L, H};
2962	}
2963
2964	SDValue
2965	HexagonTargetLowering::SplitHvxMemOp(SDValue Op, SelectionDAG &DAG) const {
2966	auto *MemN = cast<MemSDNode>(Val: Op.getNode());
2967
2968	MVT MemTy = MemN->getMemoryVT().getSimpleVT();
2969	if (!isHvxPairTy(Ty: MemTy))
2970	return Op;
2971
2972	const SDLoc &dl(Op);
2973	unsigned HwLen = Subtarget.getVectorLength();
2974	MVT SingleTy = typeSplit(VecTy: MemTy).first;
2975	SDValue Chain = MemN->getChain();
2976	SDValue Base0 = MemN->getBasePtr();
2977	SDValue Base1 =
2978	DAG.getMemBasePlusOffset(Base: Base0, Offset: TypeSize::getFixed(ExactSize: HwLen), DL: dl);
2979	unsigned MemOpc = MemN->getOpcode();
2980
2981	MachineMemOperand MOp0 = nullptr, MOp1 = nullptr;
2982	if (MachineMemOperand *MMO = MemN->getMemOperand()) {
2983	MachineFunction &MF = DAG.getMachineFunction();
2984	uint64_t MemSize = (MemOpc == ISD::MLOAD \|\| MemOpc == ISD::MSTORE)
2985	? (uint64_t)MemoryLocation::UnknownSize
2986	: HwLen;
2987	MOp0 = MF.getMachineMemOperand(MMO, Offset: `0`, Size: MemSize);
2988	MOp1 = MF.getMachineMemOperand(MMO, Offset: HwLen, Size: MemSize);
2989	}
2990
2991	if (MemOpc == ISD::LOAD) {
2992	assert(cast<LoadSDNode>(Op)->isUnindexed());
2993	SDValue Load0 = DAG.getLoad(VT: SingleTy, dl, Chain, Ptr: Base0, MMO: MOp0);
2994	SDValue Load1 = DAG.getLoad(VT: SingleTy, dl, Chain, Ptr: Base1, MMO: MOp1);
2995	return DAG.getMergeValues(
2996	{ DAG.getNode(ISD::CONCAT_VECTORS, dl, MemTy, Load0, Load1),
2997	DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
2998	Load0.getValue(`1`), Load1.getValue(`1`)) }, dl);
2999	}
3000	if (MemOpc == ISD::STORE) {
3001	assert(cast<StoreSDNode>(Op)->isUnindexed());
3002	VectorPair Vals = opSplit(Vec: cast<StoreSDNode>(Val&: Op)->getValue(), dl, DAG);
3003	SDValue Store0 = DAG.getStore(Chain, dl, Val: Vals.first, Ptr: Base0, MMO: MOp0);
3004	SDValue Store1 = DAG.getStore(Chain, dl, Val: Vals.second, Ptr: Base1, MMO: MOp1);
3005	return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store0, Store1);
3006	}
3007
3008	assert(MemOpc == ISD::MLOAD \|\| MemOpc == ISD::MSTORE);
3009
3010	auto MaskN = cast<MaskedLoadStoreSDNode>(Val&: Op);
3011	assert(MaskN->isUnindexed());
3012	VectorPair Masks = opSplit(Vec: MaskN->getMask(), dl, DAG);
3013	SDValue Offset = DAG.getUNDEF(MVT::i32);
3014
3015	if (MemOpc == ISD::MLOAD) {
3016	VectorPair Thru =
3017	opSplit(Vec: cast<MaskedLoadSDNode>(Val&: Op)->getPassThru(), dl, DAG);
3018	SDValue MLoad0 =
3019	DAG.getMaskedLoad(VT: SingleTy, dl, Chain, Base: Base0, Offset, Mask: Masks.first,
3020	Src0: Thru.first, MemVT: SingleTy, MMO: MOp0, AM: ISD::UNINDEXED,
3021	ISD::NON_EXTLOAD, IsExpanding: false);
3022	SDValue MLoad1 =
3023	DAG.getMaskedLoad(VT: SingleTy, dl, Chain, Base: Base1, Offset, Mask: Masks.second,
3024	Src0: Thru.second, MemVT: SingleTy, MMO: MOp1, AM: ISD::UNINDEXED,
3025	ISD::NON_EXTLOAD, IsExpanding: false);
3026	return DAG.getMergeValues(
3027	{ DAG.getNode(ISD::CONCAT_VECTORS, dl, MemTy, MLoad0, MLoad1),
3028	DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
3029	MLoad0.getValue(`1`), MLoad1.getValue(`1`)) }, dl);
3030	}
3031	if (MemOpc == ISD::MSTORE) {
3032	VectorPair Vals = opSplit(Vec: cast<MaskedStoreSDNode>(Val&: Op)->getValue(), dl, DAG);
3033	SDValue MStore0 = DAG.getMaskedStore(Chain, dl, Val: Vals.first, Base: Base0, Offset,
3034	Mask: Masks.first, MemVT: SingleTy, MMO: MOp0,
3035	AM: ISD::UNINDEXED, IsTruncating: false, IsCompressing: false);
3036	SDValue MStore1 = DAG.getMaskedStore(Chain, dl, Val: Vals.second, Base: Base1, Offset,
3037	Mask: Masks.second, MemVT: SingleTy, MMO: MOp1,
3038	AM: ISD::UNINDEXED, IsTruncating: false, IsCompressing: false);
3039	return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MStore0, MStore1);
3040	}
3041
3042	std::string Name = "Unexpected operation: " + Op ->getOperationName(G: &DAG);
3043	llvm_unreachable(Name.c_str());
3044	}
3045
3046	SDValue
3047	HexagonTargetLowering::WidenHvxLoad(SDValue Op, SelectionDAG &DAG) const {
3048	const SDLoc &dl(Op);
3049	auto *LoadN = cast<LoadSDNode>(Val: Op.getNode());
3050	assert(LoadN->isUnindexed() && "Not widening indexed loads yet");
3051	assert(LoadN->getMemoryVT().getVectorElementType() != MVT::i1 &&
3052	"Not widening loads of i1 yet");
3053
3054	SDValue Chain = LoadN->getChain();
3055	SDValue Base = LoadN->getBasePtr();
3056	SDValue Offset = DAG.getUNDEF(MVT::i32);
3057
3058	MVT ResTy = ty(Op);
3059	unsigned HwLen = Subtarget.getVectorLength();
3060	unsigned ResLen = ResTy.getStoreSize();
3061	assert(ResLen < HwLen && "vsetq(v1) prerequisite");
3062
3063	MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
3064	SDValue Mask = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy,
3065	{DAG.getConstant(ResLen, dl, MVT::i32)}, DAG);
3066
3067	MVT LoadTy = MVT::getVectorVT(MVT::i8, HwLen);
3068	MachineFunction &MF = DAG.getMachineFunction();
3069	auto *MemOp = MF.getMachineMemOperand(MMO: LoadN->getMemOperand(), Offset: `0`, Size: HwLen);
3070
3071	SDValue Load = DAG.getMaskedLoad(VT: LoadTy, dl, Chain, Base, Offset, Mask,
3072	Src0: DAG.getUNDEF(VT: LoadTy), MemVT: LoadTy, MMO: MemOp,
3073	AM: ISD::UNINDEXED, ISD::NON_EXTLOAD, IsExpanding: false);
3074	SDValue Value = opCastElem(Vec: Load, ElemTy: ResTy.getVectorElementType(), DAG);
3075	return DAG.getMergeValues(Ops: {Value, Load.getValue(R: `1`)}, dl);
3076	}
3077
3078	SDValue
3079	HexagonTargetLowering::WidenHvxStore(SDValue Op, SelectionDAG &DAG) const {
3080	const SDLoc &dl(Op);
3081	auto *StoreN = cast<StoreSDNode>(Val: Op.getNode());
3082	assert(StoreN->isUnindexed() && "Not widening indexed stores yet");
3083	assert(StoreN->getMemoryVT().getVectorElementType() != MVT::i1 &&
3084	"Not widening stores of i1 yet");
3085
3086	SDValue Chain = StoreN->getChain();
3087	SDValue Base = StoreN->getBasePtr();
3088	SDValue Offset = DAG.getUNDEF(MVT::i32);
3089
3090	SDValue Value = opCastElem(StoreN->getValue(), MVT::i8, DAG);
3091	MVT ValueTy = ty(Op: Value);
3092	unsigned ValueLen = ValueTy.getVectorNumElements();
3093	unsigned HwLen = Subtarget.getVectorLength();
3094	assert(isPowerOf2_32(ValueLen));
3095
3096	for (unsigned Len = ValueLen; Len < HwLen; ) {
3097	Value = opJoin(Ops: {Value, DAG.getUNDEF(VT: ty(Op: Value))}, dl, DAG);
3098	Len = ty(Op: Value).getVectorNumElements(); // This is Len = 2*
3099	}
3100	assert(ty(Value).getVectorNumElements() == HwLen); // Paranoia
3101
3102	assert(ValueLen < HwLen && "vsetq(v1) prerequisite");
3103	MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
3104	SDValue Mask = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy,
3105	{DAG.getConstant(ValueLen, dl, MVT::i32)}, DAG);
3106	MachineFunction &MF = DAG.getMachineFunction();
3107	auto *MemOp = MF.getMachineMemOperand(MMO: StoreN->getMemOperand(), Offset: `0`, Size: HwLen);
3108	return DAG.getMaskedStore(Chain, dl, Val: Value, Base, Offset, Mask, MemVT: ty(Op: Value),
3109	MMO: MemOp, AM: ISD::UNINDEXED, IsTruncating: false, IsCompressing: false);
3110	}
3111
3112	SDValue
3113	HexagonTargetLowering::WidenHvxSetCC(SDValue Op, SelectionDAG &DAG) const {
3114	const SDLoc &dl(Op);
3115	SDValue Op0 = Op.getOperand(i: `0`), Op1 = Op.getOperand(i: `1`);
3116	MVT ElemTy = ty(Op: Op0).getVectorElementType();
3117	unsigned HwLen = Subtarget.getVectorLength();
3118
3119	unsigned WideOpLen = (`8` * HwLen) / ElemTy.getSizeInBits();
3120	assert(WideOpLen * ElemTy.getSizeInBits() == `8` * HwLen);
3121	MVT WideOpTy = MVT::getVectorVT(VT: ElemTy, NumElements: WideOpLen);
3122	if (!Subtarget.isHVXVectorType(VecTy: WideOpTy, IncludeBool: true))
3123	return SDValue ();
3124
3125	SDValue WideOp0 = appendUndef(Val: Op0, ResTy: WideOpTy, DAG);
3126	SDValue WideOp1 = appendUndef(Val: Op1, ResTy: WideOpTy, DAG);
3127	EVT ResTy =
3128	getSetCCResultType(DAG.getDataLayout(), C&: *DAG.getContext(), VT: WideOpTy);
3129	SDValue SetCC = DAG.getNode(Opcode: ISD::SETCC, DL: dl, VT: ResTy,
3130	Ops: {WideOp0, WideOp1, Op.getOperand(i: `2`)});
3131
3132	EVT RetTy = typeLegalize(Ty: ty(Op), DAG);
3133	return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, RetTy,
3134	{SetCC, getZero(dl, MVT::i32, DAG)});
3135	}
3136
3137	SDValue
3138	HexagonTargetLowering::LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const {
3139	unsigned Opc = Op.getOpcode();
3140	bool IsPairOp = isHvxPairTy(Ty: ty(Op)) \|\|
3141	llvm::any_of(Range: Op.getNode()->ops(), P: [this] (SDValue V) {
3142	return isHvxPairTy(Ty: ty(Op: V));
3143	});
3144
3145	if (IsPairOp) {
3146	switch (Opc) {
3147	default:
3148	break;
3149	case ISD::LOAD:
3150	case ISD::STORE:
3151	case ISD::MLOAD:
3152	case ISD::MSTORE:
3153	return SplitHvxMemOp(Op, DAG);
3154	case ISD::SINT_TO_FP:
3155	case ISD::UINT_TO_FP:
3156	case ISD::FP_TO_SINT:
3157	case ISD::FP_TO_UINT:
3158	if (ty(Op).getSizeInBits() == ty(Op: Op.getOperand(i: `0`)).getSizeInBits())
3159	return opJoin(Ops: SplitVectorOp(Op, DAG), dl: SDLoc (Op), DAG);
3160	break;
3161	case ISD::ABS:
3162	case ISD::CTPOP:
3163	case ISD::CTLZ:
3164	case ISD::CTTZ:
3165	case ISD::MUL:
3166	case ISD::FADD:
3167	case ISD::FSUB:
3168	case ISD::FMUL:
3169	case ISD::FMINNUM:
3170	case ISD::FMAXNUM:
3171	case ISD::MULHS:
3172	case ISD::MULHU:
3173	case ISD::AND:
3174	case ISD::OR:
3175	case ISD::XOR:
3176	case ISD::SRA:
3177	case ISD::SHL:
3178	case ISD::SRL:
3179	case ISD::FSHL:
3180	case ISD::FSHR:
3181	case ISD::SMIN:
3182	case ISD::SMAX:
3183	case ISD::UMIN:
3184	case ISD::UMAX:
3185	case ISD::SETCC:
3186	case ISD::VSELECT:
3187	case ISD::SIGN_EXTEND_INREG:
3188	case ISD::SPLAT_VECTOR:
3189	return opJoin(Ops: SplitVectorOp(Op, DAG), dl: SDLoc (Op), DAG);
3190	case ISD::SIGN_EXTEND:
3191	case ISD::ZERO_EXTEND:
3192	// In general, sign- and zero-extends can't be split and still
3193	// be legal. The only exception is extending bool vectors.
3194	if (ty(Op.getOperand(`0`)).getVectorElementType() == MVT::i1)
3195	return opJoin(Ops: SplitVectorOp(Op, DAG), dl: SDLoc (Op), DAG);
3196	break;
3197	}
3198	}
3199
3200	switch (Opc) {
3201	default:
3202	break;
3203	case ISD::BUILD_VECTOR: return LowerHvxBuildVector(Op, DAG);
3204	case ISD::SPLAT_VECTOR: return LowerHvxSplatVector(Op, DAG);
3205	case ISD::CONCAT_VECTORS: return LowerHvxConcatVectors(Op, DAG);
3206	case ISD::INSERT_SUBVECTOR: return LowerHvxInsertSubvector(Op, DAG);
3207	case ISD::INSERT_VECTOR_ELT: return LowerHvxInsertElement(Op, DAG);
3208	case ISD::EXTRACT_SUBVECTOR: return LowerHvxExtractSubvector(Op, DAG);
3209	case ISD::EXTRACT_VECTOR_ELT: return LowerHvxExtractElement(Op, DAG);
3210	case ISD::BITCAST: return LowerHvxBitcast(Op, DAG);
3211	case ISD::ANY_EXTEND: return LowerHvxAnyExt(Op, DAG);
3212	case ISD::SIGN_EXTEND: return LowerHvxSignExt(Op, DAG);
3213	case ISD::ZERO_EXTEND: return LowerHvxZeroExt(Op, DAG);
3214	case ISD::CTTZ: return LowerHvxCttz(Op, DAG);
3215	case ISD::SELECT: return LowerHvxSelect(Op, DAG);
3216	case ISD::SRA:
3217	case ISD::SHL:
3218	case ISD::SRL: return LowerHvxShift(Op, DAG);
3219	case ISD::FSHL:
3220	case ISD::FSHR: return LowerHvxFunnelShift(Op, DAG);
3221	case ISD::MULHS:
3222	case ISD::MULHU: return LowerHvxMulh(Op, DAG);
3223	case ISD::SMUL_LOHI:
3224	case ISD::UMUL_LOHI: return LowerHvxMulLoHi(Op, DAG);
3225	case ISD::ANY_EXTEND_VECTOR_INREG: return LowerHvxExtend(Op, DAG);
3226	case ISD::SETCC:
3227	case ISD::INTRINSIC_VOID: return Op;
3228	case ISD::INTRINSIC_WO_CHAIN: return LowerHvxIntrinsic(Op, DAG);
3229	case ISD::MLOAD:
3230	case ISD::MSTORE: return LowerHvxMaskedOp(Op, DAG);
3231	// Unaligned loads will be handled by the default lowering.
3232	case ISD::LOAD: return SDValue ();
3233	case ISD::FP_EXTEND: return LowerHvxFpExtend(Op, DAG);
3234	case ISD::FP_TO_SINT:
3235	case ISD::FP_TO_UINT: return LowerHvxFpToInt(Op, DAG);
3236	case ISD::SINT_TO_FP:
3237	case ISD::UINT_TO_FP: return LowerHvxIntToFp(Op, DAG);
3238
3239	// Special nodes:
3240	case HexagonISD::SMUL_LOHI:
3241	case HexagonISD::UMUL_LOHI:
3242	case HexagonISD::USMUL_LOHI: return LowerHvxMulLoHi(Op, DAG);
3243	}
3244	#ifndef NDEBUG
3245	Op.dumpr(G: &DAG);
3246	#endif
3247	llvm_unreachable("Unhandled HVX operation");
3248	}
3249
3250	SDValue
3251	HexagonTargetLowering::ExpandHvxResizeIntoSteps(SDValue Op, SelectionDAG &DAG)
3252	const {
3253	// Rewrite the extension/truncation/saturation op into steps where each
3254	// step changes the type widths by a factor of 2.
3255	// E.g. i8 -> i16 remains unchanged, but i8 -> i32 ==> i8 -> i16 -> i32.
3256	//
3257	// Some of the vector types in Op may not be legal.
3258
3259	unsigned Opc = Op.getOpcode();
3260	switch (Opc) {
3261	case HexagonISD::SSAT:
3262	case HexagonISD::USAT:
3263	case HexagonISD::TL_EXTEND:
3264	case HexagonISD::TL_TRUNCATE:
3265	break;
3266	case ISD::ANY_EXTEND:
3267	case ISD::ZERO_EXTEND:
3268	case ISD::SIGN_EXTEND:
3269	case ISD::TRUNCATE:
3270	llvm_unreachable("ISD:: ops will be auto-folded");
3271	break;
3272	#ifndef NDEBUG
3273	Op.dump(G: &DAG);
3274	#endif
3275	llvm_unreachable("Unexpected operation");
3276	}
3277
3278	SDValue Inp = Op.getOperand(i: `0`);
3279	MVT InpTy = ty(Op: Inp);
3280	MVT ResTy = ty(Op);
3281
3282	unsigned InpWidth = InpTy.getVectorElementType().getSizeInBits();
3283	unsigned ResWidth = ResTy.getVectorElementType().getSizeInBits();
3284	assert(InpWidth != ResWidth);
3285
3286	if (InpWidth == `2` * ResWidth \|\| ResWidth == `2` * InpWidth)
3287	return Op;
3288
3289	const SDLoc &dl(Op);
3290	unsigned NumElems = InpTy.getVectorNumElements();
3291	assert(NumElems == ResTy.getVectorNumElements());
3292
3293	auto repeatOp = [&](unsigned NewWidth, SDValue Arg) {
3294	MVT Ty = MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: NewWidth), NumElements: NumElems);
3295	switch (Opc) {
3296	case HexagonISD::SSAT:
3297	case HexagonISD::USAT:
3298	return DAG.getNode(Opcode: Opc, DL: dl, VT: Ty, Ops: {Arg, DAG.getValueType(Ty)});
3299	case HexagonISD::TL_EXTEND:
3300	case HexagonISD::TL_TRUNCATE:
3301	return DAG.getNode(Opcode: Opc, DL: dl, VT: Ty, Ops: {Arg, Op.getOperand(i: `1`), Op.getOperand(i: `2`)});
3302	default:
3303	llvm_unreachable("Unexpected opcode");
3304	}
3305	};
3306
3307	SDValue S = Inp;
3308	if (InpWidth < ResWidth) {
3309	assert(ResWidth % InpWidth == `0` && isPowerOf2_32(ResWidth / InpWidth));
3310	while (InpWidth * `2` <= ResWidth)
3311	S = repeatOp (InpWidth *= `2`, S);
3312	} else {
3313	// InpWidth > ResWidth
3314	assert(InpWidth % ResWidth == `0` && isPowerOf2_32(InpWidth / ResWidth));
3315	while (InpWidth / `2` >= ResWidth)
3316	S = repeatOp (InpWidth /= `2`, S);
3317	}
3318	return S;
3319	}
3320
3321	SDValue
3322	HexagonTargetLowering::LegalizeHvxResize(SDValue Op, SelectionDAG &DAG) const {
3323	SDValue Inp0 = Op.getOperand(i: `0`);
3324	MVT InpTy = ty(Op: Inp0);
3325	MVT ResTy = ty(Op);
3326	unsigned InpWidth = InpTy.getSizeInBits();
3327	unsigned ResWidth = ResTy.getSizeInBits();
3328	unsigned Opc = Op.getOpcode();
3329
3330	if (shouldWidenToHvx(Ty: InpTy, DAG) \|\| shouldWidenToHvx(Ty: ResTy, DAG)) {
3331	// First, make sure that the narrower type is widened to HVX.
3332	// This may cause the result to be wider than what the legalizer
3333	// expects, so insert EXTRACT_SUBVECTOR to bring it back to the
3334	// desired type.
3335	auto [WInpTy, WResTy] =
3336	InpWidth < ResWidth ? typeWidenToWider(Ty0: typeWidenToHvx(Ty: InpTy), Ty1: ResTy)
3337	: typeWidenToWider(Ty0: InpTy, Ty1: typeWidenToHvx(Ty: ResTy));
3338	SDValue W = appendUndef(Val: Inp0, ResTy: WInpTy, DAG);
3339	SDValue S;
3340	if (Opc == HexagonISD::TL_EXTEND \|\| Opc == HexagonISD::TL_TRUNCATE) {
3341	S = DAG.getNode(Opcode: Opc, DL: SDLoc (Op), VT: WResTy, N1: W, N2: Op.getOperand(i: `1`),
3342	N3: Op.getOperand(i: `2`));
3343	} else {
3344	S = DAG.getNode(Opcode: Opc, DL: SDLoc (Op), VT: WResTy, N1: W, N2: DAG.getValueType(WResTy));
3345	}
3346	SDValue T = ExpandHvxResizeIntoSteps(Op: S, DAG);
3347	return extractSubvector(Vec: T, SubTy: typeLegalize(Ty: ResTy, DAG), SubIdx: `0`, DAG);
3348	} else if (shouldSplitToHvx(Ty: InpWidth < ResWidth ? ResTy : InpTy, DAG)) {
3349	return opJoin(Ops: SplitVectorOp(Op, DAG), dl: SDLoc (Op), DAG);
3350	} else {
3351	assert(isTypeLegal(InpTy) && isTypeLegal(ResTy));
3352	return RemoveTLWrapper(Op, DAG);
3353	}
3354	llvm_unreachable("Unexpected situation");
3355	}
3356
3357	void
3358	HexagonTargetLowering::LowerHvxOperationWrapper(SDNode *N,
3359	SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
3360	unsigned Opc = N->getOpcode();
3361	SDValue Op(N, `0`);
3362	SDValue Inp0; // Optional first argument.
3363	if (N->getNumOperands() > `0`)
3364	Inp0 = Op.getOperand(i: `0`);
3365
3366	switch (Opc) {
3367	case ISD::ANY_EXTEND:
3368	case ISD::SIGN_EXTEND:
3369	case ISD::ZERO_EXTEND:
3370	case ISD::TRUNCATE:
3371	if (Subtarget.isHVXElementType(Ty: ty(Op)) &&
3372	Subtarget.isHVXElementType(Ty: ty(Op: Inp0))) {
3373	Results.push_back(Elt: CreateTLWrapper(Op, DAG));
3374	}
3375	break;
3376	case ISD::SETCC:
3377	if (shouldWidenToHvx(Ty: ty(Op: Inp0), DAG)) {
3378	if (SDValue T = WidenHvxSetCC(Op, DAG))
3379	Results.push_back(Elt: T);
3380	}
3381	break;
3382	case ISD::STORE: {
3383	if (shouldWidenToHvx(Ty: ty(Op: cast<StoreSDNode>(Val: N)->getValue()), DAG)) {
3384	SDValue Store = WidenHvxStore(Op, DAG);
3385	Results.push_back(Elt: Store);
3386	}
3387	break;
3388	}
3389	case ISD::MLOAD:
3390	if (isHvxPairTy(Ty: ty(Op))) {
3391	SDValue S = SplitHvxMemOp(Op, DAG);
3392	assert(S ->getOpcode() == ISD::MERGE_VALUES);
3393	Results.push_back(Elt: S.getOperand(i: `0`));
3394	Results.push_back(Elt: S.getOperand(i: `1`));
3395	}
3396	break;
3397	case ISD::MSTORE:
3398	if (isHvxPairTy(Ty: ty(Op: Op ->getOperand(Num: `1`)))) { // Stored value
3399	SDValue S = SplitHvxMemOp(Op, DAG);
3400	Results.push_back(Elt: S);
3401	}
3402	break;
3403	case ISD::SINT_TO_FP:
3404	case ISD::UINT_TO_FP:
3405	case ISD::FP_TO_SINT:
3406	case ISD::FP_TO_UINT:
3407	if (ty(Op).getSizeInBits() != ty(Op: Inp0).getSizeInBits()) {
3408	SDValue T = EqualizeFpIntConversion(Op, DAG);
3409	Results.push_back(Elt: T);
3410	}
3411	break;
3412	case HexagonISD::SSAT:
3413	case HexagonISD::USAT:
3414	case HexagonISD::TL_EXTEND:
3415	case HexagonISD::TL_TRUNCATE:
3416	Results.push_back(Elt: LegalizeHvxResize(Op, DAG));
3417	break;
3418	default:
3419	break;
3420	}
3421	}
3422
3423	void
3424	HexagonTargetLowering::ReplaceHvxNodeResults(SDNode *N,
3425	SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
3426	unsigned Opc = N->getOpcode();
3427	SDValue Op(N, `0`);
3428	SDValue Inp0; // Optional first argument.
3429	if (N->getNumOperands() > `0`)
3430	Inp0 = Op.getOperand(i: `0`);
3431
3432	switch (Opc) {
3433	case ISD::ANY_EXTEND:
3434	case ISD::SIGN_EXTEND:
3435	case ISD::ZERO_EXTEND:
3436	case ISD::TRUNCATE:
3437	if (Subtarget.isHVXElementType(Ty: ty(Op)) &&
3438	Subtarget.isHVXElementType(Ty: ty(Op: Inp0))) {
3439	Results.push_back(Elt: CreateTLWrapper(Op, DAG));
3440	}
3441	break;
3442	case ISD::SETCC:
3443	if (shouldWidenToHvx(Ty: ty(Op), DAG)) {
3444	if (SDValue T = WidenHvxSetCC(Op, DAG))
3445	Results.push_back(Elt: T);
3446	}
3447	break;
3448	case ISD::LOAD: {
3449	if (shouldWidenToHvx(Ty: ty(Op), DAG)) {
3450	SDValue Load = WidenHvxLoad(Op, DAG);
3451	assert(Load ->getOpcode() == ISD::MERGE_VALUES);
3452	Results.push_back(Elt: Load.getOperand(i: `0`));
3453	Results.push_back(Elt: Load.getOperand(i: `1`));
3454	}
3455	break;
3456	}
3457	case ISD::BITCAST:
3458	if (isHvxBoolTy(Ty: ty(Op: Inp0))) {
3459	SDValue C = LowerHvxBitcast(Op, DAG);
3460	Results.push_back(Elt: C);
3461	}
3462	break;
3463	case ISD::FP_TO_SINT:
3464	case ISD::FP_TO_UINT:
3465	if (ty(Op).getSizeInBits() != ty(Op: Inp0).getSizeInBits()) {
3466	SDValue T = EqualizeFpIntConversion(Op, DAG);
3467	Results.push_back(Elt: T);
3468	}
3469	break;
3470	case HexagonISD::SSAT:
3471	case HexagonISD::USAT:
3472	case HexagonISD::TL_EXTEND:
3473	case HexagonISD::TL_TRUNCATE:
3474	Results.push_back(Elt: LegalizeHvxResize(Op, DAG));
3475	break;
3476	default:
3477	break;
3478	}
3479	}
3480
3481	SDValue
3482	HexagonTargetLowering::combineTruncateBeforeLegal(SDValue Op,
3483	DAGCombinerInfo &DCI) const {
3484	// Simplify V:v2NiB --(bitcast)--> vNi2B --(truncate)--> vNiB
3485	// to extract-subvector (shuffle V, pick even, pick odd)
3486
3487	assert(Op.getOpcode() == ISD::TRUNCATE);
3488	SelectionDAG &DAG = DCI.DAG;
3489	const SDLoc &dl(Op);
3490
3491	if (Op.getOperand(i: `0`).getOpcode() == ISD::BITCAST)
3492	return SDValue ();
3493	SDValue Cast = Op.getOperand(i: `0`);
3494	SDValue Src = Cast.getOperand(i: `0`);
3495
3496	EVT TruncTy = Op.getValueType();
3497	EVT CastTy = Cast.getValueType();
3498	EVT SrcTy = Src.getValueType();
3499	if (SrcTy.isSimple())
3500	return SDValue ();
3501	if (SrcTy.getVectorElementType() != TruncTy.getVectorElementType())
3502	return SDValue ();
3503	unsigned SrcLen = SrcTy.getVectorNumElements();
3504	unsigned CastLen = CastTy.getVectorNumElements();
3505	if (`2` * CastLen != SrcLen)
3506	return SDValue ();
3507
3508	SmallVector<int, `128`> Mask(SrcLen);
3509	for (int i = `0`; i != static_cast<int>(CastLen); ++i) {
3510	Mask [i] = `2` * i;
3511	Mask [i + CastLen] = `2` * i + `1`;
3512	}
3513	SDValue Deal =
3514	DAG.getVectorShuffle(VT: SrcTy, dl, N1: Src, N2: DAG.getUNDEF(VT: SrcTy), Mask);
3515	return opSplit(Vec: Deal, dl, DAG).first;
3516	}
3517
3518	SDValue
3519	HexagonTargetLowering::combineConcatVectorsBeforeLegal(
3520	SDValue Op, DAGCombinerInfo &DCI) const {
3521	// Fold
3522	// concat (shuffle x, y, m1), (shuffle x, y, m2)
3523	// into
3524	// shuffle (concat x, y), undef, m3
3525	if (Op.getNumOperands() != `2`)
3526	return SDValue ();
3527
3528	SelectionDAG &DAG = DCI.DAG;
3529	const SDLoc &dl(Op);
3530	SDValue V0 = Op.getOperand(i: `0`);
3531	SDValue V1 = Op.getOperand(i: `1`);
3532
3533	if (V0.getOpcode() != ISD::VECTOR_SHUFFLE)
3534	return SDValue ();
3535	if (V1.getOpcode() != ISD::VECTOR_SHUFFLE)
3536	return SDValue ();
3537
3538	SetVector<SDValue> Order;
3539	Order.insert(X: V0.getOperand(i: `0`));
3540	Order.insert(X: V0.getOperand(i: `1`));
3541	Order.insert(X: V1.getOperand(i: `0`));
3542	Order.insert(X: V1.getOperand(i: `1`));
3543
3544	if (Order.size() > `2`)
3545	return SDValue ();
3546
3547	// In ISD::VECTOR_SHUFFLE, the types of each input and the type of the
3548	// result must be the same.
3549	EVT InpTy = V0.getValueType();
3550	assert(InpTy.isVector());
3551	unsigned InpLen = InpTy.getVectorNumElements();
3552
3553	SmallVector<int, `128`> LongMask;
3554	auto AppendToMask = [&](SDValue Shuffle) {
3555	auto *SV = cast<ShuffleVectorSDNode>(Val: Shuffle.getNode());
3556	ArrayRef<int> Mask = SV->getMask();
3557	SDValue X = Shuffle.getOperand(i: `0`);
3558	SDValue Y = Shuffle.getOperand(i: `1`);
3559	for (int M : Mask) {
3560	if (M == -`1`) {
3561	LongMask.push_back(Elt: M);
3562	continue;
3563	}
3564	SDValue Src = static_cast<unsigned>(M) < InpLen ? X : Y;
3565	if (static_cast<unsigned>(M) >= InpLen)
3566	M -= InpLen;
3567
3568	int OutOffset = Order [`0`] == Src ? `0` : InpLen;
3569	LongMask.push_back(Elt: M + OutOffset);
3570	}
3571	};
3572
3573	AppendToMask (V0);
3574	AppendToMask (V1);
3575
3576	SDValue C0 = Order.front();
3577	SDValue C1 = Order.back(); // Can be same as front
3578	EVT LongTy = InpTy.getDoubleNumVectorElementsVT(Context&: *DAG.getContext());
3579
3580	SDValue Cat = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL: dl, VT: LongTy, Ops: {C0, C1});
3581	return DAG.getVectorShuffle(VT: LongTy, dl, N1: Cat, N2: DAG.getUNDEF(VT: LongTy), Mask: LongMask);
3582	}
3583
3584	SDValue
3585	HexagonTargetLowering::PerformHvxDAGCombine(SDNode *N, DAGCombinerInfo &DCI)
3586	const {
3587	const SDLoc &dl(N);
3588	SelectionDAG &DAG = DCI.DAG;
3589	SDValue Op(N, `0`);
3590	unsigned Opc = Op.getOpcode();
3591
3592	SmallVector<SDValue, `4`> Ops(N->ops().begin(), N->ops().end());
3593
3594	if (Opc == ISD::TRUNCATE)
3595	return combineTruncateBeforeLegal(Op, DCI);
3596	if (Opc == ISD::CONCAT_VECTORS)
3597	return combineConcatVectorsBeforeLegal(Op, DCI);
3598
3599	if (DCI.isBeforeLegalizeOps())
3600	return SDValue ();
3601
3602	switch (Opc) {
3603	case ISD::VSELECT: {
3604	// (vselect (xor x, qtrue), v0, v1) -> (vselect x, v1, v0)
3605	SDValue Cond = Ops [`0`];
3606	if (Cond ->getOpcode() == ISD::XOR) {
3607	SDValue C0 = Cond.getOperand(i: `0`), C1 = Cond.getOperand(i: `1`);
3608	if (C1 ->getOpcode() == HexagonISD::QTRUE)
3609	return DAG.getNode(Opcode: ISD::VSELECT, DL: dl, VT: ty(Op), N1: C0, N2: Ops [`2`], N3: Ops [`1`]);
3610	}
3611	break;
3612	}
3613	case HexagonISD::V2Q:
3614	if (Ops [`0`].getOpcode() == ISD::SPLAT_VECTOR) {
3615	if (const auto *C = dyn_cast<ConstantSDNode>(Val: Ops [`0`].getOperand(i: `0`)))
3616	return C->isZero() ? DAG.getNode(Opcode: HexagonISD::QFALSE, DL: dl, VT: ty(Op))
3617	: DAG.getNode(Opcode: HexagonISD::QTRUE, DL: dl, VT: ty(Op));
3618	}
3619	break;
3620	case HexagonISD::Q2V:
3621	if (Ops[`0`].getOpcode() == HexagonISD::QTRUE)
3622	return DAG.getNode(ISD::SPLAT_VECTOR, dl, ty(Op),
3623	DAG.getConstant(-`1`, dl, MVT::i32));
3624	if (Ops [`0`].getOpcode() == HexagonISD::QFALSE)
3625	return getZero(dl, Ty: ty(Op), DAG);
3626	break;
3627	case HexagonISD::VINSERTW0:
3628	if (isUndef(Op: Ops [`1`]))
3629	return Ops [`0`];
3630	break;
3631	case HexagonISD::VROR: {
3632	if (Ops [`0`].getOpcode() == HexagonISD::VROR) {
3633	SDValue Vec = Ops [`0`].getOperand(i: `0`);
3634	SDValue Rot0 = Ops [`1`], Rot1 = Ops [`0`].getOperand(i: `1`);
3635	SDValue Rot = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: ty(Op: Rot0), Ops: {Rot0, Rot1});
3636	return DAG.getNode(Opcode: HexagonISD::VROR, DL: dl, VT: ty(Op), Ops: {Vec, Rot});
3637	}
3638	break;
3639	}
3640	}
3641
3642	return SDValue ();
3643	}
3644
3645	bool
3646	HexagonTargetLowering::shouldSplitToHvx(MVT Ty, SelectionDAG &DAG) const {
3647	if (Subtarget.isHVXVectorType(VecTy: Ty, IncludeBool: true))
3648	return false;
3649	auto Action = getPreferredHvxVectorAction(VecTy: Ty);
3650	if (Action == TargetLoweringBase::TypeSplitVector)
3651	return Subtarget.isHVXVectorType(VecTy: typeLegalize(Ty, DAG), IncludeBool: true);
3652	return false;
3653	}
3654
3655	bool
3656	HexagonTargetLowering::shouldWidenToHvx(MVT Ty, SelectionDAG &DAG) const {
3657	if (Subtarget.isHVXVectorType(VecTy: Ty, IncludeBool: true))
3658	return false;
3659	auto Action = getPreferredHvxVectorAction(VecTy: Ty);
3660	if (Action == TargetLoweringBase::TypeWidenVector)
3661	return Subtarget.isHVXVectorType(VecTy: typeLegalize(Ty, DAG), IncludeBool: true);
3662	return false;
3663	}
3664
3665	bool
3666	HexagonTargetLowering::isHvxOperation(SDNode N, SelectionDAG &DAG) const* {
3667	if (!Subtarget.useHVXOps())
3668	return false;
3669	// If the type of any result, or any operand type are HVX vector types,
3670	// this is an HVX operation.
3671	auto IsHvxTy = [this](EVT Ty) {
3672	return Ty.isSimple() && Subtarget.isHVXVectorType(VecTy: Ty.getSimpleVT(), IncludeBool: true);
3673	};
3674	auto IsHvxOp = [this](SDValue Op) {
3675	return Op.getValueType().isSimple() &&
3676	Subtarget.isHVXVectorType(VecTy: ty(Op), IncludeBool: true);
3677	};
3678	if (llvm::any_of(Range: N->values(), P: IsHvxTy) \|\| llvm::any_of(Range: N->ops(), P: IsHvxOp))
3679	return true;
3680
3681	// Check if this could be an HVX operation after type widening.
3682	auto IsWidenedToHvx = [this, &DAG](SDValue Op) {
3683	if (!Op.getValueType().isSimple())
3684	return false;
3685	MVT ValTy = ty(Op);
3686	return ValTy.isVector() && shouldWidenToHvx(Ty: ValTy, DAG);
3687	};
3688
3689	for (int i = `0`, e = N->getNumValues(); i != e; ++i) {
3690	if (IsWidenedToHvx (SDValue (N, i)))
3691	return true;
3692	}
3693	return llvm::any_of(Range: N->ops(), P: IsWidenedToHvx);
3694	}
3695

source code of llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp