WebAssemblyISelLowering.cpp source code [llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp]

1	//=- WebAssemblyISelLowering.cpp - WebAssembly DAG Lowering Implementation -==//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	///
9	/// \file
10	/// This file implements the WebAssemblyTargetLowering class.
11	///
12	//===----------------------------------------------------------------------===//
13
14	#include "WebAssemblyISelLowering.h"
15	#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
16	#include "Utils/WebAssemblyTypeUtilities.h"
17	#include "WebAssemblyMachineFunctionInfo.h"
18	#include "WebAssemblySubtarget.h"
19	#include "WebAssemblyTargetMachine.h"
20	#include "WebAssemblyUtilities.h"
21	#include "llvm/CodeGen/CallingConvLower.h"
22	#include "llvm/CodeGen/MachineFrameInfo.h"
23	#include "llvm/CodeGen/MachineFunctionPass.h"
24	#include "llvm/CodeGen/MachineInstrBuilder.h"
25	#include "llvm/CodeGen/MachineJumpTableInfo.h"
26	#include "llvm/CodeGen/MachineModuleInfo.h"
27	#include "llvm/CodeGen/MachineRegisterInfo.h"
28	#include "llvm/CodeGen/SelectionDAG.h"
29	#include "llvm/CodeGen/SelectionDAGNodes.h"
30	#include "llvm/IR/DiagnosticInfo.h"
31	#include "llvm/IR/DiagnosticPrinter.h"
32	#include "llvm/IR/Function.h"
33	#include "llvm/IR/Intrinsics.h"
34	#include "llvm/IR/IntrinsicsWebAssembly.h"
35	#include "llvm/IR/PatternMatch.h"
36	#include "llvm/Support/Debug.h"
37	#include "llvm/Support/ErrorHandling.h"
38	#include "llvm/Support/KnownBits.h"
39	#include "llvm/Support/MathExtras.h"
40	#include "llvm/Support/raw_ostream.h"
41	#include "llvm/Target/TargetOptions.h"
42	using namespace llvm;
43
44	#define DEBUG_TYPE "wasm-lower"
45
46	WebAssemblyTargetLowering::WebAssemblyTargetLowering(
47	const TargetMachine &TM, const WebAssemblySubtarget &STI)
48	: TargetLowering (TM), Subtarget(&STI) {
49	auto MVTPtr = Subtarget->hasAddr64() ? MVT::i64 : MVT::i32;
50
51	// Booleans always contain 0 or 1.
52	setBooleanContents(ZeroOrOneBooleanContent);
53	// Except in SIMD vectors
54	setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
55	// We don't know the microarchitecture here, so just reduce register pressure.
56	setSchedulingPreference(Sched::RegPressure);
57	// Tell ISel that we have a stack pointer.
58	setStackPointerRegisterToSaveRestore(
59	Subtarget->hasAddr64() ? WebAssembly::SP64 : WebAssembly::SP32);
60	// Set up the register classes.
61	addRegisterClass(MVT::VT: i32, RC: &WebAssembly::I32RegClass);
62	addRegisterClass(MVT::VT: i64, RC: &WebAssembly::I64RegClass);
63	addRegisterClass(MVT::VT: f32, RC: &WebAssembly::F32RegClass);
64	addRegisterClass(MVT::VT: f64, RC: &WebAssembly::F64RegClass);
65	if (Subtarget->hasSIMD128()) {
66	addRegisterClass(MVT::VT: v16i8, RC: &WebAssembly::V128RegClass);
67	addRegisterClass(MVT::VT: v8i16, RC: &WebAssembly::V128RegClass);
68	addRegisterClass(MVT::VT: v4i32, RC: &WebAssembly::V128RegClass);
69	addRegisterClass(MVT::VT: v4f32, RC: &WebAssembly::V128RegClass);
70	addRegisterClass(MVT::VT: v2i64, RC: &WebAssembly::V128RegClass);
71	addRegisterClass(MVT::VT: v2f64, RC: &WebAssembly::V128RegClass);
72	}
73	if (Subtarget->hasReferenceTypes()) {
74	addRegisterClass(MVT::VT: externref, RC: &WebAssembly::EXTERNREFRegClass);
75	addRegisterClass(MVT::VT: funcref, RC: &WebAssembly::FUNCREFRegClass);
76	}
77	// Compute derived properties from the register classes.
78	computeRegisterProperties(Subtarget->getRegisterInfo());
79
80	// Transform loads and stores to pointers in address space 1 to loads and
81	// stores to WebAssembly global variables, outside linear memory.
82	for (auto T : {MVT::i32, MVT::i64, MVT::f32, MVT::f64}) {
83	setOperationAction(ISD::LOAD, T, Custom);
84	setOperationAction(ISD::STORE, T, Custom);
85	}
86	if (Subtarget->hasSIMD128()) {
87	for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
88	MVT::v2f64}) {
89	setOperationAction(ISD::LOAD, T, Custom);
90	setOperationAction(ISD::STORE, T, Custom);
91	}
92	}
93	if (Subtarget->hasReferenceTypes()) {
94	// We need custom load and store lowering for both externref, funcref and
95	// Other. The MVT::Other here represents tables of reference types.
96	for (auto T : {MVT::externref, MVT::funcref, MVT::Other}) {
97	setOperationAction(ISD::LOAD, T, Custom);
98	setOperationAction(ISD::STORE, T, Custom);
99	}
100	}
101
102	setOperationAction(ISD::GlobalAddress, MVTPtr, Custom);
103	setOperationAction(ISD::GlobalTLSAddress, MVTPtr, Custom);
104	setOperationAction(ISD::ExternalSymbol, MVTPtr, Custom);
105	setOperationAction(ISD::JumpTable, MVTPtr, Custom);
106	setOperationAction(ISD::BlockAddress, MVTPtr, Custom);
107	setOperationAction(ISD::BRIND, MVT::Other, Custom);
108
109	// Take the default expansion for va_arg, va_copy, and va_end. There is no
110	// default action for va_start, so we do that custom.
111	setOperationAction(ISD::VASTART, MVT::Other, Custom);
112	setOperationAction(ISD::VAARG, MVT::Other, Expand);
113	setOperationAction(ISD::VACOPY, MVT::Other, Expand);
114	setOperationAction(ISD::VAEND, MVT::Other, Expand);
115
116	for (auto T : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64}) {
117	// Don't expand the floating-point types to constant pools.
118	setOperationAction(ISD::ConstantFP, T, Legal);
119	// Expand floating-point comparisons.
120	for (auto CC : {ISD::SETO, ISD::SETUO, ISD::SETUEQ, ISD::SETONE,
121	ISD::SETULT, ISD::SETULE, ISD::SETUGT, ISD::SETUGE})
122	setCondCodeAction(CC, T, Expand);
123	// Expand floating-point library function operators.
124	for (auto Op :
125	{ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, ISD::FREM, ISD::FMA})
126	setOperationAction(Op, T, Expand);
127	// Note supported floating-point library function operators that otherwise
128	// default to expand.
129	for (auto Op : {ISD::FCEIL, ISD::FFLOOR, ISD::FTRUNC, ISD::FNEARBYINT,
130	ISD::FRINT, ISD::FROUNDEVEN})
131	setOperationAction(Op, T, Legal);
132	// Support minimum and maximum, which otherwise default to expand.
133	setOperationAction(ISD::FMINIMUM, T, Legal);
134	setOperationAction(ISD::FMAXIMUM, T, Legal);
135	// WebAssembly currently has no builtin f16 support.
136	setOperationAction(ISD::FP16_TO_FP, T, Expand);
137	setOperationAction(ISD::FP_TO_FP16, T, Expand);
138	setLoadExtAction(ISD::EXTLOAD, T, MVT::f16, Expand);
139	setTruncStoreAction(T, MVT::f16, Expand);
140	}
141
142	// Expand unavailable integer operations.
143	for (auto Op :
144	{ISD::BSWAP, ISD::SMUL_LOHI, ISD::UMUL_LOHI, ISD::MULHS, ISD::MULHU,
145	ISD::SDIVREM, ISD::UDIVREM, ISD::SHL_PARTS, ISD::SRA_PARTS,
146	ISD::SRL_PARTS, ISD::ADDC, ISD::ADDE, ISD::SUBC, ISD::SUBE}) {
147	for (auto T : {MVT::i32, MVT::i64})
148	setOperationAction(Op, T, Expand);
149	if (Subtarget->hasSIMD128())
150	for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
151	setOperationAction(Op, T, Expand);
152	}
153
154	if (Subtarget->hasNontrappingFPToInt())
155	for (auto Op : {ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT})
156	for (auto T : {MVT::i32, MVT::i64})
157	setOperationAction(Op, T, Custom);
158
159	// SIMD-specific configuration
160	if (Subtarget->hasSIMD128()) {
161	// Combine vector mask reductions into alltrue/anytrue
162	setTargetDAGCombine(ISD::SETCC);
163
164	// Convert vector to integer bitcasts to bitmask
165	setTargetDAGCombine(ISD::BITCAST);
166
167	// Hoist bitcasts out of shuffles
168	setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
169
170	// Combine extends of extract_subvectors into widening ops
171	setTargetDAGCombine({ISD::SIGN_EXTEND, ISD::ZERO_EXTEND});
172
173	// Combine int_to_fp or fp_extend of extract_vectors and vice versa into
174	// conversions ops
175	setTargetDAGCombine({ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::FP_EXTEND,
176	ISD::EXTRACT_SUBVECTOR});
177
178	// Combine fp_to_{s,u}int_sat or fp_round of concat_vectors or vice versa
179	// into conversion ops
180	setTargetDAGCombine({ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT,
181	ISD::FP_ROUND, ISD::CONCAT_VECTORS});
182
183	setTargetDAGCombine(ISD::TRUNCATE);
184
185	// Support saturating add for i8x16 and i16x8
186	for (auto Op : {ISD::SADDSAT, ISD::UADDSAT})
187	for (auto T : {MVT::v16i8, MVT::v8i16})
188	setOperationAction(Op, T, Legal);
189
190	// Support integer abs
191	for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
192	setOperationAction(ISD::ABS, T, Legal);
193
194	// Custom lower BUILD_VECTORs to minimize number of replace_lanes
195	for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
196	MVT::v2f64})
197	setOperationAction(ISD::BUILD_VECTOR, T, Custom);
198
199	// We have custom shuffle lowering to expose the shuffle mask
200	for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
201	MVT::v2f64})
202	setOperationAction(ISD::VECTOR_SHUFFLE, T, Custom);
203
204	// Support splatting
205	for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
206	MVT::v2f64})
207	setOperationAction(ISD::SPLAT_VECTOR, T, Legal);
208
209	// Custom lowering since wasm shifts must have a scalar shift amount
210	for (auto Op : {ISD::SHL, ISD::SRA, ISD::SRL})
211	for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
212	setOperationAction(Op, T, Custom);
213
214	// Custom lower lane accesses to expand out variable indices
215	for (auto Op : {ISD::EXTRACT_VECTOR_ELT, ISD::INSERT_VECTOR_ELT})
216	for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
217	MVT::v2f64})
218	setOperationAction(Op, T, Custom);
219
220	// There is no i8x16.mul instruction
221	setOperationAction(ISD::MUL, MVT::v16i8, Expand);
222
223	// There is no vector conditional select instruction
224	for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
225	MVT::v2f64})
226	setOperationAction(ISD::SELECT_CC, T, Expand);
227
228	// Expand integer operations supported for scalars but not SIMD
229	for (auto Op :
230	{ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM, ISD::ROTL, ISD::ROTR})
231	for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
232	setOperationAction(Op, T, Expand);
233
234	// But we do have integer min and max operations
235	for (auto Op : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
236	for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
237	setOperationAction(Op, T, Legal);
238
239	// And we have popcnt for i8x16. It can be used to expand ctlz/cttz.
240	setOperationAction(ISD::CTPOP, MVT::v16i8, Legal);
241	setOperationAction(ISD::CTLZ, MVT::v16i8, Expand);
242	setOperationAction(ISD::CTTZ, MVT::v16i8, Expand);
243
244	// Custom lower bit counting operations for other types to scalarize them.
245	for (auto Op : {ISD::CTLZ, ISD::CTTZ, ISD::CTPOP})
246	for (auto T : {MVT::v8i16, MVT::v4i32, MVT::v2i64})
247	setOperationAction(Op, T, Custom);
248
249	// Expand float operations supported for scalars but not SIMD
250	for (auto Op : {ISD::FCOPYSIGN, ISD::FLOG, ISD::FLOG2, ISD::FLOG10,
251	ISD::FEXP, ISD::FEXP2})
252	for (auto T : {MVT::v4f32, MVT::v2f64})
253	setOperationAction(Op, T, Expand);
254
255	// Unsigned comparison operations are unavailable for i64x2 vectors.
256	for (auto CC : {ISD::SETUGT, ISD::SETUGE, ISD::SETULT, ISD::SETULE})
257	setCondCodeAction(CC, MVT::v2i64, Custom);
258
259	// 64x2 conversions are not in the spec
260	for (auto Op :
261	{ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::FP_TO_SINT, ISD::FP_TO_UINT})
262	for (auto T : {MVT::v2i64, MVT::v2f64})
263	setOperationAction(Op, T, Expand);
264
265	// But saturating fp_to_int converstions are
266	for (auto Op : {ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT})
267	setOperationAction(Op, MVT::v4i32, Custom);
268
269	// Support vector extending
270	for (auto T : MVT::integer_fixedlen_vector_valuetypes()) {
271	setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, T, Custom);
272	setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, T, Custom);
273	}
274	}
275
276	// As a special case, these operators use the type to mean the type to
277	// sign-extend from.
278	setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
279	if (!Subtarget->hasSignExt()) {
280	// Sign extends are legal only when extending a vector extract
281	auto Action = Subtarget->hasSIMD128() ? Custom : Expand;
282	for (auto T : {MVT::i8, MVT::i16, MVT::i32})
283	setOperationAction(ISD::SIGN_EXTEND_INREG, T, Action);
284	}
285	for (auto T : MVT::integer_fixedlen_vector_valuetypes())
286	setOperationAction(ISD::SIGN_EXTEND_INREG, T, Expand);
287
288	// Dynamic stack allocation: use the default expansion.
289	setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
290	setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
291	setOperationAction(ISD::DYNAMIC_STACKALLOC, MVTPtr, Expand);
292
293	setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
294	setOperationAction(ISD::FrameIndex, MVT::i64, Custom);
295	setOperationAction(ISD::CopyToReg, MVT::Other, Custom);
296
297	// Expand these forms; we pattern-match the forms that we can handle in isel.
298	for (auto T : {MVT::i32, MVT::i64, MVT::f32, MVT::f64})
299	for (auto Op : {ISD::BR_CC, ISD::SELECT_CC})
300	setOperationAction(Op, T, Expand);
301
302	// We have custom switch handling.
303	setOperationAction(ISD::BR_JT, MVT::Other, Custom);
304
305	// WebAssembly doesn't have:
306	// - Floating-point extending loads.
307	// - Floating-point truncating stores.
308	// - i1 extending loads.
309	// - truncating SIMD stores and most extending loads
310	setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
311	setTruncStoreAction(MVT::f64, MVT::f32, Expand);
312	for (auto T : MVT::integer_valuetypes())
313	for (auto Ext : {ISD::EXTLOAD, ISD::ZEXTLOAD, ISD::SEXTLOAD})
314	setLoadExtAction(Ext, T, MVT::i1, Promote);
315	if (Subtarget->hasSIMD128()) {
316	for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32,
317	MVT::v2f64}) {
318	for (auto MemT : MVT::fixedlen_vector_valuetypes()) {
319	if (MVT(T) != MemT) {
320	setTruncStoreAction(T, MemT, Expand);
321	for (auto Ext : {ISD::EXTLOAD, ISD::ZEXTLOAD, ISD::SEXTLOAD})
322	setLoadExtAction(Ext, T, MemT, Expand);
323	}
324	}
325	}
326	// But some vector extending loads are legal
327	for (auto Ext : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}) {
328	setLoadExtAction(Ext, MVT::v8i16, MVT::v8i8, Legal);
329	setLoadExtAction(Ext, MVT::v4i32, MVT::v4i16, Legal);
330	setLoadExtAction(Ext, MVT::v2i64, MVT::v2i32, Legal);
331	}
332	setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f32, Legal);
333	}
334
335	// Don't do anything clever with build_pairs
336	setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
337
338	// Trap lowers to wasm unreachable
339	setOperationAction(ISD::TRAP, MVT::Other, Legal);
340	setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
341
342	// Exception handling intrinsics
343	setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
344	setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
345	setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
346
347	setMaxAtomicSizeInBitsSupported(`64`);
348
349	// Override the __gnu_f2h_ieee/__gnu_h2f_ieee names so that the f32 name is
350	// consistent with the f64 and f128 names.
351	setLibcallName(Call: RTLIB::FPEXT_F16_F32, Name: "__extendhfsf2");
352	setLibcallName(Call: RTLIB::FPROUND_F32_F16, Name: "__truncsfhf2");
353
354	// Define the emscripten name for return address helper.
355	// TODO: when implementing other Wasm backends, make this generic or only do
356	// this on emscripten depending on what they end up doing.
357	setLibcallName(Call: RTLIB::RETURN_ADDRESS, Name: "emscripten_return_address");
358
359	// Always convert switches to br_tables unless there is only one case, which
360	// is equivalent to a simple branch. This reduces code size for wasm, and we
361	// defer possible jump table optimizations to the VM.
362	setMinimumJumpTableEntries(`2`);
363	}
364
365	MVT WebAssemblyTargetLowering::getPointerTy(const DataLayout &DL,
366	uint32_t AS) const {
367	if (AS == WebAssembly::WasmAddressSpace::WASM_ADDRESS_SPACE_EXTERNREF)
368	return MVT::externref;
369	if (AS == WebAssembly::WasmAddressSpace::WASM_ADDRESS_SPACE_FUNCREF)
370	return MVT::funcref;
371	return TargetLowering::getPointerTy(DL, AS);
372	}
373
374	MVT WebAssemblyTargetLowering::getPointerMemTy(const DataLayout &DL,
375	uint32_t AS) const {
376	if (AS == WebAssembly::WasmAddressSpace::WASM_ADDRESS_SPACE_EXTERNREF)
377	return MVT::externref;
378	if (AS == WebAssembly::WasmAddressSpace::WASM_ADDRESS_SPACE_FUNCREF)
379	return MVT::funcref;
380	return TargetLowering::getPointerMemTy(DL, AS);
381	}
382
383	TargetLowering::AtomicExpansionKind
384	WebAssemblyTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst AI) const* {
385	// We have wasm instructions for these
386	switch (AI->getOperation()) {
387	case AtomicRMWInst::Add:
388	case AtomicRMWInst::Sub:
389	case AtomicRMWInst::And:
390	case AtomicRMWInst::Or:
391	case AtomicRMWInst::Xor:
392	case AtomicRMWInst::Xchg:
393	return AtomicExpansionKind::None;
394	default:
395	break;
396	}
397	return AtomicExpansionKind::CmpXChg;
398	}
399
400	bool WebAssemblyTargetLowering::shouldScalarizeBinop(SDValue VecOp) const {
401	// Implementation copied from X86TargetLowering.
402	unsigned Opc = VecOp.getOpcode();
403
404	// Assume target opcodes can't be scalarized.
405	// TODO - do we have any exceptions?
406	if (Opc >= ISD::BUILTIN_OP_END)
407	return false;
408
409	// If the vector op is not supported, try to convert to scalar.
410	EVT VecVT = VecOp.getValueType();
411	if (!isOperationLegalOrCustomOrPromote(Op: Opc, VT: VecVT))
412	return true;
413
414	// If the vector op is supported, but the scalar op is not, the transform may
415	// not be worthwhile.
416	EVT ScalarVT = VecVT.getScalarType();
417	return isOperationLegalOrCustomOrPromote(Op: Opc, VT: ScalarVT);
418	}
419
420	FastISel *WebAssemblyTargetLowering::createFastISel(
421	FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo LibInfo) const* {
422	return WebAssembly::createFastISel(funcInfo&: FuncInfo, libInfo: LibInfo);
423	}
424
425	MVT WebAssemblyTargetLowering::getScalarShiftAmountTy(const DataLayout & /DL/,
426	EVT VT) const {
427	unsigned BitWidth = NextPowerOf2(A: VT.getSizeInBits() - `1`);
428	if (BitWidth > `1` && BitWidth < `8`)
429	BitWidth = `8`;
430
431	if (BitWidth > `64`) {
432	// The shift will be lowered to a libcall, and compiler-rt libcalls expect
433	// the count to be an i32.
434	BitWidth = `32`;
435	assert(BitWidth >= Log2_32_Ceil(VT.getSizeInBits()) &&
436	"32-bit shift counts ought to be enough for anyone");
437	}
438
439	MVT Result = MVT::getIntegerVT(BitWidth);
440	assert(Result != MVT::INVALID_SIMPLE_VALUE_TYPE &&
441	"Unable to represent scalar shift amount type");
442	return Result;
443	}
444
445	// Lower an fp-to-int conversion operator from the LLVM opcode, which has an
446	// undefined result on invalid/overflow, to the WebAssembly opcode, which
447	// traps on invalid/overflow.
448	static MachineBasicBlock *LowerFPToInt(MachineInstr &MI, DebugLoc DL,
449	MachineBasicBlock *BB,
450	const TargetInstrInfo &TII,
451	bool IsUnsigned, bool Int64,
452	bool Float64, unsigned LoweredOpcode) {
453	MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
454
455	Register OutReg = MI.getOperand(i: `0`).getReg();
456	Register InReg = MI.getOperand(i: `1`).getReg();
457
458	unsigned Abs = Float64 ? WebAssembly::ABS_F64 : WebAssembly::ABS_F32;
459	unsigned FConst = Float64 ? WebAssembly::CONST_F64 : WebAssembly::CONST_F32;
460	unsigned LT = Float64 ? WebAssembly::LT_F64 : WebAssembly::LT_F32;
461	unsigned GE = Float64 ? WebAssembly::GE_F64 : WebAssembly::GE_F32;
462	unsigned IConst = Int64 ? WebAssembly::CONST_I64 : WebAssembly::CONST_I32;
463	unsigned Eqz = WebAssembly::EQZ_I32;
464	unsigned And = WebAssembly::AND_I32;
465	int64_t Limit = Int64 ? INT64_MIN : INT32_MIN;
466	int64_t Substitute = IsUnsigned ? `0` : Limit;
467	double CmpVal = IsUnsigned ? -(double)Limit * `2.0` : -(double)Limit;
468	auto &Context = BB->getParent()->getFunction().getContext();
469	Type *Ty = Float64 ? Type::getDoubleTy(C&: Context) : Type::getFloatTy(C&: Context);
470
471	const BasicBlock *LLVMBB = BB->getBasicBlock();
472	MachineFunction *F = BB->getParent();
473	MachineBasicBlock *TrueMBB = F->CreateMachineBasicBlock(BB: LLVMBB);
474	MachineBasicBlock *FalseMBB = F->CreateMachineBasicBlock(BB: LLVMBB);
475	MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(BB: LLVMBB);
476
477	MachineFunction::iterator It = ++BB->getIterator();
478	F->insert(MBBI: It, MBB: FalseMBB);
479	F->insert(MBBI: It, MBB: TrueMBB);
480	F->insert(MBBI: It, MBB: DoneMBB);
481
482	// Transfer the remainder of BB and its successor edges to DoneMBB.
483	DoneMBB->splice(Where: DoneMBB->begin(), Other: BB, From: std::next(x: MI.getIterator()), To: BB->end());
484	DoneMBB->transferSuccessorsAndUpdatePHIs(FromMBB: BB);
485
486	BB->addSuccessor(Succ: TrueMBB);
487	BB->addSuccessor(Succ: FalseMBB);
488	TrueMBB->addSuccessor(Succ: DoneMBB);
489	FalseMBB->addSuccessor(Succ: DoneMBB);
490
491	unsigned Tmp0, Tmp1, CmpReg, EqzReg, FalseReg, TrueReg;
492	Tmp0 = MRI.createVirtualRegister(RegClass: MRI.getRegClass(Reg: InReg));
493	Tmp1 = MRI.createVirtualRegister(RegClass: MRI.getRegClass(Reg: InReg));
494	CmpReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
495	EqzReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
496	FalseReg = MRI.createVirtualRegister(RegClass: MRI.getRegClass(Reg: OutReg));
497	TrueReg = MRI.createVirtualRegister(RegClass: MRI.getRegClass(Reg: OutReg));
498
499	MI.eraseFromParent();
500	// For signed numbers, we can do a single comparison to determine whether
501	// fabs(x) is within range.
502	if (IsUnsigned) {
503	Tmp0 = InReg;
504	} else {
505	BuildMI(BB, MIMD: DL, MCID: TII.get(Opcode: Abs), DestReg: Tmp0).addReg(RegNo: InReg);
506	}
507	BuildMI(BB, MIMD: DL, MCID: TII.get(Opcode: FConst), DestReg: Tmp1)
508	.addFPImm(Val: cast<ConstantFP>(Val: ConstantFP::get(Ty, V: CmpVal)));
509	BuildMI(BB, MIMD: DL, MCID: TII.get(Opcode: LT), DestReg: CmpReg).addReg(RegNo: Tmp0).addReg(RegNo: Tmp1);
510
511	// For unsigned numbers, we have to do a separate comparison with zero.
512	if (IsUnsigned) {
513	Tmp1 = MRI.createVirtualRegister(RegClass: MRI.getRegClass(Reg: InReg));
514	Register SecondCmpReg =
515	MRI.createVirtualRegister(&WebAssembly::I32RegClass);
516	Register AndReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
517	BuildMI(BB, MIMD: DL, MCID: TII.get(Opcode: FConst), DestReg: Tmp1)
518	.addFPImm(Val: cast<ConstantFP>(Val: ConstantFP::get(Ty, V: `0.0`)));
519	BuildMI(BB, MIMD: DL, MCID: TII.get(Opcode: GE), DestReg: SecondCmpReg).addReg(RegNo: Tmp0).addReg(RegNo: Tmp1);
520	BuildMI(BB, MIMD: DL, MCID: TII.get(Opcode: And), DestReg: AndReg).addReg(RegNo: CmpReg).addReg(RegNo: SecondCmpReg);
521	CmpReg = AndReg;
522	}
523
524	BuildMI(BB, MIMD: DL, MCID: TII.get(Opcode: Eqz), DestReg: EqzReg).addReg(RegNo: CmpReg);
525
526	// Create the CFG diamond to select between doing the conversion or using
527	// the substitute value.
528	BuildMI(BB, DL, TII.get(WebAssembly::BR_IF)).addMBB(TrueMBB).addReg(EqzReg);
529	BuildMI(BB: FalseMBB, MIMD: DL, MCID: TII.get(Opcode: LoweredOpcode), DestReg: FalseReg).addReg(RegNo: InReg);
530	BuildMI(FalseMBB, DL, TII.get(WebAssembly::BR)).addMBB(DoneMBB);
531	BuildMI(BB: TrueMBB, MIMD: DL, MCID: TII.get(Opcode: IConst), DestReg: TrueReg).addImm(Val: Substitute);
532	BuildMI(BB&: *DoneMBB, I: DoneMBB->begin(), MIMD: DL, MCID: TII.get(Opcode: TargetOpcode::PHI), DestReg: OutReg)
533	.addReg(RegNo: FalseReg)
534	.addMBB(MBB: FalseMBB)
535	.addReg(RegNo: TrueReg)
536	.addMBB(MBB: TrueMBB);
537
538	return DoneMBB;
539	}
540
541	static MachineBasicBlock *
542	LowerCallResults(MachineInstr &CallResults, DebugLoc DL, MachineBasicBlock *BB,
543	const WebAssemblySubtarget *Subtarget,
544	const TargetInstrInfo &TII) {
545	MachineInstr &CallParams = *CallResults.getPrevNode();
546	assert(CallParams.getOpcode() == WebAssembly::CALL_PARAMS);
547	assert(CallResults.getOpcode() == WebAssembly::CALL_RESULTS \|\|
548	CallResults.getOpcode() == WebAssembly::RET_CALL_RESULTS);
549
550	bool IsIndirect =
551	CallParams.getOperand(i: `0`).isReg() \|\| CallParams.getOperand(i: `0`).isFI();
552	bool IsRetCall = CallResults.getOpcode() == WebAssembly::RET_CALL_RESULTS;
553
554	bool IsFuncrefCall = false;
555	if (IsIndirect && CallParams.getOperand(i: `0`).isReg()) {
556	Register Reg = CallParams.getOperand(i: `0`).getReg();
557	const MachineFunction *MF = BB->getParent();
558	const MachineRegisterInfo &MRI = MF->getRegInfo();
559	const TargetRegisterClass *TRC = MRI.getRegClass(Reg);
560	IsFuncrefCall = (TRC == &WebAssembly::FUNCREFRegClass);
561	assert(!IsFuncrefCall \|\| Subtarget->hasReferenceTypes());
562	}
563
564	unsigned CallOp;
565	if (IsIndirect && IsRetCall) {
566	CallOp = WebAssembly::RET_CALL_INDIRECT;
567	} else if (IsIndirect) {
568	CallOp = WebAssembly::CALL_INDIRECT;
569	} else if (IsRetCall) {
570	CallOp = WebAssembly::RET_CALL;
571	} else {
572	CallOp = WebAssembly::CALL;
573	}
574
575	MachineFunction &MF = *BB->getParent();
576	const MCInstrDesc &MCID = TII.get(Opcode: CallOp);
577	MachineInstrBuilder MIB(MF, MF.CreateMachineInstr(MCID, DL));
578
579	// See if we must truncate the function pointer.
580	// CALL_INDIRECT takes an i32, but in wasm64 we represent function pointers
581	// as 64-bit for uniformity with other pointer types.
582	// See also: WebAssemblyFastISel::selectCall
583	if (IsIndirect && MF.getSubtarget<WebAssemblySubtarget>().hasAddr64()) {
584	Register Reg32 =
585	MF.getRegInfo().createVirtualRegister(&WebAssembly::I32RegClass);
586	auto &FnPtr = CallParams.getOperand(i: `0`);
587	BuildMI(*BB, CallResults.getIterator(), DL,
588	TII.get(WebAssembly::I32_WRAP_I64), Reg32)
589	.addReg(FnPtr.getReg());
590	FnPtr.setReg(Reg32);
591	}
592
593	// Move the function pointer to the end of the arguments for indirect calls
594	if (IsIndirect) {
595	auto FnPtr = CallParams.getOperand(i: `0`);
596	CallParams.removeOperand(OpNo: `0`);
597
598	// For funcrefs, call_indirect is done through __funcref_call_table and the
599	// funcref is always installed in slot 0 of the table, therefore instead of
600	// having the function pointer added at the end of the params list, a zero
601	// (the index in
602	// __funcref_call_table is added).
603	if (IsFuncrefCall) {
604	Register RegZero =
605	MF.getRegInfo().createVirtualRegister(&WebAssembly::I32RegClass);
606	MachineInstrBuilder MIBC0 =
607	BuildMI(MF, DL, TII.get(WebAssembly::CONST_I32), RegZero).addImm(`0`);
608
609	BB->insert(I: CallResults.getIterator(), M: MIBC0);
610	MachineInstrBuilder (MF, CallParams).addReg(RegNo: RegZero);
611	} else
612	CallParams.addOperand(Op: FnPtr);
613	}
614
615	for (auto Def : CallResults.defs())
616	MIB.add(MO: Def);
617
618	if (IsIndirect) {
619	// Placeholder for the type index.
620	MIB.addImm(Val: `0`);
621	// The table into which this call_indirect indexes.
622	MCSymbolWasm *Table = IsFuncrefCall
623	? WebAssembly::getOrCreateFuncrefCallTableSymbol(
624	Ctx&: MF.getContext(), Subtarget)
625	: WebAssembly::getOrCreateFunctionTableSymbol(
626	Ctx&: MF.getContext(), Subtarget);
627	if (Subtarget->hasReferenceTypes()) {
628	MIB.addSym(Sym: Table);
629	} else {
630	// For the MVP there is at most one table whose number is 0, but we can't
631	// write a table symbol or issue relocations. Instead we just ensure the
632	// table is live and write a zero.
633	Table->setNoStrip();
634	MIB.addImm(Val: `0`);
635	}
636	}
637
638	for (auto Use : CallParams.uses())
639	MIB.add(MO: Use);
640
641	BB->insert(I: CallResults.getIterator(), M: MIB);
642	CallParams.eraseFromParent();
643	CallResults.eraseFromParent();
644
645	// If this is a funcref call, to avoid hidden GC roots, we need to clear the
646	// table slot with ref.null upon call_indirect return.
647	//
648	// This generates the following code, which comes right after a call_indirect
649	// of a funcref:
650	//
651	// i32.const 0
652	// ref.null func
653	// table.set __funcref_call_table
654	if (IsIndirect && IsFuncrefCall) {
655	MCSymbolWasm *Table = WebAssembly::getOrCreateFuncrefCallTableSymbol(
656	Ctx&: MF.getContext(), Subtarget);
657	Register RegZero =
658	MF.getRegInfo().createVirtualRegister(&WebAssembly::I32RegClass);
659	MachineInstr *Const0 =
660	BuildMI(MF, DL, TII.get(WebAssembly::CONST_I32), RegZero).addImm(`0`);
661	BB->insertAfter(I: MIB.getInstr()->getIterator(), MI: Const0);
662
663	Register RegFuncref =
664	MF.getRegInfo().createVirtualRegister(&WebAssembly::FUNCREFRegClass);
665	MachineInstr *RefNull =
666	BuildMI(MF, DL, TII.get(WebAssembly::REF_NULL_FUNCREF), RegFuncref);
667	BB->insertAfter(I: Const0->getIterator(), MI: RefNull);
668
669	MachineInstr *TableSet =
670	BuildMI(MF, DL, TII.get(WebAssembly::TABLE_SET_FUNCREF))
671	.addSym(Table)
672	.addReg(RegZero)
673	.addReg(RegFuncref);
674	BB->insertAfter(I: RefNull->getIterator(), MI: TableSet);
675	}
676
677	return BB;
678	}
679
680	MachineBasicBlock *WebAssemblyTargetLowering::EmitInstrWithCustomInserter(
681	MachineInstr &MI, MachineBasicBlock BB) const* {
682	const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
683	DebugLoc DL = MI.getDebugLoc();
684
685	switch (MI.getOpcode()) {
686	default:
687	llvm_unreachable("Unexpected instr type to insert");
688	case WebAssembly::FP_TO_SINT_I32_F32:
689	return LowerFPToInt(MI, DL, BB, TII, false, false, false,
690	WebAssembly::I32_TRUNC_S_F32);
691	case WebAssembly::FP_TO_UINT_I32_F32:
692	return LowerFPToInt(MI, DL, BB, TII, true, false, false,
693	WebAssembly::I32_TRUNC_U_F32);
694	case WebAssembly::FP_TO_SINT_I64_F32:
695	return LowerFPToInt(MI, DL, BB, TII, false, true, false,
696	WebAssembly::I64_TRUNC_S_F32);
697	case WebAssembly::FP_TO_UINT_I64_F32:
698	return LowerFPToInt(MI, DL, BB, TII, true, true, false,
699	WebAssembly::I64_TRUNC_U_F32);
700	case WebAssembly::FP_TO_SINT_I32_F64:
701	return LowerFPToInt(MI, DL, BB, TII, false, false, true,
702	WebAssembly::I32_TRUNC_S_F64);
703	case WebAssembly::FP_TO_UINT_I32_F64:
704	return LowerFPToInt(MI, DL, BB, TII, true, false, true,
705	WebAssembly::I32_TRUNC_U_F64);
706	case WebAssembly::FP_TO_SINT_I64_F64:
707	return LowerFPToInt(MI, DL, BB, TII, false, true, true,
708	WebAssembly::I64_TRUNC_S_F64);
709	case WebAssembly::FP_TO_UINT_I64_F64:
710	return LowerFPToInt(MI, DL, BB, TII, true, true, true,
711	WebAssembly::I64_TRUNC_U_F64);
712	case WebAssembly::CALL_RESULTS:
713	case WebAssembly::RET_CALL_RESULTS:
714	return LowerCallResults(CallResults&: MI, DL, BB, Subtarget, TII);
715	}
716	}
717
718	const char *
719	WebAssemblyTargetLowering::getTargetNodeName(unsigned Opcode) const {
720	switch (static_cast<WebAssemblyISD::NodeType>(Opcode)) {
721	case WebAssemblyISD::FIRST_NUMBER:
722	case WebAssemblyISD::FIRST_MEM_OPCODE:
723	break;
724	#define HANDLE_NODETYPE(NODE) \
725	case WebAssemblyISD::NODE: \
726	return "WebAssemblyISD::" #NODE;
727	#define HANDLE_MEM_NODETYPE(NODE) HANDLE_NODETYPE(NODE)
728	#include "WebAssemblyISD.def"
729	#undef HANDLE_MEM_NODETYPE
730	#undef HANDLE_NODETYPE
731	}
732	return nullptr;
733	}
734
735	std::pair<unsigned, const TargetRegisterClass *>
736	WebAssemblyTargetLowering::getRegForInlineAsmConstraint(
737	const TargetRegisterInfo TRI, StringRef Constraint, MVT VT) const* {
738	// First, see if this is a constraint that directly corresponds to a
739	// WebAssembly register class.
740	if (Constraint.size() == `1`) {
741	switch (Constraint [`0`]) {
742	case `'r'`:
743	assert(VT != MVT::iPTR && "Pointer MVT not expected here");
744	if (Subtarget->hasSIMD128() && VT.isVector()) {
745	if (VT.getSizeInBits() == `128`)
746	return std::make_pair(`0U`, &WebAssembly::V128RegClass);
747	}
748	if (VT.isInteger() && !VT.isVector()) {
749	if (VT.getSizeInBits() <= `32`)
750	return std::make_pair(`0U`, &WebAssembly::I32RegClass);
751	if (VT.getSizeInBits() <= `64`)
752	return std::make_pair(`0U`, &WebAssembly::I64RegClass);
753	}
754	if (VT.isFloatingPoint() && !VT.isVector()) {
755	switch (VT.getSizeInBits()) {
756	case `32`:
757	return std::make_pair(`0U`, &WebAssembly::F32RegClass);
758	case `64`:
759	return std::make_pair(`0U`, &WebAssembly::F64RegClass);
760	default:
761	break;
762	}
763	}
764	break;
765	default:
766	break;
767	}
768	}
769
770	return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
771	}
772
773	bool WebAssemblyTargetLowering::isCheapToSpeculateCttz(Type Ty) const* {
774	// Assume ctz is a relatively cheap operation.
775	return true;
776	}
777
778	bool WebAssemblyTargetLowering::isCheapToSpeculateCtlz(Type Ty) const* {
779	// Assume clz is a relatively cheap operation.
780	return true;
781	}
782
783	bool WebAssemblyTargetLowering::isLegalAddressingMode(const DataLayout &DL,
784	const AddrMode &AM,
785	Type Ty, unsigned* AS,
786	Instruction I) const* {
787	// WebAssembly offsets are added as unsigned without wrapping. The
788	// isLegalAddressingMode gives us no way to determine if wrapping could be
789	// happening, so we approximate this by accepting only non-negative offsets.
790	if (AM.BaseOffs < `0`)
791	return false;
792
793	// WebAssembly has no scale register operands.
794	if (AM.Scale != `0`)
795	return false;
796
797	// Everything else is legal.
798	return true;
799	}
800
801	bool WebAssemblyTargetLowering::allowsMisalignedMemoryAccesses(
802	EVT /VT/, unsigned /AddrSpace/, Align /Align/,
803	MachineMemOperand::Flags /Flags/, unsigned Fast) const* {
804	// WebAssembly supports unaligned accesses, though it should be declared
805	// with the p2align attribute on loads and stores which do so, and there
806	// may be a performance impact. We tell LLVM they're "fast" because
807	// for the kinds of things that LLVM uses this for (merging adjacent stores
808	// of constants, etc.), WebAssembly implementations will either want the
809	// unaligned access or they'll split anyway.
810	if (Fast)
811	*Fast = `1`;
812	return true;
813	}
814
815	bool WebAssemblyTargetLowering::isIntDivCheap(EVT VT,
816	AttributeList Attr) const {
817	// The current thinking is that wasm engines will perform this optimization,
818	// so we can save on code size.
819	return true;
820	}
821
822	bool WebAssemblyTargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
823	EVT ExtT = ExtVal.getValueType();
824	EVT MemT = cast<LoadSDNode>(Val: ExtVal ->getOperand(Num: `0`))->getValueType(ResNo: `0`);
825	return (ExtT == MVT::v8i16 && MemT == MVT::v8i8) \|\|
826	(ExtT == MVT::v4i32 && MemT == MVT::v4i16) \|\|
827	(ExtT == MVT::v2i64 && MemT == MVT::v2i32);
828	}
829
830	bool WebAssemblyTargetLowering::isOffsetFoldingLegal(
831	const GlobalAddressSDNode GA) const* {
832	// Wasm doesn't support function addresses with offsets
833	const GlobalValue *GV = GA->getGlobal();
834	return isa<Function>(Val: GV) ? false : TargetLowering::isOffsetFoldingLegal(GA);
835	}
836
837	bool WebAssemblyTargetLowering::shouldSinkOperands(
838	Instruction I, SmallVectorImpl<Use > &Ops) const {
839	using namespace llvm::PatternMatch;
840
841	if (!I->getType()->isVectorTy() \|\| !I->isShift())
842	return false;
843
844	Value *V = I->getOperand(i: `1`);
845	// We dont need to sink constant splat.
846	if (dyn_cast<Constant>(Val: V))
847	return false;
848
849	if (match(V, P: m_Shuffle(v1: m_InsertElt(Val: m_Value(), Elt: m_Value(), Idx: m_ZeroInt()),
850	v2: m_Value(), mask: m_ZeroMask ()))) {
851	// Sink insert
852	Ops.push_back(Elt: &cast<Instruction>(Val: V)->getOperandUse(i: `0`));
853	// Sink shuffle
854	Ops.push_back(Elt: &I->getOperandUse(i: `1`));
855	return true;
856	}
857
858	return false;
859	}
860
861	EVT WebAssemblyTargetLowering::getSetCCResultType(const DataLayout &DL,
862	LLVMContext &C,
863	EVT VT) const {
864	if (VT.isVector())
865	return VT.changeVectorElementTypeToInteger();
866
867	// So far, all branch instructions in Wasm take an I32 condition.
868	// The default TargetLowering::getSetCCResultType returns the pointer size,
869	// which would be useful to reduce instruction counts when testing
870	// against 64-bit pointers/values if at some point Wasm supports that.
871	return EVT::getIntegerVT(Context&: C, BitWidth: `32`);
872	}
873
874	bool WebAssemblyTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
875	const CallInst &I,
876	MachineFunction &MF,
877	unsigned Intrinsic) const {
878	switch (Intrinsic) {
879	case Intrinsic::wasm_memory_atomic_notify:
880	Info.opc = ISD::INTRINSIC_W_CHAIN;
881	Info.memVT = MVT::i32;
882	Info.ptrVal = I.getArgOperand(i: `0`);
883	Info.offset = `0`;
884	Info.align = Align (`4`);
885	// atomic.notify instruction does not really load the memory specified with
886	// this argument, but MachineMemOperand should either be load or store, so
887	// we set this to a load.
888	// FIXME Volatile isn't really correct, but currently all LLVM atomic
889	// instructions are treated as volatiles in the backend, so we should be
890	// consistent. The same applies for wasm_atomic_wait intrinsics too.
891	Info.flags = MachineMemOperand::MOVolatile \| MachineMemOperand::MOLoad;
892	return true;
893	case Intrinsic::wasm_memory_atomic_wait32:
894	Info.opc = ISD::INTRINSIC_W_CHAIN;
895	Info.memVT = MVT::i32;
896	Info.ptrVal = I.getArgOperand(i: `0`);
897	Info.offset = `0`;
898	Info.align = Align (`4`);
899	Info.flags = MachineMemOperand::MOVolatile \| MachineMemOperand::MOLoad;
900	return true;
901	case Intrinsic::wasm_memory_atomic_wait64:
902	Info.opc = ISD::INTRINSIC_W_CHAIN;
903	Info.memVT = MVT::i64;
904	Info.ptrVal = I.getArgOperand(i: `0`);
905	Info.offset = `0`;
906	Info.align = Align (`8`);
907	Info.flags = MachineMemOperand::MOVolatile \| MachineMemOperand::MOLoad;
908	return true;
909	default:
910	return false;
911	}
912	}
913
914	void WebAssemblyTargetLowering::computeKnownBitsForTargetNode(
915	const SDValue Op, KnownBits &Known, const APInt &DemandedElts,
916	const SelectionDAG &DAG, unsigned Depth) const {
917	switch (Op.getOpcode()) {
918	default:
919	break;
920	case ISD::INTRINSIC_WO_CHAIN: {
921	unsigned IntNo = Op.getConstantOperandVal(i: `0`);
922	switch (IntNo) {
923	default:
924	break;
925	case Intrinsic::wasm_bitmask: {
926	unsigned BitWidth = Known.getBitWidth();
927	EVT VT = Op.getOperand(i: `1`).getSimpleValueType();
928	unsigned PossibleBits = VT.getVectorNumElements();
929	APInt ZeroMask = APInt::getHighBitsSet(numBits: BitWidth, hiBitsSet: BitWidth - PossibleBits);
930	Known.Zero \|= ZeroMask;
931	break;
932	}
933	}
934	}
935	}
936	}
937
938	TargetLoweringBase::LegalizeTypeAction
939	WebAssemblyTargetLowering::getPreferredVectorAction(MVT VT) const {
940	if (VT.isFixedLengthVector()) {
941	MVT EltVT = VT.getVectorElementType();
942	// We have legal vector types with these lane types, so widening the
943	// vector would let us use some of the lanes directly without having to
944	// extend or truncate values.
945	if (EltVT == MVT::i8 \|\| EltVT == MVT::i16 \|\| EltVT == MVT::i32 \|\|
946	EltVT == MVT::i64 \|\| EltVT == MVT::f32 \|\| EltVT == MVT::f64)
947	return TypeWidenVector;
948	}
949
950	return TargetLoweringBase::getPreferredVectorAction(VT);
951	}
952
953	bool WebAssemblyTargetLowering::shouldSimplifyDemandedVectorElts(
954	SDValue Op, const TargetLoweringOpt &TLO) const {
955	// ISel process runs DAGCombiner after legalization; this step is called
956	// SelectionDAG optimization phase. This post-legalization combining process
957	// runs DAGCombiner on each node, and if there was a change to be made,
958	// re-runs legalization again on it and its user nodes to make sure
959	// everythiing is in a legalized state.
960	//
961	// The legalization calls lowering routines, and we do our custom lowering for
962	// build_vectors (LowerBUILD_VECTOR), which converts undef vector elements
963	// into zeros. But there is a set of routines in DAGCombiner that turns unused
964	// (= not demanded) nodes into undef, among which SimplifyDemandedVectorElts
965	// turns unused vector elements into undefs. But this routine does not work
966	// with our custom LowerBUILD_VECTOR, which turns undefs into zeros. This
967	// combination can result in a infinite loop, in which undefs are converted to
968	// zeros in legalization and back to undefs in combining.
969	//
970	// So after DAG is legalized, we prevent SimplifyDemandedVectorElts from
971	// running for build_vectors.
972	if (Op.getOpcode() == ISD::BUILD_VECTOR && TLO.LegalOps && TLO.LegalTys)
973	return false;
974	return true;
975	}
976
977	//===----------------------------------------------------------------------===//
978	// WebAssembly Lowering private implementation.
979	//===----------------------------------------------------------------------===//
980
981	//===----------------------------------------------------------------------===//
982	// Lowering Code
983	//===----------------------------------------------------------------------===//
984
985	static void fail(const SDLoc &DL, SelectionDAG &DAG, const char *Msg) {
986	MachineFunction &MF = DAG.getMachineFunction();
987	DAG.getContext()->diagnose(
988	DI: DiagnosticInfoUnsupported (MF.getFunction(), Msg, DL.getDebugLoc()));
989	}
990
991	// Test whether the given calling convention is supported.
992	static bool callingConvSupported(CallingConv::ID CallConv) {
993	// We currently support the language-independent target-independent
994	// conventions. We don't yet have a way to annotate calls with properties like
995	// "cold", and we don't have any call-clobbered registers, so these are mostly
996	// all handled the same.
997	return CallConv == CallingConv::C \|\| CallConv == CallingConv::Fast \|\|
998	CallConv == CallingConv::Cold \|\|
999	CallConv == CallingConv::PreserveMost \|\|
1000	CallConv == CallingConv::PreserveAll \|\|
1001	CallConv == CallingConv::CXX_FAST_TLS \|\|
1002	CallConv == CallingConv::WASM_EmscriptenInvoke \|\|
1003	CallConv == CallingConv::Swift;
1004	}
1005
1006	SDValue
1007	WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI,
1008	SmallVectorImpl<SDValue> &InVals) const {
1009	SelectionDAG &DAG = CLI.DAG;
1010	SDLoc DL = CLI.DL;
1011	SDValue Chain = CLI.Chain;
1012	SDValue Callee = CLI.Callee;
1013	MachineFunction &MF = DAG.getMachineFunction();
1014	auto Layout = MF.getDataLayout();
1015
1016	CallingConv::ID CallConv = CLI.CallConv;
1017	if (!callingConvSupported(CallConv))
1018	fail(DL, DAG,
1019	Msg: "WebAssembly doesn't support language-specific or target-specific "
1020	"calling conventions yet");
1021	if (CLI.IsPatchPoint)
1022	fail(DL, DAG, Msg: "WebAssembly doesn't support patch point yet");
1023
1024	if (CLI.IsTailCall) {
1025	auto NoTail = [&](const char *Msg) {
1026	if (CLI.CB && CLI.CB->isMustTailCall())
1027	fail(DL, DAG, Msg);
1028	CLI.IsTailCall = false;
1029	};
1030
1031	if (!Subtarget->hasTailCall())
1032	NoTail ("WebAssembly 'tail-call' feature not enabled");
1033
1034	// Varargs calls cannot be tail calls because the buffer is on the stack
1035	if (CLI.IsVarArg)
1036	NoTail ("WebAssembly does not support varargs tail calls");
1037
1038	// Do not tail call unless caller and callee return types match
1039	const Function &F = MF.getFunction();
1040	const TargetMachine &TM = getTargetMachine();
1041	Type *RetTy = F.getReturnType();
1042	SmallVector<MVT, `4`> CallerRetTys;
1043	SmallVector<MVT, `4`> CalleeRetTys;
1044	computeLegalValueVTs(F, TM, Ty: RetTy, ValueVTs&: CallerRetTys);
1045	computeLegalValueVTs(F, TM, Ty: CLI.RetTy, ValueVTs&: CalleeRetTys);
1046	bool TypesMatch = CallerRetTys.size() == CalleeRetTys.size() &&
1047	std::equal(first1: CallerRetTys.begin(), last1: CallerRetTys.end(),
1048	first2: CalleeRetTys.begin());
1049	if (!TypesMatch)
1050	NoTail ("WebAssembly tail call requires caller and callee return types to "
1051	"match");
1052
1053	// If pointers to local stack values are passed, we cannot tail call
1054	if (CLI.CB) {
1055	for (auto &Arg : CLI.CB->args()) {
1056	Value *Val = Arg.get();
1057	// Trace the value back through pointer operations
1058	while (true) {
1059	Value *Src = Val->stripPointerCastsAndAliases();
1060	if (auto *GEP = dyn_cast<GetElementPtrInst>(Val: Src))
1061	Src = GEP->getPointerOperand();
1062	if (Val == Src)
1063	break;
1064	Val = Src;
1065	}
1066	if (isa<AllocaInst>(Val)) {
1067	NoTail (
1068	"WebAssembly does not support tail calling with stack arguments");
1069	break;
1070	}
1071	}
1072	}
1073	}
1074
1075	SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
1076	SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
1077	SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
1078
1079	// The generic code may have added an sret argument. If we're lowering an
1080	// invoke function, the ABI requires that the function pointer be the first
1081	// argument, so we may have to swap the arguments.
1082	if (CallConv == CallingConv::WASM_EmscriptenInvoke && Outs.size() >= `2` &&
1083	Outs [`0`].Flags.isSRet()) {
1084	std::swap(a&: Outs [`0`], b&: Outs [`1`]);
1085	std::swap(a&: OutVals [`0`], b&: OutVals [`1`]);
1086	}
1087
1088	bool HasSwiftSelfArg = false;
1089	bool HasSwiftErrorArg = false;
1090	unsigned NumFixedArgs = `0`;
1091	for (unsigned I = `0`; I < Outs.size(); ++I) {
1092	const ISD::OutputArg &Out = Outs [I];
1093	SDValue &OutVal = OutVals [I];
1094	HasSwiftSelfArg \|= Out.Flags.isSwiftSelf();
1095	HasSwiftErrorArg \|= Out.Flags.isSwiftError();
1096	if (Out.Flags.isNest())
1097	fail(DL, DAG, Msg: "WebAssembly hasn't implemented nest arguments");
1098	if (Out.Flags.isInAlloca())
1099	fail(DL, DAG, Msg: "WebAssembly hasn't implemented inalloca arguments");
1100	if (Out.Flags.isInConsecutiveRegs())
1101	fail(DL, DAG, Msg: "WebAssembly hasn't implemented cons regs arguments");
1102	if (Out.Flags.isInConsecutiveRegsLast())
1103	fail(DL, DAG, Msg: "WebAssembly hasn't implemented cons regs last arguments");
1104	if (Out.Flags.isByVal() && Out.Flags.getByValSize() != `0`) {
1105	auto &MFI = MF.getFrameInfo();
1106	int FI = MFI.CreateStackObject(Size: Out.Flags.getByValSize(),
1107	Alignment: Out.Flags.getNonZeroByValAlign(),
1108	/isSS=/isSpillSlot: false);
1109	SDValue SizeNode =
1110	DAG.getConstant(Out.Flags.getByValSize(), DL, MVT::i32);
1111	SDValue FINode = DAG.getFrameIndex(FI, VT: getPointerTy(DL: Layout));
1112	Chain = DAG.getMemcpy(
1113	Chain, dl: DL, Dst: FINode, Src: OutVal, Size: SizeNode, Alignment: Out.Flags.getNonZeroByValAlign(),
1114	/isVolatile/ isVol: false, /AlwaysInline=/false,
1115	/isTailCall/ false, DstPtrInfo: MachinePointerInfo (), SrcPtrInfo: MachinePointerInfo ());
1116	OutVal = FINode;
1117	}
1118	// Count the number of fixed args after* legalization.*
1119	NumFixedArgs += Out.IsFixed;
1120	}
1121
1122	bool IsVarArg = CLI.IsVarArg;
1123	auto PtrVT = getPointerTy(DL: Layout);
1124
1125	// For swiftcc, emit additional swiftself and swifterror arguments
1126	// if there aren't. These additional arguments are also added for callee
1127	// signature They are necessary to match callee and caller signature for
1128	// indirect call.
1129	if (CallConv == CallingConv::Swift) {
1130	if (!HasSwiftSelfArg) {
1131	NumFixedArgs++;
1132	ISD::OutputArg Arg;
1133	Arg.Flags.setSwiftSelf();
1134	CLI.Outs.push_back(Elt: Arg);
1135	SDValue ArgVal = DAG.getUNDEF(VT: PtrVT);
1136	CLI.OutVals.push_back(Elt: ArgVal);
1137	}
1138	if (!HasSwiftErrorArg) {
1139	NumFixedArgs++;
1140	ISD::OutputArg Arg;
1141	Arg.Flags.setSwiftError();
1142	CLI.Outs.push_back(Elt: Arg);
1143	SDValue ArgVal = DAG.getUNDEF(VT: PtrVT);
1144	CLI.OutVals.push_back(Elt: ArgVal);
1145	}
1146	}
1147
1148	// Analyze operands of the call, assigning locations to each operand.
1149	SmallVector<CCValAssign, `16`> ArgLocs;
1150	CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
1151
1152	if (IsVarArg) {
1153	// Outgoing non-fixed arguments are placed in a buffer. First
1154	// compute their offsets and the total amount of buffer space needed.
1155	for (unsigned I = NumFixedArgs; I < Outs.size(); ++I) {
1156	const ISD::OutputArg &Out = Outs [I];
1157	SDValue &Arg = OutVals [I];
1158	EVT VT = Arg.getValueType();
1159	assert(VT != MVT::iPTR && "Legalized args should be concrete");
1160	Type Ty = VT.getTypeForEVT(Context&: DAG.getContext());
1161	Align Alignment =
1162	std::max(a: Out.Flags.getNonZeroOrigAlign(), b: Layout.getABITypeAlign(Ty));
1163	unsigned Offset =
1164	CCInfo.AllocateStack(Size: Layout.getTypeAllocSize(Ty), Alignment);
1165	CCInfo.addLoc(V: CCValAssign::getMem(ValNo: ArgLocs.size(), ValVT: VT.getSimpleVT(),
1166	Offset, LocVT: VT.getSimpleVT(),
1167	HTP: CCValAssign::Full));
1168	}
1169	}
1170
1171	unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
1172
1173	SDValue FINode;
1174	if (IsVarArg && NumBytes) {
1175	// For non-fixed arguments, next emit stores to store the argument values
1176	// to the stack buffer at the offsets computed above.
1177	int FI = MF.getFrameInfo().CreateStackObject(Size: NumBytes,
1178	Alignment: Layout.getStackAlignment(),
1179	/isSS=/isSpillSlot: false);
1180	unsigned ValNo = `0`;
1181	SmallVector<SDValue, `8`> Chains;
1182	for (SDValue Arg : drop_begin(RangeOrContainer&: OutVals, N: NumFixedArgs)) {
1183	assert(ArgLocs[ValNo].getValNo() == ValNo &&
1184	"ArgLocs should remain in order and only hold varargs args");
1185	unsigned Offset = ArgLocs [ValNo++].getLocMemOffset();
1186	FINode = DAG.getFrameIndex(FI, VT: getPointerTy(DL: Layout));
1187	SDValue Add = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: FINode,
1188	N2: DAG.getConstant(Val: Offset, DL, VT: PtrVT));
1189	Chains.push_back(
1190	Elt: DAG.getStore(Chain, dl: DL, Val: Arg, Ptr: Add,
1191	PtrInfo: MachinePointerInfo::getFixedStack(MF, FI, Offset)));
1192	}
1193	if (!Chains.empty())
1194	Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
1195	} else if (IsVarArg) {
1196	FINode = DAG.getIntPtrConstant(Val: `0`, DL);
1197	}
1198
1199	if (Callee ->getOpcode() == ISD::GlobalAddress) {
1200	// If the callee is a GlobalAddress node (quite common, every direct call
1201	// is) turn it into a TargetGlobalAddress node so that LowerGlobalAddress
1202	// doesn't at MO_GOT which is not needed for direct calls.
1203	GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Val&: Callee);
1204	Callee = DAG.getTargetGlobalAddress(GV: GA->getGlobal(), DL,
1205	VT: getPointerTy(DL: DAG.getDataLayout()),
1206	offset: GA->getOffset());
1207	Callee = DAG.getNode(Opcode: WebAssemblyISD::Wrapper, DL,
1208	VT: getPointerTy(DL: DAG.getDataLayout()), Operand: Callee);
1209	}
1210
1211	// Compute the operands for the CALLn node.
1212	SmallVector<SDValue, `16`> Ops;
1213	Ops.push_back(Elt: Chain);
1214	Ops.push_back(Elt: Callee);
1215
1216	// Add all fixed arguments. Note that for non-varargs calls, NumFixedArgs
1217	// isn't reliable.
1218	Ops.append(in_start: OutVals.begin(),
1219	in_end: IsVarArg ? OutVals.begin() + NumFixedArgs : OutVals.end());
1220	// Add a pointer to the vararg buffer.
1221	if (IsVarArg)
1222	Ops.push_back(Elt: FINode);
1223
1224	SmallVector<EVT, `8`> InTys;
1225	for (const auto &In : Ins) {
1226	assert(!In.Flags.isByVal() && "byval is not valid for return values");
1227	assert(!In.Flags.isNest() && "nest is not valid for return values");
1228	if (In.Flags.isInAlloca())
1229	fail(DL, DAG, Msg: "WebAssembly hasn't implemented inalloca return values");
1230	if (In.Flags.isInConsecutiveRegs())
1231	fail(DL, DAG, Msg: "WebAssembly hasn't implemented cons regs return values");
1232	if (In.Flags.isInConsecutiveRegsLast())
1233	fail(DL, DAG,
1234	Msg: "WebAssembly hasn't implemented cons regs last return values");
1235	// Ignore In.getNonZeroOrigAlign() because all our arguments are passed in
1236	// registers.
1237	InTys.push_back(Elt: In.VT);
1238	}
1239
1240	// Lastly, if this is a call to a funcref we need to add an instruction
1241	// table.set to the chain and transform the call.
1242	if (CLI.CB && WebAssembly::isWebAssemblyFuncrefType(
1243	Ty: CLI.CB->getCalledOperand()->getType())) {
1244	// In the absence of function references proposal where a funcref call is
1245	// lowered to call_ref, using reference types we generate a table.set to set
1246	// the funcref to a special table used solely for this purpose, followed by
1247	// a call_indirect. Here we just generate the table set, and return the
1248	// SDValue of the table.set so that LowerCall can finalize the lowering by
1249	// generating the call_indirect.
1250	SDValue Chain = Ops [`0`];
1251
1252	MCSymbolWasm *Table = WebAssembly::getOrCreateFuncrefCallTableSymbol(
1253	Ctx&: MF.getContext(), Subtarget);
1254	SDValue Sym = DAG.getMCSymbol(Sym: Table, VT: PtrVT);
1255	SDValue TableSlot = DAG.getConstant(`0`, DL, MVT::i32);
1256	SDValue TableSetOps[] = {Chain, Sym, TableSlot, Callee};
1257	SDValue TableSet = DAG.getMemIntrinsicNode(
1258	WebAssemblyISD::TABLE_SET, DL, DAG.getVTList(MVT::Other), TableSetOps,
1259	MVT::funcref,
1260	// Machine Mem Operand args
1261	MachinePointerInfo(
1262	WebAssembly::WasmAddressSpace::WASM_ADDRESS_SPACE_FUNCREF),
1263	CLI.CB->getCalledOperand()->getPointerAlignment(DAG.getDataLayout()),
1264	MachineMemOperand::MOStore);
1265
1266	Ops [`0`] = TableSet; // The new chain is the TableSet itself
1267	}
1268
1269	if (CLI.IsTailCall) {
1270	// ret_calls do not return values to the current frame
1271	SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
1272	return DAG.getNode(Opcode: WebAssemblyISD::RET_CALL, DL, VTList: NodeTys, Ops);
1273	}
1274
1275	InTys.push_back(MVT::Other);
1276	SDVTList InTyList = DAG.getVTList(VTs: InTys);
1277	SDValue Res = DAG.getNode(Opcode: WebAssemblyISD::CALL, DL, VTList: InTyList, Ops);
1278
1279	for (size_t I = `0`; I < Ins.size(); ++I)
1280	InVals.push_back(Elt: Res.getValue(R: I));
1281
1282	// Return the chain
1283	return Res.getValue(R: Ins.size());
1284	}
1285
1286	bool WebAssemblyTargetLowering::CanLowerReturn(
1287	CallingConv::ID /CallConv/, MachineFunction & /MF/, bool /IsVarArg/,
1288	const SmallVectorImpl<ISD::OutputArg> &Outs,
1289	LLVMContext & /Context/) const {
1290	// WebAssembly can only handle returning tuples with multivalue enabled
1291	return WebAssembly::canLowerReturn(ResultSize: Outs.size(), Subtarget);
1292	}
1293
1294	SDValue WebAssemblyTargetLowering::LowerReturn(
1295	SDValue Chain, CallingConv::ID CallConv, bool /IsVarArg/,
1296	const SmallVectorImpl<ISD::OutputArg> &Outs,
1297	const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
1298	SelectionDAG &DAG) const {
1299	assert(WebAssembly::canLowerReturn(Outs.size(), Subtarget) &&
1300	"MVP WebAssembly can only return up to one value");
1301	if (!callingConvSupported(CallConv))
1302	fail(DL, DAG, Msg: "WebAssembly doesn't support non-C calling conventions");
1303
1304	SmallVector<SDValue, `4`> RetOps(`1`, Chain);
1305	RetOps.append(in_start: OutVals.begin(), in_end: OutVals.end());
1306	Chain = DAG.getNode(WebAssemblyISD::RETURN, DL, MVT::Other, RetOps);
1307
1308	// Record the number and types of the return values.
1309	for (const ISD::OutputArg &Out : Outs) {
1310	assert(!Out.Flags.isByVal() && "byval is not valid for return values");
1311	assert(!Out.Flags.isNest() && "nest is not valid for return values");
1312	assert(Out.IsFixed && "non-fixed return value is not valid");
1313	if (Out.Flags.isInAlloca())
1314	fail(DL, DAG, Msg: "WebAssembly hasn't implemented inalloca results");
1315	if (Out.Flags.isInConsecutiveRegs())
1316	fail(DL, DAG, Msg: "WebAssembly hasn't implemented cons regs results");
1317	if (Out.Flags.isInConsecutiveRegsLast())
1318	fail(DL, DAG, Msg: "WebAssembly hasn't implemented cons regs last results");
1319	}
1320
1321	return Chain;
1322	}
1323
1324	SDValue WebAssemblyTargetLowering::LowerFormalArguments(
1325	SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
1326	const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
1327	SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
1328	if (!callingConvSupported(CallConv))
1329	fail(DL, DAG, Msg: "WebAssembly doesn't support non-C calling conventions");
1330
1331	MachineFunction &MF = DAG.getMachineFunction();
1332	auto *MFI = MF.getInfo<WebAssemblyFunctionInfo>();
1333
1334	// Set up the incoming ARGUMENTS value, which serves to represent the liveness
1335	// of the incoming values before they're represented by virtual registers.
1336	MF.getRegInfo().addLiveIn(WebAssembly::ARGUMENTS);
1337
1338	bool HasSwiftErrorArg = false;
1339	bool HasSwiftSelfArg = false;
1340	for (const ISD::InputArg &In : Ins) {
1341	HasSwiftSelfArg \|= In.Flags.isSwiftSelf();
1342	HasSwiftErrorArg \|= In.Flags.isSwiftError();
1343	if (In.Flags.isInAlloca())
1344	fail(DL, DAG, Msg: "WebAssembly hasn't implemented inalloca arguments");
1345	if (In.Flags.isNest())
1346	fail(DL, DAG, Msg: "WebAssembly hasn't implemented nest arguments");
1347	if (In.Flags.isInConsecutiveRegs())
1348	fail(DL, DAG, Msg: "WebAssembly hasn't implemented cons regs arguments");
1349	if (In.Flags.isInConsecutiveRegsLast())
1350	fail(DL, DAG, Msg: "WebAssembly hasn't implemented cons regs last arguments");
1351	// Ignore In.getNonZeroOrigAlign() because all our arguments are passed in
1352	// registers.
1353	InVals.push_back(In.Used ? DAG.getNode(WebAssemblyISD::ARGUMENT, DL, In.VT,
1354	DAG.getTargetConstant(InVals.size(),
1355	DL, MVT::i32))
1356	: DAG.getUNDEF(In.VT));
1357
1358	// Record the number and types of arguments.
1359	MFI->addParam(VT: In.VT);
1360	}
1361
1362	// For swiftcc, emit additional swiftself and swifterror arguments
1363	// if there aren't. These additional arguments are also added for callee
1364	// signature They are necessary to match callee and caller signature for
1365	// indirect call.
1366	auto PtrVT = getPointerTy(DL: MF.getDataLayout());
1367	if (CallConv == CallingConv::Swift) {
1368	if (!HasSwiftSelfArg) {
1369	MFI->addParam(VT: PtrVT);
1370	}
1371	if (!HasSwiftErrorArg) {
1372	MFI->addParam(VT: PtrVT);
1373	}
1374	}
1375	// Varargs are copied into a buffer allocated by the caller, and a pointer to
1376	// the buffer is passed as an argument.
1377	if (IsVarArg) {
1378	MVT PtrVT = getPointerTy(DL: MF.getDataLayout());
1379	Register VarargVreg =
1380	MF.getRegInfo().createVirtualRegister(RegClass: getRegClassFor(VT: PtrVT));
1381	MFI->setVarargBufferVreg(VarargVreg);
1382	Chain = DAG.getCopyToReg(
1383	Chain, DL, VarargVreg,
1384	DAG.getNode(WebAssemblyISD::ARGUMENT, DL, PtrVT,
1385	DAG.getTargetConstant(Ins.size(), DL, MVT::i32)));
1386	MFI->addParam(VT: PtrVT);
1387	}
1388
1389	// Record the number and types of arguments and results.
1390	SmallVector<MVT, `4`> Params;
1391	SmallVector<MVT, `4`> Results;
1392	computeSignatureVTs(Ty: MF.getFunction().getFunctionType(), TargetFunc: &MF.getFunction(),
1393	ContextFunc: MF.getFunction(), TM: DAG.getTarget(), Params, Results);
1394	for (MVT VT : Results)
1395	MFI->addResult(VT);
1396	// TODO: Use signatures in WebAssemblyMachineFunctionInfo too and unify
1397	// the param logic here with ComputeSignatureVTs
1398	assert(MFI->getParams().size() == Params.size() &&
1399	std::equal(MFI->getParams().begin(), MFI->getParams().end(),
1400	Params.begin()));
1401
1402	return Chain;
1403	}
1404
1405	void WebAssemblyTargetLowering::ReplaceNodeResults(
1406	SDNode N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const* {
1407	switch (N->getOpcode()) {
1408	case ISD::SIGN_EXTEND_INREG:
1409	// Do not add any results, signifying that N should not be custom lowered
1410	// after all. This happens because simd128 turns on custom lowering for
1411	// SIGN_EXTEND_INREG, but for non-vector sign extends the result might be an
1412	// illegal type.
1413	break;
1414	case ISD::SIGN_EXTEND_VECTOR_INREG:
1415	case ISD::ZERO_EXTEND_VECTOR_INREG:
1416	// Do not add any results, signifying that N should not be custom lowered.
1417	// EXTEND_VECTOR_INREG is implemented for some vectors, but not all.
1418	break;
1419	default:
1420	llvm_unreachable(
1421	"ReplaceNodeResults not implemented for this op for WebAssembly!");
1422	}
1423	}
1424
1425	//===----------------------------------------------------------------------===//
1426	// Custom lowering hooks.
1427	//===----------------------------------------------------------------------===//
1428
1429	SDValue WebAssemblyTargetLowering::LowerOperation(SDValue Op,
1430	SelectionDAG &DAG) const {
1431	SDLoc DL(Op);
1432	switch (Op.getOpcode()) {
1433	default:
1434	llvm_unreachable("unimplemented operation lowering");
1435	return SDValue ();
1436	case ISD::FrameIndex:
1437	return LowerFrameIndex(Op, DAG);
1438	case ISD::GlobalAddress:
1439	return LowerGlobalAddress(Op, DAG);
1440	case ISD::GlobalTLSAddress:
1441	return LowerGlobalTLSAddress(Op, DAG);
1442	case ISD::ExternalSymbol:
1443	return LowerExternalSymbol(Op, DAG);
1444	case ISD::JumpTable:
1445	return LowerJumpTable(Op, DAG);
1446	case ISD::BR_JT:
1447	return LowerBR_JT(Op, DAG);
1448	case ISD::VASTART:
1449	return LowerVASTART(Op, DAG);
1450	case ISD::BlockAddress:
1451	case ISD::BRIND:
1452	fail(DL, DAG, Msg: "WebAssembly hasn't implemented computed gotos");
1453	return SDValue ();
1454	case ISD::RETURNADDR:
1455	return LowerRETURNADDR(Op, DAG);
1456	case ISD::FRAMEADDR:
1457	return LowerFRAMEADDR(Op, DAG);
1458	case ISD::CopyToReg:
1459	return LowerCopyToReg(Op, DAG);
1460	case ISD::EXTRACT_VECTOR_ELT:
1461	case ISD::INSERT_VECTOR_ELT:
1462	return LowerAccessVectorElement(Op, DAG);
1463	case ISD::INTRINSIC_VOID:
1464	case ISD::INTRINSIC_WO_CHAIN:
1465	case ISD::INTRINSIC_W_CHAIN:
1466	return LowerIntrinsic(Op, DAG);
1467	case ISD::SIGN_EXTEND_INREG:
1468	return LowerSIGN_EXTEND_INREG(Op, DAG);
1469	case ISD::ZERO_EXTEND_VECTOR_INREG:
1470	case ISD::SIGN_EXTEND_VECTOR_INREG:
1471	return LowerEXTEND_VECTOR_INREG(Op, DAG);
1472	case ISD::BUILD_VECTOR:
1473	return LowerBUILD_VECTOR(Op, DAG);
1474	case ISD::VECTOR_SHUFFLE:
1475	return LowerVECTOR_SHUFFLE(Op, DAG);
1476	case ISD::SETCC:
1477	return LowerSETCC(Op, DAG);
1478	case ISD::SHL:
1479	case ISD::SRA:
1480	case ISD::SRL:
1481	return LowerShift(Op, DAG);
1482	case ISD::FP_TO_SINT_SAT:
1483	case ISD::FP_TO_UINT_SAT:
1484	return LowerFP_TO_INT_SAT(Op, DAG);
1485	case ISD::LOAD:
1486	return LowerLoad(Op, DAG);
1487	case ISD::STORE:
1488	return LowerStore(Op, DAG);
1489	case ISD::CTPOP:
1490	case ISD::CTLZ:
1491	case ISD::CTTZ:
1492	return DAG.UnrollVectorOp(N: Op.getNode());
1493	}
1494	}
1495
1496	static bool IsWebAssemblyGlobal(SDValue Op) {
1497	if (const GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Val&: Op))
1498	return WebAssembly::isWasmVarAddressSpace(AS: GA->getAddressSpace());
1499
1500	return false;
1501	}
1502
1503	static std::optional<unsigned> IsWebAssemblyLocal(SDValue Op,
1504	SelectionDAG &DAG) {
1505	const FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Val&: Op);
1506	if (!FI)
1507	return std::nullopt;
1508
1509	auto &MF = DAG.getMachineFunction();
1510	return WebAssemblyFrameLowering::getLocalForStackObject(MF, FrameIndex: FI->getIndex());
1511	}
1512
1513	SDValue WebAssemblyTargetLowering::LowerStore(SDValue Op,
1514	SelectionDAG &DAG) const {
1515	SDLoc DL(Op);
1516	StoreSDNode *SN = cast<StoreSDNode>(Val: Op.getNode());
1517	const SDValue &Value = SN->getValue();
1518	const SDValue &Base = SN->getBasePtr();
1519	const SDValue &Offset = SN->getOffset();
1520
1521	if (IsWebAssemblyGlobal(Op: Base)) {
1522	if (!Offset ->isUndef())
1523	report_fatal_error(reason: "unexpected offset when storing to webassembly global",
1524	gen_crash_diag: false);
1525
1526	SDVTList Tys = DAG.getVTList(MVT::Other);
1527	SDValue Ops[] = {SN->getChain(), Value, Base};
1528	return DAG.getMemIntrinsicNode(Opcode: WebAssemblyISD::GLOBAL_SET, dl: DL, VTList: Tys, Ops,
1529	MemVT: SN->getMemoryVT(), MMO: SN->getMemOperand());
1530	}
1531
1532	if (std::optional<unsigned> Local = IsWebAssemblyLocal(Op: Base, DAG)) {
1533	if (!Offset ->isUndef())
1534	report_fatal_error(reason: "unexpected offset when storing to webassembly local",
1535	gen_crash_diag: false);
1536
1537	SDValue Idx = DAG.getTargetConstant(*Local, Base, MVT::i32);
1538	SDVTList Tys = DAG.getVTList(MVT::Other); // The chain.
1539	SDValue Ops[] = {SN->getChain(), Idx, Value};
1540	return DAG.getNode(Opcode: WebAssemblyISD::LOCAL_SET, DL, VTList: Tys, Ops);
1541	}
1542
1543	if (WebAssembly::isWasmVarAddressSpace(AS: SN->getAddressSpace()))
1544	report_fatal_error(
1545	reason: "Encountered an unlowerable store to the wasm_var address space",
1546	gen_crash_diag: false);
1547
1548	return Op;
1549	}
1550
1551	SDValue WebAssemblyTargetLowering::LowerLoad(SDValue Op,
1552	SelectionDAG &DAG) const {
1553	SDLoc DL(Op);
1554	LoadSDNode *LN = cast<LoadSDNode>(Val: Op.getNode());
1555	const SDValue &Base = LN->getBasePtr();
1556	const SDValue &Offset = LN->getOffset();
1557
1558	if (IsWebAssemblyGlobal(Op: Base)) {
1559	if (!Offset ->isUndef())
1560	report_fatal_error(
1561	reason: "unexpected offset when loading from webassembly global", gen_crash_diag: false);
1562
1563	SDVTList Tys = DAG.getVTList(LN->getValueType(`0`), MVT::Other);
1564	SDValue Ops[] = {LN->getChain(), Base};
1565	return DAG.getMemIntrinsicNode(Opcode: WebAssemblyISD::GLOBAL_GET, dl: DL, VTList: Tys, Ops,
1566	MemVT: LN->getMemoryVT(), MMO: LN->getMemOperand());
1567	}
1568
1569	if (std::optional<unsigned> Local = IsWebAssemblyLocal(Op: Base, DAG)) {
1570	if (!Offset ->isUndef())
1571	report_fatal_error(
1572	reason: "unexpected offset when loading from webassembly local", gen_crash_diag: false);
1573
1574	SDValue Idx = DAG.getTargetConstant(*Local, Base, MVT::i32);
1575	EVT LocalVT = LN->getValueType(ResNo: `0`);
1576	SDValue LocalGet = DAG.getNode(Opcode: WebAssemblyISD::LOCAL_GET, DL, VT: LocalVT,
1577	Ops: {LN->getChain(), Idx});
1578	SDValue Result = DAG.getMergeValues(Ops: {LocalGet, LN->getChain()}, dl: DL);
1579	assert(Result ->getNumValues() == `2` && "Loads must carry a chain!");
1580	return Result;
1581	}
1582
1583	if (WebAssembly::isWasmVarAddressSpace(AS: LN->getAddressSpace()))
1584	report_fatal_error(
1585	reason: "Encountered an unlowerable load from the wasm_var address space",
1586	gen_crash_diag: false);
1587
1588	return Op;
1589	}
1590
1591	SDValue WebAssemblyTargetLowering::LowerCopyToReg(SDValue Op,
1592	SelectionDAG &DAG) const {
1593	SDValue Src = Op.getOperand(i: `2`);
1594	if (isa<FrameIndexSDNode>(Val: Src.getNode())) {
1595	// CopyToReg nodes don't support FrameIndex operands. Other targets select
1596	// the FI to some LEA-like instruction, but since we don't have that, we
1597	// need to insert some kind of instruction that can take an FI operand and
1598	// produces a value usable by CopyToReg (i.e. in a vreg). So insert a dummy
1599	// local.copy between Op and its FI operand.
1600	SDValue Chain = Op.getOperand(i: `0`);
1601	SDLoc DL(Op);
1602	Register Reg = cast<RegisterSDNode>(Val: Op.getOperand(i: `1`))->getReg();
1603	EVT VT = Src.getValueType();
1604	SDValue Copy(DAG.getMachineNode(VT == MVT::i32 ? WebAssembly::COPY_I32
1605	: WebAssembly::COPY_I64,
1606	DL, VT, Src),
1607	`0`);
1608	return Op.getNode()->getNumValues() == `1`
1609	? DAG.getCopyToReg(Chain, dl: DL, Reg, N: Copy)
1610	: DAG.getCopyToReg(Chain, dl: DL, Reg, N: Copy,
1611	Glue: Op.getNumOperands() == `4` ? Op.getOperand(i: `3`)
1612	: SDValue ());
1613	}
1614	return SDValue ();
1615	}
1616
1617	SDValue WebAssemblyTargetLowering::LowerFrameIndex(SDValue Op,
1618	SelectionDAG &DAG) const {
1619	int FI = cast<FrameIndexSDNode>(Val&: Op)->getIndex();
1620	return DAG.getTargetFrameIndex(FI, VT: Op.getValueType());
1621	}
1622
1623	SDValue WebAssemblyTargetLowering::LowerRETURNADDR(SDValue Op,
1624	SelectionDAG &DAG) const {
1625	SDLoc DL(Op);
1626
1627	if (!Subtarget->getTargetTriple().isOSEmscripten()) {
1628	fail(DL, DAG,
1629	Msg: "Non-Emscripten WebAssembly hasn't implemented "
1630	"__builtin_return_address");
1631	return SDValue ();
1632	}
1633
1634	if (verifyReturnAddressArgumentIsConstant(Op, DAG))
1635	return SDValue ();
1636
1637	unsigned Depth = Op.getConstantOperandVal(i: `0`);
1638	MakeLibCallOptions CallOptions;
1639	return makeLibCall(DAG, RTLIB::RETURN_ADDRESS, Op.getValueType(),
1640	{DAG.getConstant(Depth, DL, MVT::i32)}, CallOptions, DL)
1641	.first;
1642	}
1643
1644	SDValue WebAssemblyTargetLowering::LowerFRAMEADDR(SDValue Op,
1645	SelectionDAG &DAG) const {
1646	// Non-zero depths are not supported by WebAssembly currently. Use the
1647	// legalizer's default expansion, which is to return 0 (what this function is
1648	// documented to do).
1649	if (Op.getConstantOperandVal(i: `0`) > `0`)
1650	return SDValue ();
1651
1652	DAG.getMachineFunction().getFrameInfo().setFrameAddressIsTaken(true);
1653	EVT VT = Op.getValueType();
1654	Register FP =
1655	Subtarget->getRegisterInfo()->getFrameRegister(MF: DAG.getMachineFunction());
1656	return DAG.getCopyFromReg(Chain: DAG.getEntryNode(), dl: SDLoc (Op), Reg: FP, VT);
1657	}
1658
1659	SDValue
1660	WebAssemblyTargetLowering::LowerGlobalTLSAddress(SDValue Op,
1661	SelectionDAG &DAG) const {
1662	SDLoc DL(Op);
1663	const auto *GA = cast<GlobalAddressSDNode>(Val&: Op);
1664
1665	MachineFunction &MF = DAG.getMachineFunction();
1666	if (!MF.getSubtarget<WebAssemblySubtarget>().hasBulkMemory())
1667	report_fatal_error(reason: "cannot use thread-local storage without bulk memory",
1668	gen_crash_diag: false);
1669
1670	const GlobalValue *GV = GA->getGlobal();
1671
1672	// Currently only Emscripten supports dynamic linking with threads. Therefore,
1673	// on other targets, if we have thread-local storage, only the local-exec
1674	// model is possible.
1675	auto model = Subtarget->getTargetTriple().isOSEmscripten()
1676	? GV->getThreadLocalMode()
1677	: GlobalValue::LocalExecTLSModel;
1678
1679	// Unsupported TLS modes
1680	assert(model != GlobalValue::NotThreadLocal);
1681	assert(model != GlobalValue::InitialExecTLSModel);
1682
1683	if (model == GlobalValue::LocalExecTLSModel \|\|
1684	model == GlobalValue::LocalDynamicTLSModel \|\|
1685	(model == GlobalValue::GeneralDynamicTLSModel &&
1686	getTargetMachine().shouldAssumeDSOLocal(GV))) {
1687	// For DSO-local TLS variables we use offset from __tls_base
1688
1689	MVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
1690	auto GlobalGet = PtrVT == MVT::i64 ? WebAssembly::GLOBAL_GET_I64
1691	: WebAssembly::GLOBAL_GET_I32;
1692	const char *BaseName = MF.createExternalSymbolName(Name: "__tls_base");
1693
1694	SDValue BaseAddr(
1695	DAG.getMachineNode(GlobalGet, DL, PtrVT,
1696	DAG.getTargetExternalSymbol(Sym: BaseName, VT: PtrVT)),
1697	`0`);
1698
1699	SDValue TLSOffset = DAG.getTargetGlobalAddress(
1700	GV, DL, VT: PtrVT, offset: GA->getOffset(), TargetFlags: WebAssemblyII::MO_TLS_BASE_REL);
1701	SDValue SymOffset =
1702	DAG.getNode(Opcode: WebAssemblyISD::WrapperREL, DL, VT: PtrVT, Operand: TLSOffset);
1703
1704	return DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: BaseAddr, N2: SymOffset);
1705	}
1706
1707	assert(model == GlobalValue::GeneralDynamicTLSModel);
1708
1709	EVT VT = Op.getValueType();
1710	return DAG.getNode(Opcode: WebAssemblyISD::Wrapper, DL, VT,
1711	Operand: DAG.getTargetGlobalAddress(GV: GA->getGlobal(), DL, VT,
1712	offset: GA->getOffset(),
1713	TargetFlags: WebAssemblyII::MO_GOT_TLS));
1714	}
1715
1716	SDValue WebAssemblyTargetLowering::LowerGlobalAddress(SDValue Op,
1717	SelectionDAG &DAG) const {
1718	SDLoc DL(Op);
1719	const auto *GA = cast<GlobalAddressSDNode>(Val&: Op);
1720	EVT VT = Op.getValueType();
1721	assert(GA->getTargetFlags() == `0` &&
1722	"Unexpected target flags on generic GlobalAddressSDNode");
1723	if (!WebAssembly::isValidAddressSpace(AS: GA->getAddressSpace()))
1724	fail(DL, DAG, Msg: "Invalid address space for WebAssembly target");
1725
1726	unsigned OperandFlags = `0`;
1727	const GlobalValue *GV = GA->getGlobal();
1728	// Since WebAssembly tables cannot yet be shared accross modules, we don't
1729	// need special treatment for tables in PIC mode.
1730	if (isPositionIndependent() &&
1731	!WebAssembly::isWebAssemblyTableType(Ty: GV->getValueType())) {
1732	if (getTargetMachine().shouldAssumeDSOLocal(GV)) {
1733	MachineFunction &MF = DAG.getMachineFunction();
1734	MVT PtrVT = getPointerTy(DL: MF.getDataLayout());
1735	const char *BaseName;
1736	if (GV->getValueType()->isFunctionTy()) {
1737	BaseName = MF.createExternalSymbolName(Name: "__table_base");
1738	OperandFlags = WebAssemblyII::MO_TABLE_BASE_REL;
1739	} else {
1740	BaseName = MF.createExternalSymbolName(Name: "__memory_base");
1741	OperandFlags = WebAssemblyII::MO_MEMORY_BASE_REL;
1742	}
1743	SDValue BaseAddr =
1744	DAG.getNode(Opcode: WebAssemblyISD::Wrapper, DL, VT: PtrVT,
1745	Operand: DAG.getTargetExternalSymbol(Sym: BaseName, VT: PtrVT));
1746
1747	SDValue SymAddr = DAG.getNode(
1748	Opcode: WebAssemblyISD::WrapperREL, DL, VT,
1749	Operand: DAG.getTargetGlobalAddress(GV: GA->getGlobal(), DL, VT, offset: GA->getOffset(),
1750	TargetFlags: OperandFlags));
1751
1752	return DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: BaseAddr, N2: SymAddr);
1753	}
1754	OperandFlags = WebAssemblyII::MO_GOT;
1755	}
1756
1757	return DAG.getNode(Opcode: WebAssemblyISD::Wrapper, DL, VT,
1758	Operand: DAG.getTargetGlobalAddress(GV: GA->getGlobal(), DL, VT,
1759	offset: GA->getOffset(), TargetFlags: OperandFlags));
1760	}
1761
1762	SDValue
1763	WebAssemblyTargetLowering::LowerExternalSymbol(SDValue Op,
1764	SelectionDAG &DAG) const {
1765	SDLoc DL(Op);
1766	const auto *ES = cast<ExternalSymbolSDNode>(Val&: Op);
1767	EVT VT = Op.getValueType();
1768	assert(ES->getTargetFlags() == `0` &&
1769	"Unexpected target flags on generic ExternalSymbolSDNode");
1770	return DAG.getNode(Opcode: WebAssemblyISD::Wrapper, DL, VT,
1771	Operand: DAG.getTargetExternalSymbol(Sym: ES->getSymbol(), VT));
1772	}
1773
1774	SDValue WebAssemblyTargetLowering::LowerJumpTable(SDValue Op,
1775	SelectionDAG &DAG) const {
1776	// There's no need for a Wrapper node because we always incorporate a jump
1777	// table operand into a BR_TABLE instruction, rather than ever
1778	// materializing it in a register.
1779	const JumpTableSDNode *JT = cast<JumpTableSDNode>(Val&: Op);
1780	return DAG.getTargetJumpTable(JTI: JT->getIndex(), VT: Op.getValueType(),
1781	TargetFlags: JT->getTargetFlags());
1782	}
1783
1784	SDValue WebAssemblyTargetLowering::LowerBR_JT(SDValue Op,
1785	SelectionDAG &DAG) const {
1786	SDLoc DL(Op);
1787	SDValue Chain = Op.getOperand(i: `0`);
1788	const auto *JT = cast<JumpTableSDNode>(Val: Op.getOperand(i: `1`));
1789	SDValue Index = Op.getOperand(i: `2`);
1790	assert(JT->getTargetFlags() == `0` && "WebAssembly doesn't set target flags");
1791
1792	SmallVector<SDValue, `8`> Ops;
1793	Ops.push_back(Elt: Chain);
1794	Ops.push_back(Elt: Index);
1795
1796	MachineJumpTableInfo *MJTI = DAG.getMachineFunction().getJumpTableInfo();
1797	const auto &MBBs = MJTI->getJumpTables()[JT->getIndex()].MBBs;
1798
1799	// Add an operand for each case.
1800	for (auto *MBB : MBBs)
1801	Ops.push_back(Elt: DAG.getBasicBlock(MBB));
1802
1803	// Add the first MBB as a dummy default target for now. This will be replaced
1804	// with the proper default target (and the preceding range check eliminated)
1805	// if possible by WebAssemblyFixBrTableDefaults.
1806	Ops.push_back(Elt: DAG.getBasicBlock(MBB: *MBBs.begin()));
1807	return DAG.getNode(WebAssemblyISD::BR_TABLE, DL, MVT::Other, Ops);
1808	}
1809
1810	SDValue WebAssemblyTargetLowering::LowerVASTART(SDValue Op,
1811	SelectionDAG &DAG) const {
1812	SDLoc DL(Op);
1813	EVT PtrVT = getPointerTy(DL: DAG.getMachineFunction().getDataLayout());
1814
1815	auto *MFI = DAG.getMachineFunction().getInfo<WebAssemblyFunctionInfo>();
1816	const Value *SV = cast<SrcValueSDNode>(Val: Op.getOperand(i: `2`))->getValue();
1817
1818	SDValue ArgN = DAG.getCopyFromReg(Chain: DAG.getEntryNode(), dl: DL,
1819	Reg: MFI->getVarargBufferVreg(), VT: PtrVT);
1820	return DAG.getStore(Chain: Op.getOperand(i: `0`), dl: DL, Val: ArgN, Ptr: Op.getOperand(i: `1`),
1821	PtrInfo: MachinePointerInfo (SV));
1822	}
1823
1824	SDValue WebAssemblyTargetLowering::LowerIntrinsic(SDValue Op,
1825	SelectionDAG &DAG) const {
1826	MachineFunction &MF = DAG.getMachineFunction();
1827	unsigned IntNo;
1828	switch (Op.getOpcode()) {
1829	case ISD::INTRINSIC_VOID:
1830	case ISD::INTRINSIC_W_CHAIN:
1831	IntNo = Op.getConstantOperandVal(i: `1`);
1832	break;
1833	case ISD::INTRINSIC_WO_CHAIN:
1834	IntNo = Op.getConstantOperandVal(i: `0`);
1835	break;
1836	default:
1837	llvm_unreachable("Invalid intrinsic");
1838	}
1839	SDLoc DL(Op);
1840
1841	switch (IntNo) {
1842	default:
1843	return SDValue (); // Don't custom lower most intrinsics.
1844
1845	case Intrinsic::wasm_lsda: {
1846	auto PtrVT = getPointerTy(DL: MF.getDataLayout());
1847	const char *SymName = MF.createExternalSymbolName(
1848	Name: "GCC_except_table" + std::to_string(val: MF.getFunctionNumber()));
1849	if (isPositionIndependent()) {
1850	SDValue Node = DAG.getTargetExternalSymbol(
1851	Sym: SymName, VT: PtrVT, TargetFlags: WebAssemblyII::MO_MEMORY_BASE_REL);
1852	const char *BaseName = MF.createExternalSymbolName(Name: "__memory_base");
1853	SDValue BaseAddr =
1854	DAG.getNode(Opcode: WebAssemblyISD::Wrapper, DL, VT: PtrVT,
1855	Operand: DAG.getTargetExternalSymbol(Sym: BaseName, VT: PtrVT));
1856	SDValue SymAddr =
1857	DAG.getNode(Opcode: WebAssemblyISD::WrapperREL, DL, VT: PtrVT, Operand: Node);
1858	return DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: BaseAddr, N2: SymAddr);
1859	}
1860	SDValue Node = DAG.getTargetExternalSymbol(Sym: SymName, VT: PtrVT);
1861	return DAG.getNode(Opcode: WebAssemblyISD::Wrapper, DL, VT: PtrVT, Operand: Node);
1862	}
1863
1864	case Intrinsic::wasm_shuffle: {
1865	// Drop in-chain and replace undefs, but otherwise pass through unchanged
1866	SDValue Ops[`18`];
1867	size_t OpIdx = `0`;
1868	Ops[OpIdx++] = Op.getOperand(i: `1`);
1869	Ops[OpIdx++] = Op.getOperand(i: `2`);
1870	while (OpIdx < `18`) {
1871	const SDValue &MaskIdx = Op.getOperand(i: OpIdx + `1`);
1872	if (MaskIdx.isUndef() \|\| MaskIdx.getNode()->getAsZExtVal() >= `32`) {
1873	bool isTarget = MaskIdx.getNode()->getOpcode() == ISD::TargetConstant;
1874	Ops[OpIdx++] = DAG.getConstant(`0`, DL, MVT::i32, isTarget);
1875	} else {
1876	Ops[OpIdx++] = MaskIdx;
1877	}
1878	}
1879	return DAG.getNode(Opcode: WebAssemblyISD::SHUFFLE, DL, VT: Op.getValueType(), Ops);
1880	}
1881	}
1882	}
1883
1884	SDValue
1885	WebAssemblyTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
1886	SelectionDAG &DAG) const {
1887	SDLoc DL(Op);
1888	// If sign extension operations are disabled, allow sext_inreg only if operand
1889	// is a vector extract of an i8 or i16 lane. SIMD does not depend on sign
1890	// extension operations, but allowing sext_inreg in this context lets us have
1891	// simple patterns to select extract_lane_s instructions. Expanding sext_inreg
1892	// everywhere would be simpler in this file, but would necessitate large and
1893	// brittle patterns to undo the expansion and select extract_lane_s
1894	// instructions.
1895	assert(!Subtarget->hasSignExt() && Subtarget->hasSIMD128());
1896	if (Op.getOperand(i: `0`).getOpcode() != ISD::EXTRACT_VECTOR_ELT)
1897	return SDValue ();
1898
1899	const SDValue &Extract = Op.getOperand(i: `0`);
1900	MVT VecT = Extract.getOperand(i: `0`).getSimpleValueType();
1901	if (VecT.getVectorElementType().getSizeInBits() > `32`)
1902	return SDValue ();
1903	MVT ExtractedLaneT =
1904	cast<VTSDNode>(Val: Op.getOperand(i: `1`).getNode())->getVT().getSimpleVT();
1905	MVT ExtractedVecT =
1906	MVT::getVectorVT(VT: ExtractedLaneT, NumElements: `128` / ExtractedLaneT.getSizeInBits());
1907	if (ExtractedVecT == VecT)
1908	return Op;
1909
1910	// Bitcast vector to appropriate type to ensure ISel pattern coverage
1911	const SDNode *Index = Extract.getOperand(i: `1`).getNode();
1912	if (!isa<ConstantSDNode>(Val: Index))
1913	return SDValue ();
1914	unsigned IndexVal = Index->getAsZExtVal();
1915	unsigned Scale =
1916	ExtractedVecT.getVectorNumElements() / VecT.getVectorNumElements();
1917	assert(Scale > `1`);
1918	SDValue NewIndex =
1919	DAG.getConstant(Val: IndexVal * Scale, DL, VT: Index->getValueType(ResNo: `0`));
1920	SDValue NewExtract = DAG.getNode(
1921	Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT: Extract.getValueType(),
1922	N1: DAG.getBitcast(VT: ExtractedVecT, V: Extract.getOperand(i: `0`)), N2: NewIndex);
1923	return DAG.getNode(Opcode: ISD::SIGN_EXTEND_INREG, DL, VT: Op.getValueType(), N1: NewExtract,
1924	N2: Op.getOperand(i: `1`));
1925	}
1926
1927	SDValue
1928	WebAssemblyTargetLowering::LowerEXTEND_VECTOR_INREG(SDValue Op,
1929	SelectionDAG &DAG) const {
1930	SDLoc DL(Op);
1931	EVT VT = Op.getValueType();
1932	SDValue Src = Op.getOperand(i: `0`);
1933	EVT SrcVT = Src.getValueType();
1934
1935	if (SrcVT.getVectorElementType() == MVT::i1 \|\|
1936	SrcVT.getVectorElementType() == MVT::i64)
1937	return SDValue ();
1938
1939	assert(VT.getScalarSizeInBits() % SrcVT.getScalarSizeInBits() == `0` &&
1940	"Unexpected extension factor.");
1941	unsigned Scale = VT.getScalarSizeInBits() / SrcVT.getScalarSizeInBits();
1942
1943	if (Scale != `2` && Scale != `4` && Scale != `8`)
1944	return SDValue ();
1945
1946	unsigned Ext;
1947	switch (Op.getOpcode()) {
1948	case ISD::ZERO_EXTEND_VECTOR_INREG:
1949	Ext = WebAssemblyISD::EXTEND_LOW_U;
1950	break;
1951	case ISD::SIGN_EXTEND_VECTOR_INREG:
1952	Ext = WebAssemblyISD::EXTEND_LOW_S;
1953	break;
1954	}
1955
1956	SDValue Ret = Src;
1957	while (Scale != `1`) {
1958	Ret = DAG.getNode(Opcode: Ext, DL,
1959	VT: Ret.getValueType()
1960	.widenIntegerVectorElementType(Context&: *DAG.getContext())
1961	.getHalfNumVectorElementsVT(Context&: *DAG.getContext()),
1962	Operand: Ret);
1963	Scale /= `2`;
1964	}
1965	assert(Ret.getValueType() == VT);
1966	return Ret;
1967	}
1968
1969	static SDValue LowerConvertLow(SDValue Op, SelectionDAG &DAG) {
1970	SDLoc DL(Op);
1971	if (Op.getValueType() != MVT::v2f64)
1972	return SDValue ();
1973
1974	auto GetConvertedLane = [](SDValue Op, unsigned &Opcode, SDValue &SrcVec,
1975	unsigned &Index) -> bool {
1976	switch (Op.getOpcode()) {
1977	case ISD::SINT_TO_FP:
1978	Opcode = WebAssemblyISD::CONVERT_LOW_S;
1979	break;
1980	case ISD::UINT_TO_FP:
1981	Opcode = WebAssemblyISD::CONVERT_LOW_U;
1982	break;
1983	case ISD::FP_EXTEND:
1984	Opcode = WebAssemblyISD::PROMOTE_LOW;
1985	break;
1986	default:
1987	return false;
1988	}
1989
1990	auto ExtractVector = Op.getOperand(i: `0`);
1991	if (ExtractVector.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
1992	return false;
1993
1994	if (!isa<ConstantSDNode>(Val: ExtractVector.getOperand(i: `1`).getNode()))
1995	return false;
1996
1997	SrcVec = ExtractVector.getOperand(i: `0`);
1998	Index = ExtractVector.getConstantOperandVal(i: `1`);
1999	return true;
2000	};
2001
2002	unsigned LHSOpcode, RHSOpcode, LHSIndex, RHSIndex;
2003	SDValue LHSSrcVec, RHSSrcVec;
2004	if (!GetConvertedLane (Op.getOperand(i: `0`), LHSOpcode, LHSSrcVec, LHSIndex) \|\|
2005	!GetConvertedLane (Op.getOperand(i: `1`), RHSOpcode, RHSSrcVec, RHSIndex))
2006	return SDValue ();
2007
2008	if (LHSOpcode != RHSOpcode)
2009	return SDValue ();
2010
2011	MVT ExpectedSrcVT;
2012	switch (LHSOpcode) {
2013	case WebAssemblyISD::CONVERT_LOW_S:
2014	case WebAssemblyISD::CONVERT_LOW_U:
2015	ExpectedSrcVT = MVT::v4i32;
2016	break;
2017	case WebAssemblyISD::PROMOTE_LOW:
2018	ExpectedSrcVT = MVT::v4f32;
2019	break;
2020	}
2021	if (LHSSrcVec.getValueType() != ExpectedSrcVT)
2022	return SDValue ();
2023
2024	auto Src = LHSSrcVec;
2025	if (LHSIndex != `0` \|\| RHSIndex != `1` \|\| LHSSrcVec != RHSSrcVec) {
2026	// Shuffle the source vector so that the converted lanes are the low lanes.
2027	Src = DAG.getVectorShuffle(
2028	VT: ExpectedSrcVT, dl: DL, N1: LHSSrcVec, N2: RHSSrcVec,
2029	Mask: {static_cast<int>(LHSIndex), static_cast<int>(RHSIndex) + `4`, -`1`, -`1`});
2030	}
2031	return DAG.getNode(LHSOpcode, DL, MVT::v2f64, Src);
2032	}
2033
2034	SDValue WebAssemblyTargetLowering::LowerBUILD_VECTOR(SDValue Op,
2035	SelectionDAG &DAG) const {
2036	if (auto ConvertLow = LowerConvertLow(Op, DAG))
2037	return ConvertLow;
2038
2039	SDLoc DL(Op);
2040	const EVT VecT = Op.getValueType();
2041	const EVT LaneT = Op.getOperand(i: `0`).getValueType();
2042	const size_t Lanes = Op.getNumOperands();
2043	bool CanSwizzle = VecT == MVT::v16i8;
2044
2045	// BUILD_VECTORs are lowered to the instruction that initializes the highest
2046	// possible number of lanes at once followed by a sequence of replace_lane
2047	// instructions to individually initialize any remaining lanes.
2048
2049	// TODO: Tune this. For example, lanewise swizzling is very expensive, so
2050	// swizzled lanes should be given greater weight.
2051
2052	// TODO: Investigate looping rather than always extracting/replacing specific
2053	// lanes to fill gaps.
2054
2055	auto IsConstant = [](const SDValue &V) {
2056	return V.getOpcode() == ISD::Constant \|\| V.getOpcode() == ISD::ConstantFP;
2057	};
2058
2059	// Returns the source vector and index vector pair if they exist. Checks for:
2060	// (extract_vector_elt
2061	// $src,
2062	// (sign_extend_inreg (extract_vector_elt $indices, $i))
2063	// )
2064	auto GetSwizzleSrcs = [](size_t I, const SDValue &Lane) {
2065	auto Bail = std::make_pair(x: SDValue (), y: SDValue ());
2066	if (Lane ->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
2067	return Bail;
2068	const SDValue &SwizzleSrc = Lane ->getOperand(Num: `0`);
2069	const SDValue &IndexExt = Lane ->getOperand(Num: `1`);
2070	if (IndexExt ->getOpcode() != ISD::SIGN_EXTEND_INREG)
2071	return Bail;
2072	const SDValue &Index = IndexExt ->getOperand(Num: `0`);
2073	if (Index ->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
2074	return Bail;
2075	const SDValue &SwizzleIndices = Index ->getOperand(Num: `0`);
2076	if (SwizzleSrc.getValueType() != MVT::v16i8 \|\|
2077	SwizzleIndices.getValueType() != MVT::v16i8 \|\|
2078	Index->getOperand(`1`)->getOpcode() != ISD::Constant \|\|
2079	Index->getConstantOperandVal(`1`) != I)
2080	return Bail;
2081	return std::make_pair(x: SwizzleSrc, y: SwizzleIndices);
2082	};
2083
2084	// If the lane is extracted from another vector at a constant index, return
2085	// that vector. The source vector must not have more lanes than the dest
2086	// because the shufflevector indices are in terms of the destination lanes and
2087	// would not be able to address the smaller individual source lanes.
2088	auto GetShuffleSrc = [&](const SDValue &Lane) {
2089	if (Lane ->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
2090	return SDValue ();
2091	if (!isa<ConstantSDNode>(Val: Lane ->getOperand(Num: `1`).getNode()))
2092	return SDValue ();
2093	if (Lane ->getOperand(Num: `0`).getValueType().getVectorNumElements() >
2094	VecT.getVectorNumElements())
2095	return SDValue ();
2096	return Lane ->getOperand(Num: `0`);
2097	};
2098
2099	using ValueEntry = std::pair<SDValue, size_t>;
2100	SmallVector<ValueEntry, `16`> SplatValueCounts;
2101
2102	using SwizzleEntry = std::pair<std::pair<SDValue, SDValue>, size_t>;
2103	SmallVector<SwizzleEntry, `16`> SwizzleCounts;
2104
2105	using ShuffleEntry = std::pair<SDValue, size_t>;
2106	SmallVector<ShuffleEntry, `16`> ShuffleCounts;
2107
2108	auto AddCount = [](auto &Counts, const auto &Val) {
2109	auto CountIt =
2110	llvm::find_if(Counts, [&Val](auto E) { return E.first == Val; });
2111	if (CountIt == Counts.end()) {
2112	Counts.emplace_back(Val, `1`);
2113	} else {
2114	CountIt->second++;
2115	}
2116	};
2117
2118	auto GetMostCommon = [](auto &Counts) {
2119	auto CommonIt =
2120	std::max_element(Counts.begin(), Counts.end(), llvm::less_second ());
2121	assert(CommonIt != Counts.end() && "Unexpected all-undef build_vector");
2122	return *CommonIt;
2123	};
2124
2125	size_t NumConstantLanes = `0`;
2126
2127	// Count eligible lanes for each type of vector creation op
2128	for (size_t I = `0`; I < Lanes; ++I) {
2129	const SDValue &Lane = Op ->getOperand(Num: I);
2130	if (Lane.isUndef())
2131	continue;
2132
2133	AddCount (SplatValueCounts, Lane);
2134
2135	if (IsConstant (Lane))
2136	NumConstantLanes++;
2137	if (auto ShuffleSrc = GetShuffleSrc (Lane))
2138	AddCount (ShuffleCounts, ShuffleSrc);
2139	if (CanSwizzle) {
2140	auto SwizzleSrcs = GetSwizzleSrcs (I, Lane);
2141	if (SwizzleSrcs.first)
2142	AddCount (SwizzleCounts, SwizzleSrcs);
2143	}
2144	}
2145
2146	SDValue SplatValue;
2147	size_t NumSplatLanes;
2148	std::tie(args&: SplatValue, args&: NumSplatLanes) = GetMostCommon (SplatValueCounts);
2149
2150	SDValue SwizzleSrc;
2151	SDValue SwizzleIndices;
2152	size_t NumSwizzleLanes = `0`;
2153	if (SwizzleCounts.size())
2154	std::forward_as_tuple(args: std::tie(args&: SwizzleSrc, args&: SwizzleIndices),
2155	args&: NumSwizzleLanes) = GetMostCommon (SwizzleCounts);
2156
2157	// Shuffles can draw from up to two vectors, so find the two most common
2158	// sources.
2159	SDValue ShuffleSrc1, ShuffleSrc2;
2160	size_t NumShuffleLanes = `0`;
2161	if (ShuffleCounts.size()) {
2162	std::tie(args&: ShuffleSrc1, args&: NumShuffleLanes) = GetMostCommon (ShuffleCounts);
2163	llvm::erase_if(C&: ShuffleCounts,
2164	P: [&](const auto &Pair) { return Pair.first == ShuffleSrc1; });
2165	}
2166	if (ShuffleCounts.size()) {
2167	size_t AdditionalShuffleLanes;
2168	std::tie(args&: ShuffleSrc2, args&: AdditionalShuffleLanes) =
2169	GetMostCommon (ShuffleCounts);
2170	NumShuffleLanes += AdditionalShuffleLanes;
2171	}
2172
2173	// Predicate returning true if the lane is properly initialized by the
2174	// original instruction
2175	std::function<bool(size_t, const SDValue &)> IsLaneConstructed;
2176	SDValue Result;
2177	// Prefer swizzles over shuffles over vector consts over splats
2178	if (NumSwizzleLanes >= NumShuffleLanes &&
2179	NumSwizzleLanes >= NumConstantLanes && NumSwizzleLanes >= NumSplatLanes) {
2180	Result = DAG.getNode(Opcode: WebAssemblyISD::SWIZZLE, DL, VT: VecT, N1: SwizzleSrc,
2181	N2: SwizzleIndices);
2182	auto Swizzled = std::make_pair(x&: SwizzleSrc, y&: SwizzleIndices);
2183	IsLaneConstructed = [&, Swizzled](size_t I, const SDValue &Lane) {
2184	return Swizzled == GetSwizzleSrcs (I, Lane);
2185	};
2186	} else if (NumShuffleLanes >= NumConstantLanes &&
2187	NumShuffleLanes >= NumSplatLanes) {
2188	size_t DestLaneSize = VecT.getVectorElementType().getFixedSizeInBits() / `8`;
2189	size_t DestLaneCount = VecT.getVectorNumElements();
2190	size_t Scale1 = `1`;
2191	size_t Scale2 = `1`;
2192	SDValue Src1 = ShuffleSrc1;
2193	SDValue Src2 = ShuffleSrc2 ? ShuffleSrc2 : DAG.getUNDEF(VT: VecT);
2194	if (Src1.getValueType() != VecT) {
2195	size_t LaneSize =
2196	Src1.getValueType().getVectorElementType().getFixedSizeInBits() / `8`;
2197	assert(LaneSize > DestLaneSize);
2198	Scale1 = LaneSize / DestLaneSize;
2199	Src1 = DAG.getBitcast(VT: VecT, V: Src1);
2200	}
2201	if (Src2.getValueType() != VecT) {
2202	size_t LaneSize =
2203	Src2.getValueType().getVectorElementType().getFixedSizeInBits() / `8`;
2204	assert(LaneSize > DestLaneSize);
2205	Scale2 = LaneSize / DestLaneSize;
2206	Src2 = DAG.getBitcast(VT: VecT, V: Src2);
2207	}
2208
2209	int Mask[`16`];
2210	assert(DestLaneCount <= `16`);
2211	for (size_t I = `0`; I < DestLaneCount; ++I) {
2212	const SDValue &Lane = Op ->getOperand(Num: I);
2213	SDValue Src = GetShuffleSrc (Lane);
2214	if (Src == ShuffleSrc1) {
2215	Mask[I] = Lane ->getConstantOperandVal(Num: `1`) * Scale1;
2216	} else if (Src && Src == ShuffleSrc2) {
2217	Mask[I] = DestLaneCount + Lane ->getConstantOperandVal(Num: `1`) * Scale2;
2218	} else {
2219	Mask[I] = -`1`;
2220	}
2221	}
2222	ArrayRef<int> MaskRef(Mask, DestLaneCount);
2223	Result = DAG.getVectorShuffle(VT: VecT, dl: DL, N1: Src1, N2: Src2, Mask: MaskRef);
2224	IsLaneConstructed = [&](size_t, const SDValue &Lane) {
2225	auto Src = GetShuffleSrc (Lane);
2226	return Src == ShuffleSrc1 \|\| (Src && Src == ShuffleSrc2);
2227	};
2228	} else if (NumConstantLanes >= NumSplatLanes) {
2229	SmallVector<SDValue, `16`> ConstLanes;
2230	for (const SDValue &Lane : Op ->op_values()) {
2231	if (IsConstant (Lane)) {
2232	// Values may need to be fixed so that they will sign extend to be
2233	// within the expected range during ISel. Check whether the value is in
2234	// bounds based on the lane bit width and if it is out of bounds, lop
2235	// off the extra bits and subtract 2^n to reflect giving the high bit
2236	// value -2^(n-1) rather than +2^(n-1). Skip the i64 case because it
2237	// cannot possibly be out of range.
2238	auto *Const = dyn_cast<ConstantSDNode>(Val: Lane.getNode());
2239	int64_t Val = Const ? Const->getSExtValue() : `0`;
2240	uint64_t LaneBits = `128` / Lanes;
2241	assert((LaneBits == `64` \|\| Val >= -(`1ll` << (LaneBits - `1`))) &&
2242	"Unexpected out of bounds negative value");
2243	if (Const && LaneBits != `64` && Val > (`1ll` << (LaneBits - `1`)) - `1`) {
2244	uint64_t Mask = (`1ll` << LaneBits) - `1`;
2245	auto NewVal = (((uint64_t)Val & Mask) - (`1ll` << LaneBits)) & Mask;
2246	ConstLanes.push_back(Elt: DAG.getConstant(Val: NewVal, DL: SDLoc (Lane), VT: LaneT));
2247	} else {
2248	ConstLanes.push_back(Elt: Lane);
2249	}
2250	} else if (LaneT.isFloatingPoint()) {
2251	ConstLanes.push_back(Elt: DAG.getConstantFP(Val: `0`, DL, VT: LaneT));
2252	} else {
2253	ConstLanes.push_back(Elt: DAG.getConstant(Val: `0`, DL, VT: LaneT));
2254	}
2255	}
2256	Result = DAG.getBuildVector(VT: VecT, DL, Ops: ConstLanes);
2257	IsLaneConstructed = [&IsConstant](size_t _, const SDValue &Lane) {
2258	return IsConstant (Lane);
2259	};
2260	} else {
2261	// Use a splat (which might be selected as a load splat)
2262	Result = DAG.getSplatBuildVector(VT: VecT, DL, Op: SplatValue);
2263	IsLaneConstructed = [&SplatValue](size_t _, const SDValue &Lane) {
2264	return Lane == SplatValue;
2265	};
2266	}
2267
2268	assert(Result);
2269	assert(IsLaneConstructed);
2270
2271	// Add replace_lane instructions for any unhandled values
2272	for (size_t I = `0`; I < Lanes; ++I) {
2273	const SDValue &Lane = Op ->getOperand(Num: I);
2274	if (!Lane.isUndef() && !IsLaneConstructed(I, Lane))
2275	Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VecT, Result, Lane,
2276	DAG.getConstant(I, DL, MVT::i32));
2277	}
2278
2279	return Result;
2280	}
2281
2282	SDValue
2283	WebAssemblyTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
2284	SelectionDAG &DAG) const {
2285	SDLoc DL(Op);
2286	ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Val: Op.getNode())->getMask();
2287	MVT VecType = Op.getOperand(i: `0`).getSimpleValueType();
2288	assert(VecType.is128BitVector() && "Unexpected shuffle vector type");
2289	size_t LaneBytes = VecType.getVectorElementType().getSizeInBits() / `8`;
2290
2291	// Space for two vector args and sixteen mask indices
2292	SDValue Ops[`18`];
2293	size_t OpIdx = `0`;
2294	Ops[OpIdx++] = Op.getOperand(i: `0`);
2295	Ops[OpIdx++] = Op.getOperand(i: `1`);
2296
2297	// Expand mask indices to byte indices and materialize them as operands
2298	for (int M : Mask) {
2299	for (size_t J = `0`; J < LaneBytes; ++J) {
2300	// Lower undefs (represented by -1 in mask) to {0..J}, which use a
2301	// whole lane of vector input, to allow further reduction at VM. E.g.
2302	// match an 8x16 byte shuffle to an equivalent cheaper 32x4 shuffle.
2303	uint64_t ByteIndex = M == -`1` ? J : (uint64_t)M * LaneBytes + J;
2304	Ops[OpIdx++] = DAG.getConstant(ByteIndex, DL, MVT::i32);
2305	}
2306	}
2307
2308	return DAG.getNode(Opcode: WebAssemblyISD::SHUFFLE, DL, VT: Op.getValueType(), Ops);
2309	}
2310
2311	SDValue WebAssemblyTargetLowering::LowerSETCC(SDValue Op,
2312	SelectionDAG &DAG) const {
2313	SDLoc DL(Op);
2314	// The legalizer does not know how to expand the unsupported comparison modes
2315	// of i64x2 vectors, so we manually unroll them here.
2316	assert(Op->getOperand(`0`)->getSimpleValueType(`0`) == MVT::v2i64);
2317	SmallVector<SDValue, `2`> LHS, RHS;
2318	DAG.ExtractVectorElements(Op: Op ->getOperand(Num: `0`), Args&: LHS);
2319	DAG.ExtractVectorElements(Op: Op ->getOperand(Num: `1`), Args&: RHS);
2320	const SDValue &CC = Op ->getOperand(Num: `2`);
2321	auto MakeLane = [&](unsigned I) {
2322	return DAG.getNode(ISD::SELECT_CC, DL, MVT::i64, LHS[I], RHS[I],
2323	DAG.getConstant(uint64_t(-`1`), DL, MVT::i64),
2324	DAG.getConstant(uint64_t(`0`), DL, MVT::i64), CC);
2325	};
2326	return DAG.getBuildVector(Op ->getValueType(ResNo: `0`), DL,
2327	{MakeLane (`0`), MakeLane (`1`)});
2328	}
2329
2330	SDValue
2331	WebAssemblyTargetLowering::LowerAccessVectorElement(SDValue Op,
2332	SelectionDAG &DAG) const {
2333	// Allow constant lane indices, expand variable lane indices
2334	SDNode *IdxNode = Op.getOperand(i: Op.getNumOperands() - `1`).getNode();
2335	if (isa<ConstantSDNode>(Val: IdxNode)) {
2336	// Ensure the index type is i32 to match the tablegen patterns
2337	uint64_t Idx = IdxNode->getAsZExtVal();
2338	SmallVector<SDValue, `3`> Ops(Op.getNode()->ops());
2339	Ops[Op.getNumOperands() - `1`] =
2340	DAG.getConstant(Idx, SDLoc(IdxNode), MVT::i32);
2341	return DAG.getNode(Opcode: Op.getOpcode(), DL: SDLoc (Op), VT: Op.getValueType(), Ops);
2342	}
2343	// Perform default expansion
2344	return SDValue ();
2345	}
2346
2347	static SDValue unrollVectorShift(SDValue Op, SelectionDAG &DAG) {
2348	EVT LaneT = Op.getSimpleValueType().getVectorElementType();
2349	// 32-bit and 64-bit unrolled shifts will have proper semantics
2350	if (LaneT.bitsGE(MVT::i32))
2351	return DAG.UnrollVectorOp(N: Op.getNode());
2352	// Otherwise mask the shift value to get proper semantics from 32-bit shift
2353	SDLoc DL(Op);
2354	size_t NumLanes = Op.getSimpleValueType().getVectorNumElements();
2355	SDValue Mask = DAG.getConstant(LaneT.getSizeInBits() - `1`, DL, MVT::i32);
2356	unsigned ShiftOpcode = Op.getOpcode();
2357	SmallVector<SDValue, `16`> ShiftedElements;
2358	DAG.ExtractVectorElements(Op.getOperand(`0`), ShiftedElements, `0`, `0`, MVT::i32);
2359	SmallVector<SDValue, `16`> ShiftElements;
2360	DAG.ExtractVectorElements(Op.getOperand(`1`), ShiftElements, `0`, `0`, MVT::i32);
2361	SmallVector<SDValue, `16`> UnrolledOps;
2362	for (size_t i = `0`; i < NumLanes; ++i) {
2363	SDValue MaskedShiftValue =
2364	DAG.getNode(ISD::AND, DL, MVT::i32, ShiftElements[i], Mask);
2365	SDValue ShiftedValue = ShiftedElements [i];
2366	if (ShiftOpcode == ISD::SRA)
2367	ShiftedValue = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32,
2368	ShiftedValue, DAG.getValueType(LaneT));
2369	UnrolledOps.push_back(
2370	DAG.getNode(ShiftOpcode, DL, MVT::i32, ShiftedValue, MaskedShiftValue));
2371	}
2372	return DAG.getBuildVector(VT: Op.getValueType(), DL, Ops: UnrolledOps);
2373	}
2374
2375	SDValue WebAssemblyTargetLowering::LowerShift(SDValue Op,
2376	SelectionDAG &DAG) const {
2377	SDLoc DL(Op);
2378
2379	// Only manually lower vector shifts
2380	assert(Op.getSimpleValueType().isVector());
2381
2382	uint64_t LaneBits = Op.getValueType().getScalarSizeInBits();
2383	auto ShiftVal = Op.getOperand(i: `1`);
2384
2385	// Try to skip bitmask operation since it is implied inside shift instruction
2386	auto SkipImpliedMask = [](SDValue MaskOp, uint64_t MaskBits) {
2387	if (MaskOp.getOpcode() != ISD::AND)
2388	return MaskOp;
2389	SDValue LHS = MaskOp.getOperand(i: `0`);
2390	SDValue RHS = MaskOp.getOperand(i: `1`);
2391	if (MaskOp.getValueType().isVector()) {
2392	APInt MaskVal;
2393	if (!ISD::isConstantSplatVector(N: RHS.getNode(), SplatValue&: MaskVal))
2394	std::swap(a&: LHS, b&: RHS);
2395
2396	if (ISD::isConstantSplatVector(N: RHS.getNode(), SplatValue&: MaskVal) &&
2397	MaskVal == MaskBits)
2398	MaskOp = LHS;
2399	} else {
2400	if (!isa<ConstantSDNode>(Val: RHS.getNode()))
2401	std::swap(a&: LHS, b&: RHS);
2402
2403	auto ConstantRHS = dyn_cast<ConstantSDNode>(Val: RHS.getNode());
2404	if (ConstantRHS && ConstantRHS->getAPIntValue() == MaskBits)
2405	MaskOp = LHS;
2406	}
2407
2408	return MaskOp;
2409	};
2410
2411	// Skip vector and operation
2412	ShiftVal = SkipImpliedMask (ShiftVal, LaneBits - `1`);
2413	ShiftVal = DAG.getSplatValue(V: ShiftVal);
2414	if (!ShiftVal)
2415	return unrollVectorShift(Op, DAG);
2416
2417	// Skip scalar and operation
2418	ShiftVal = SkipImpliedMask (ShiftVal, LaneBits - `1`);
2419	// Use anyext because none of the high bits can affect the shift
2420	ShiftVal = DAG.getAnyExtOrTrunc(ShiftVal, DL, MVT::i32);
2421
2422	unsigned Opcode;
2423	switch (Op.getOpcode()) {
2424	case ISD::SHL:
2425	Opcode = WebAssemblyISD::VEC_SHL;
2426	break;
2427	case ISD::SRA:
2428	Opcode = WebAssemblyISD::VEC_SHR_S;
2429	break;
2430	case ISD::SRL:
2431	Opcode = WebAssemblyISD::VEC_SHR_U;
2432	break;
2433	default:
2434	llvm_unreachable("unexpected opcode");
2435	}
2436
2437	return DAG.getNode(Opcode, DL, VT: Op.getValueType(), N1: Op.getOperand(i: `0`), N2: ShiftVal);
2438	}
2439
2440	SDValue WebAssemblyTargetLowering::LowerFP_TO_INT_SAT(SDValue Op,
2441	SelectionDAG &DAG) const {
2442	SDLoc DL(Op);
2443	EVT ResT = Op.getValueType();
2444	EVT SatVT = cast<VTSDNode>(Val: Op.getOperand(i: `1`))->getVT();
2445
2446	if ((ResT == MVT::i32 \|\| ResT == MVT::i64) &&
2447	(SatVT == MVT::i32 \|\| SatVT == MVT::i64))
2448	return Op;
2449
2450	if (ResT == MVT::v4i32 && SatVT == MVT::i32)
2451	return Op;
2452
2453	return SDValue ();
2454	}
2455
2456	//===----------------------------------------------------------------------===//
2457	// Custom DAG combine hooks
2458	//===----------------------------------------------------------------------===//
2459	static SDValue
2460	performVECTOR_SHUFFLECombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
2461	auto &DAG = DCI.DAG;
2462	auto Shuffle = cast<ShuffleVectorSDNode>(Val: N);
2463
2464	// Hoist vector bitcasts that don't change the number of lanes out of unary
2465	// shuffles, where they are less likely to get in the way of other combines.
2466	// (shuffle (vNxT1 (bitcast (vNxT0 x))), undef, mask) ->
2467	// (vNxT1 (bitcast (vNxT0 (shuffle x, undef, mask))))
2468	SDValue Bitcast = N->getOperand(Num: `0`);
2469	if (Bitcast.getOpcode() != ISD::BITCAST)
2470	return SDValue ();
2471	if (!N->getOperand(Num: `1`).isUndef())
2472	return SDValue ();
2473	SDValue CastOp = Bitcast.getOperand(i: `0`);
2474	EVT SrcType = CastOp.getValueType();
2475	EVT DstType = Bitcast.getValueType();
2476	if (!SrcType.is128BitVector() \|\|
2477	SrcType.getVectorNumElements() != DstType.getVectorNumElements())
2478	return SDValue ();
2479	SDValue NewShuffle = DAG.getVectorShuffle(
2480	VT: SrcType, dl: SDLoc (N), N1: CastOp, N2: DAG.getUNDEF(VT: SrcType), Mask: Shuffle->getMask());
2481	return DAG.getBitcast(VT: DstType, V: NewShuffle);
2482	}
2483
2484	/// Convert ({u,s}itofp vec) --> ({u,s}itofp ({s,z}ext vec)) so it doesn't get
2485	/// split up into scalar instructions during legalization, and the vector
2486	/// extending instructions are selected in performVectorExtendCombine below.
2487	static SDValue
2488	performVectorExtendToFPCombine(SDNode *N,
2489	TargetLowering::DAGCombinerInfo &DCI) {
2490	auto &DAG = DCI.DAG;
2491	assert(N->getOpcode() == ISD::UINT_TO_FP \|\|
2492	N->getOpcode() == ISD::SINT_TO_FP);
2493
2494	EVT InVT = N->getOperand(Num: `0`)->getValueType(ResNo: `0`);
2495	EVT ResVT = N->getValueType(ResNo: `0`);
2496	MVT ExtVT;
2497	if (ResVT == MVT::v4f32 && (InVT == MVT::v4i16 \|\| InVT == MVT::v4i8))
2498	ExtVT = MVT::v4i32;
2499	else if (ResVT == MVT::v2f64 && (InVT == MVT::v2i16 \|\| InVT == MVT::v2i8))
2500	ExtVT = MVT::v2i32;
2501	else
2502	return SDValue ();
2503
2504	unsigned Op =
2505	N->getOpcode() == ISD::UINT_TO_FP ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
2506	SDValue Conv = DAG.getNode(Opcode: Op, DL: SDLoc (N), VT: ExtVT, Operand: N->getOperand(Num: `0`));
2507	return DAG.getNode(Opcode: N->getOpcode(), DL: SDLoc (N), VT: ResVT, Operand: Conv);
2508	}
2509
2510	static SDValue
2511	performVectorExtendCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
2512	auto &DAG = DCI.DAG;
2513	assert(N->getOpcode() == ISD::SIGN_EXTEND \|\|
2514	N->getOpcode() == ISD::ZERO_EXTEND);
2515
2516	// Combine ({s,z}ext (extract_subvector src, i)) into a widening operation if
2517	// possible before the extract_subvector can be expanded.
2518	auto Extract = N->getOperand(Num: `0`);
2519	if (Extract.getOpcode() != ISD::EXTRACT_SUBVECTOR)
2520	return SDValue ();
2521	auto Source = Extract.getOperand(i: `0`);
2522	auto *IndexNode = dyn_cast<ConstantSDNode>(Val: Extract.getOperand(i: `1`));
2523	if (IndexNode == nullptr)
2524	return SDValue ();
2525	auto Index = IndexNode->getZExtValue();
2526
2527	// Only v8i8, v4i16, and v2i32 extracts can be widened, and only if the
2528	// extracted subvector is the low or high half of its source.
2529	EVT ResVT = N->getValueType(ResNo: `0`);
2530	if (ResVT == MVT::v8i16) {
2531	if (Extract.getValueType() != MVT::v8i8 \|\|
2532	Source.getValueType() != MVT::v16i8 \|\| (Index != `0` && Index != `8`))
2533	return SDValue ();
2534	} else if (ResVT == MVT::v4i32) {
2535	if (Extract.getValueType() != MVT::v4i16 \|\|
2536	Source.getValueType() != MVT::v8i16 \|\| (Index != `0` && Index != `4`))
2537	return SDValue ();
2538	} else if (ResVT == MVT::v2i64) {
2539	if (Extract.getValueType() != MVT::v2i32 \|\|
2540	Source.getValueType() != MVT::v4i32 \|\| (Index != `0` && Index != `2`))
2541	return SDValue ();
2542	} else {
2543	return SDValue ();
2544	}
2545
2546	bool IsSext = N->getOpcode() == ISD::SIGN_EXTEND;
2547	bool IsLow = Index == `0`;
2548
2549	unsigned Op = IsSext ? (IsLow ? WebAssemblyISD::EXTEND_LOW_S
2550	: WebAssemblyISD::EXTEND_HIGH_S)
2551	: (IsLow ? WebAssemblyISD::EXTEND_LOW_U
2552	: WebAssemblyISD::EXTEND_HIGH_U);
2553
2554	return DAG.getNode(Opcode: Op, DL: SDLoc (N), VT: ResVT, Operand: Source);
2555	}
2556
2557	static SDValue
2558	performVectorTruncZeroCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
2559	auto &DAG = DCI.DAG;
2560
2561	auto GetWasmConversionOp = [](unsigned Op) {
2562	switch (Op) {
2563	case ISD::FP_TO_SINT_SAT:
2564	return WebAssemblyISD::TRUNC_SAT_ZERO_S;
2565	case ISD::FP_TO_UINT_SAT:
2566	return WebAssemblyISD::TRUNC_SAT_ZERO_U;
2567	case ISD::FP_ROUND:
2568	return WebAssemblyISD::DEMOTE_ZERO;
2569	}
2570	llvm_unreachable("unexpected op");
2571	};
2572
2573	auto IsZeroSplat = [](SDValue SplatVal) {
2574	auto *Splat = dyn_cast<BuildVectorSDNode>(Val: SplatVal.getNode());
2575	APInt SplatValue, SplatUndef;
2576	unsigned SplatBitSize;
2577	bool HasAnyUndefs;
2578	// Endianness doesn't matter in this context because we are looking for
2579	// an all-zero value.
2580	return Splat &&
2581	Splat->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
2582	HasAnyUndefs) &&
2583	SplatValue == `0`;
2584	};
2585
2586	if (N->getOpcode() == ISD::CONCAT_VECTORS) {
2587	// Combine this:
2588	//
2589	// (concat_vectors (v2i32 (fp_to_{s,u}int_sat $x, 32)), (v2i32 (splat 0)))
2590	//
2591	// into (i32x4.trunc_sat_f64x2_zero_{s,u} $x).
2592	//
2593	// Or this:
2594	//
2595	// (concat_vectors (v2f32 (fp_round (v2f64 $x))), (v2f32 (splat 0)))
2596	//
2597	// into (f32x4.demote_zero_f64x2 $x).
2598	EVT ResVT;
2599	EVT ExpectedConversionType;
2600	auto Conversion = N->getOperand(Num: `0`);
2601	auto ConversionOp = Conversion.getOpcode();
2602	switch (ConversionOp) {
2603	case ISD::FP_TO_SINT_SAT:
2604	case ISD::FP_TO_UINT_SAT:
2605	ResVT = MVT::v4i32;
2606	ExpectedConversionType = MVT::v2i32;
2607	break;
2608	case ISD::FP_ROUND:
2609	ResVT = MVT::v4f32;
2610	ExpectedConversionType = MVT::v2f32;
2611	break;
2612	default:
2613	return SDValue ();
2614	}
2615
2616	if (N->getValueType(ResNo: `0`) != ResVT)
2617	return SDValue ();
2618
2619	if (Conversion.getValueType() != ExpectedConversionType)
2620	return SDValue ();
2621
2622	auto Source = Conversion.getOperand(i: `0`);
2623	if (Source.getValueType() != MVT::v2f64)
2624	return SDValue ();
2625
2626	if (!IsZeroSplat (N->getOperand(Num: `1`)) \|\|
2627	N->getOperand(Num: `1`).getValueType() != ExpectedConversionType)
2628	return SDValue ();
2629
2630	unsigned Op = GetWasmConversionOp (ConversionOp);
2631	return DAG.getNode(Opcode: Op, DL: SDLoc (N), VT: ResVT, Operand: Source);
2632	}
2633
2634	// Combine this:
2635	//
2636	// (fp_to_{s,u}int_sat (concat_vectors $x, (v2f64 (splat 0))), 32)
2637	//
2638	// into (i32x4.trunc_sat_f64x2_zero_{s,u} $x).
2639	//
2640	// Or this:
2641	//
2642	// (v4f32 (fp_round (concat_vectors $x, (v2f64 (splat 0)))))
2643	//
2644	// into (f32x4.demote_zero_f64x2 $x).
2645	EVT ResVT;
2646	auto ConversionOp = N->getOpcode();
2647	switch (ConversionOp) {
2648	case ISD::FP_TO_SINT_SAT:
2649	case ISD::FP_TO_UINT_SAT:
2650	ResVT = MVT::v4i32;
2651	break;
2652	case ISD::FP_ROUND:
2653	ResVT = MVT::v4f32;
2654	break;
2655	default:
2656	llvm_unreachable("unexpected op");
2657	}
2658
2659	if (N->getValueType(ResNo: `0`) != ResVT)
2660	return SDValue ();
2661
2662	auto Concat = N->getOperand(Num: `0`);
2663	if (Concat.getValueType() != MVT::v4f64)
2664	return SDValue ();
2665
2666	auto Source = Concat.getOperand(i: `0`);
2667	if (Source.getValueType() != MVT::v2f64)
2668	return SDValue ();
2669
2670	if (!IsZeroSplat(Concat.getOperand(`1`)) \|\|
2671	Concat.getOperand(`1`).getValueType() != MVT::v2f64)
2672	return SDValue ();
2673
2674	unsigned Op = GetWasmConversionOp (ConversionOp);
2675	return DAG.getNode(Opcode: Op, DL: SDLoc (N), VT: ResVT, Operand: Source);
2676	}
2677
2678	// Helper to extract VectorWidth bits from Vec, starting from IdxVal.
2679	static SDValue extractSubVector(SDValue Vec, unsigned IdxVal, SelectionDAG &DAG,
2680	const SDLoc &DL, unsigned VectorWidth) {
2681	EVT VT = Vec.getValueType();
2682	EVT ElVT = VT.getVectorElementType();
2683	unsigned Factor = VT.getSizeInBits() / VectorWidth;
2684	EVT ResultVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: ElVT,
2685	NumElements: VT.getVectorNumElements() / Factor);
2686
2687	// Extract the relevant VectorWidth bits. Generate an EXTRACT_SUBVECTOR
2688	unsigned ElemsPerChunk = VectorWidth / ElVT.getSizeInBits();
2689	assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2");
2690
2691	// This is the index of the first element of the VectorWidth-bit chunk
2692	// we want. Since ElemsPerChunk is a power of 2 just need to clear bits.
2693	IdxVal &= ~(ElemsPerChunk - `1`);
2694
2695	// If the input is a buildvector just emit a smaller one.
2696	if (Vec.getOpcode() == ISD::BUILD_VECTOR)
2697	return DAG.getBuildVector(VT: ResultVT, DL,
2698	Ops: Vec ->ops().slice(N: IdxVal, M: ElemsPerChunk));
2699
2700	SDValue VecIdx = DAG.getIntPtrConstant(Val: IdxVal, DL);
2701	return DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: ResultVT, N1: Vec, N2: VecIdx);
2702	}
2703
2704	// Helper to recursively truncate vector elements in half with NARROW_U. DstVT
2705	// is the expected destination value type after recursion. In is the initial
2706	// input. Note that the input should have enough leading zero bits to prevent
2707	// NARROW_U from saturating results.
2708	static SDValue truncateVectorWithNARROW(EVT DstVT, SDValue In, const SDLoc &DL,
2709	SelectionDAG &DAG) {
2710	EVT SrcVT = In.getValueType();
2711
2712	// No truncation required, we might get here due to recursive calls.
2713	if (SrcVT == DstVT)
2714	return In;
2715
2716	unsigned SrcSizeInBits = SrcVT.getSizeInBits();
2717	unsigned NumElems = SrcVT.getVectorNumElements();
2718	if (!isPowerOf2_32(Value: NumElems))
2719	return SDValue ();
2720	assert(DstVT.getVectorNumElements() == NumElems && "Illegal truncation");
2721	assert(SrcSizeInBits > DstVT.getSizeInBits() && "Illegal truncation");
2722
2723	LLVMContext &Ctx = *DAG.getContext();
2724	EVT PackedSVT = EVT::getIntegerVT(Context&: Ctx, BitWidth: SrcVT.getScalarSizeInBits() / `2`);
2725
2726	// Narrow to the largest type possible:
2727	// vXi64/vXi32 -> i16x8.narrow_i32x4_u and vXi16 -> i8x16.narrow_i16x8_u.
2728	EVT InVT = MVT::i16, OutVT = MVT::i8;
2729	if (SrcVT.getScalarSizeInBits() > `16`) {
2730	InVT = MVT::i32;
2731	OutVT = MVT::i16;
2732	}
2733	unsigned SubSizeInBits = SrcSizeInBits / `2`;
2734	InVT = EVT::getVectorVT(Context&: Ctx, VT: InVT, NumElements: SubSizeInBits / InVT.getSizeInBits());
2735	OutVT = EVT::getVectorVT(Context&: Ctx, VT: OutVT, NumElements: SubSizeInBits / OutVT.getSizeInBits());
2736
2737	// Split lower/upper subvectors.
2738	SDValue Lo = extractSubVector(Vec: In, IdxVal: `0`, DAG, DL, VectorWidth: SubSizeInBits);
2739	SDValue Hi = extractSubVector(Vec: In, IdxVal: NumElems / `2`, DAG, DL, VectorWidth: SubSizeInBits);
2740
2741	// 256bit -> 128bit truncate - Narrow lower/upper 128-bit subvectors.
2742	if (SrcVT.is256BitVector() && DstVT.is128BitVector()) {
2743	Lo = DAG.getBitcast(VT: InVT, V: Lo);
2744	Hi = DAG.getBitcast(VT: InVT, V: Hi);
2745	SDValue Res = DAG.getNode(Opcode: WebAssemblyISD::NARROW_U, DL, VT: OutVT, N1: Lo, N2: Hi);
2746	return DAG.getBitcast(VT: DstVT, V: Res);
2747	}
2748
2749	// Recursively narrow lower/upper subvectors, concat result and narrow again.
2750	EVT PackedVT = EVT::getVectorVT(Context&: Ctx, VT: PackedSVT, NumElements: NumElems / `2`);
2751	Lo = truncateVectorWithNARROW(DstVT: PackedVT, In: Lo, DL, DAG);
2752	Hi = truncateVectorWithNARROW(DstVT: PackedVT, In: Hi, DL, DAG);
2753
2754	PackedVT = EVT::getVectorVT(Context&: Ctx, VT: PackedSVT, NumElements: NumElems);
2755	SDValue Res = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT: PackedVT, N1: Lo, N2: Hi);
2756	return truncateVectorWithNARROW(DstVT, In: Res, DL, DAG);
2757	}
2758
2759	static SDValue performTruncateCombine(SDNode *N,
2760	TargetLowering::DAGCombinerInfo &DCI) {
2761	auto &DAG = DCI.DAG;
2762
2763	SDValue In = N->getOperand(Num: `0`);
2764	EVT InVT = In.getValueType();
2765	if (!InVT.isSimple())
2766	return SDValue ();
2767
2768	EVT OutVT = N->getValueType(ResNo: `0`);
2769	if (!OutVT.isVector())
2770	return SDValue ();
2771
2772	EVT OutSVT = OutVT.getVectorElementType();
2773	EVT InSVT = InVT.getVectorElementType();
2774	// Currently only cover truncate to v16i8 or v8i16.
2775	if (!((InSVT == MVT::i16 \|\| InSVT == MVT::i32 \|\| InSVT == MVT::i64) &&
2776	(OutSVT == MVT::i8 \|\| OutSVT == MVT::i16) && OutVT.is128BitVector()))
2777	return SDValue ();
2778
2779	SDLoc DL(N);
2780	APInt Mask = APInt::getLowBitsSet(numBits: InVT.getScalarSizeInBits(),
2781	loBitsSet: OutVT.getScalarSizeInBits());
2782	In = DAG.getNode(Opcode: ISD::AND, DL, VT: InVT, N1: In, N2: DAG.getConstant(Val: Mask, DL, VT: InVT));
2783	return truncateVectorWithNARROW(DstVT: OutVT, In, DL, DAG);
2784	}
2785
2786	static SDValue performBitcastCombine(SDNode *N,
2787	TargetLowering::DAGCombinerInfo &DCI) {
2788	auto &DAG = DCI.DAG;
2789	SDLoc DL(N);
2790	SDValue Src = N->getOperand(Num: `0`);
2791	EVT VT = N->getValueType(ResNo: `0`);
2792	EVT SrcVT = Src.getValueType();
2793
2794	// bitcast <N x i1> to iN
2795	// ==> bitmask
2796	if (DCI.isBeforeLegalize() && VT.isScalarInteger() &&
2797	SrcVT.isFixedLengthVector() && SrcVT.getScalarType() == MVT::i1) {
2798	unsigned NumElts = SrcVT.getVectorNumElements();
2799	if (NumElts != `2` && NumElts != `4` && NumElts != `8` && NumElts != `16`)
2800	return SDValue ();
2801	EVT Width = MVT::getIntegerVT(BitWidth: `128` / NumElts);
2802	return DAG.getZExtOrTrunc(
2803	DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
2804	{DAG.getConstant(Intrinsic::wasm_bitmask, DL, MVT::i32),
2805	DAG.getSExtOrTrunc(N->getOperand(`0`), DL,
2806	SrcVT.changeVectorElementType(Width))}),
2807	DL, VT);
2808	}
2809
2810	return SDValue ();
2811	}
2812
2813	static SDValue performSETCCCombine(SDNode *N,
2814	TargetLowering::DAGCombinerInfo &DCI) {
2815	auto &DAG = DCI.DAG;
2816
2817	SDValue LHS = N->getOperand(Num: `0`);
2818	SDValue RHS = N->getOperand(Num: `1`);
2819	ISD::CondCode Cond = cast<CondCodeSDNode>(Val: N->getOperand(Num: `2`))->get();
2820	SDLoc DL(N);
2821	EVT VT = N->getValueType(ResNo: `0`);
2822
2823	// setcc (iN (bitcast (vNi1 X))), 0, ne
2824	// ==> any_true (vNi1 X)
2825	// setcc (iN (bitcast (vNi1 X))), 0, eq
2826	// ==> xor (any_true (vNi1 X)), -1
2827	// setcc (iN (bitcast (vNi1 X))), -1, eq
2828	// ==> all_true (vNi1 X)
2829	// setcc (iN (bitcast (vNi1 X))), -1, ne
2830	// ==> xor (all_true (vNi1 X)), -1
2831	if (DCI.isBeforeLegalize() && VT.isScalarInteger() &&
2832	(Cond == ISD::SETEQ \|\| Cond == ISD::SETNE) &&
2833	(isNullConstant(V: RHS) \|\| isAllOnesConstant(V: RHS)) &&
2834	LHS ->getOpcode() == ISD::BITCAST) {
2835	EVT FromVT = LHS ->getOperand(Num: `0`).getValueType();
2836	if (FromVT.isFixedLengthVector() &&
2837	FromVT.getVectorElementType() == MVT::i1) {
2838	int Intrin = isNullConstant(RHS) ? Intrinsic::wasm_anytrue
2839	: Intrinsic::wasm_alltrue;
2840	unsigned NumElts = FromVT.getVectorNumElements();
2841	if (NumElts != `2` && NumElts != `4` && NumElts != `8` && NumElts != `16`)
2842	return SDValue ();
2843	EVT Width = MVT::getIntegerVT(BitWidth: `128` / NumElts);
2844	SDValue Ret = DAG.getZExtOrTrunc(
2845	DAG.getNode(
2846	ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
2847	{DAG.getConstant(Intrin, DL, MVT::i32),
2848	DAG.getSExtOrTrunc(LHS->getOperand(`0`), DL,
2849	FromVT.changeVectorElementType(Width))}),
2850	DL, MVT::i1);
2851	if ((isNullConstant(V: RHS) && (Cond == ISD::SETEQ)) \|\|
2852	(isAllOnesConstant(V: RHS) && (Cond == ISD::SETNE))) {
2853	Ret = DAG.getNOT(DL, Ret, MVT::i1);
2854	}
2855	return DAG.getZExtOrTrunc(Op: Ret, DL, VT);
2856	}
2857	}
2858
2859	return SDValue ();
2860	}
2861
2862	SDValue
2863	WebAssemblyTargetLowering::PerformDAGCombine(SDNode *N,
2864	DAGCombinerInfo &DCI) const {
2865	switch (N->getOpcode()) {
2866	default:
2867	return SDValue ();
2868	case ISD::BITCAST:
2869	return performBitcastCombine(N, DCI);
2870	case ISD::SETCC:
2871	return performSETCCCombine(N, DCI);
2872	case ISD::VECTOR_SHUFFLE:
2873	return performVECTOR_SHUFFLECombine(N, DCI);
2874	case ISD::SIGN_EXTEND:
2875	case ISD::ZERO_EXTEND:
2876	return performVectorExtendCombine(N, DCI);
2877	case ISD::UINT_TO_FP:
2878	case ISD::SINT_TO_FP:
2879	return performVectorExtendToFPCombine(N, DCI);
2880	case ISD::FP_TO_SINT_SAT:
2881	case ISD::FP_TO_UINT_SAT:
2882	case ISD::FP_ROUND:
2883	case ISD::CONCAT_VECTORS:
2884	return performVectorTruncZeroCombine(N, DCI);
2885	case ISD::TRUNCATE:
2886	return performTruncateCombine(N, DCI);
2887	}
2888	}
2889

source code of llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp