NeonEmitter.cpp source code [clang/utils/TableGen/NeonEmitter.cpp]

1	//===-- NeonEmitter.cpp - Generate arm_neon.h for use with clang ----------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// This tablegen backend is responsible for emitting arm_neon.h, which includes
10	// a declaration and definition of each function specified by the ARM NEON
11	// compiler interface. See ARM document DUI0348B.
12	//
13	// Each NEON instruction is implemented in terms of 1 or more functions which
14	// are suffixed with the element type of the input vectors. Functions may be
15	// implemented in terms of generic vector operations such as +, *, -, etc. or
16	// by calling a __builtin_-prefixed function which will be handled by clang's
17	// CodeGen library.
18	//
19	// Additional validation code can be generated by this file when runHeader() is
20	// called, rather than the normal run() entry point.
21	//
22	// See also the documentation in include/clang/Basic/arm_neon.td.
23	//
24	//===----------------------------------------------------------------------===//
25
26	#include "TableGenBackends.h"
27	#include "llvm/ADT/ArrayRef.h"
28	#include "llvm/ADT/DenseMap.h"
29	#include "llvm/ADT/STLExtras.h"
30	#include "llvm/ADT/SmallVector.h"
31	#include "llvm/ADT/StringExtras.h"
32	#include "llvm/ADT/StringRef.h"
33	#include "llvm/Support/Casting.h"
34	#include "llvm/Support/ErrorHandling.h"
35	#include "llvm/Support/raw_ostream.h"
36	#include "llvm/TableGen/AArch64ImmCheck.h"
37	#include "llvm/TableGen/Error.h"
38	#include "llvm/TableGen/Record.h"
39	#include "llvm/TableGen/SetTheory.h"
40	#include "llvm/TableGen/StringToOffsetTable.h"
41	#include <algorithm>
42	#include <cassert>
43	#include <cctype>
44	#include <cstddef>
45	#include <cstdint>
46	#include <deque>
47	#include <map>
48	#include <optional>
49	#include <set>
50	#include <sstream>
51	#include <string>
52	#include <unordered_map>
53	#include <utility>
54	#include <vector>
55
56	using namespace llvm;
57
58	namespace {
59
60	// While globals are generally bad, this one allows us to perform assertions
61	// liberally and somehow still trace them back to the def they indirectly
62	// came from.
63	static const Record CurrentRecord = nullptr*;
64	static void assert_with_loc(bool Assertion, const std::string &Str) {
65	if (!Assertion) {
66	if (CurrentRecord)
67	PrintFatalError(ErrorLoc: CurrentRecord->getLoc(), Msg: Str);
68	else
69	PrintFatalError(Msg: Str);
70	}
71	}
72
/// The naming/suffix class of an intrinsic. It selects which type suffix (if
/// any) is appended to the intrinsic name.
enum ClassKind {
  ClassNone,
  ClassI,     // generic integer instruction, e.g., "i8" suffix
  ClassS,     // signed/unsigned/poly, e.g., "s8", "u8" or "p8" suffix
  ClassW,     // width-specific instruction, e.g., "8" suffix
  ClassV,     // void-suffix instruction, no suffix
  ClassB,     // bitcast arguments with enum argument to specify type
  ClassL,     // Logical instructions which are op instructions
              // but we need to not emit any suffix for in our
              // tests.
  ClassNoTest // Instructions which we do not test since they are
              // not TRUE instructions.
};
86
/// NeonTypeFlags - Flags to identify the types for overloaded Neon
/// builtins. These must be kept in sync with the flags in
/// include/clang/Basic/TargetBuiltins.h.
namespace NeonTypeFlags {

/// Bit layout of an encoded type: the low bits hold an EltType, and the two
/// flag bits above them mark unsigned and 128-bit ("quad") types.
enum { EltTypeMask = 0xf, UnsignedFlag = 0x10, QuadFlag = 0x20 };

/// Element type codes. Order matters: Type::getNeonEnum() computes values
/// by offsetting from Int8, Poly8 and Float16.
enum EltType {
  Int8,
  Int16,
  Int32,
  Int64,
  Poly8,
  Poly16,
  Poly64,
  Poly128,
  Float16,
  Float32,
  Float64,
  BFloat16,
  MFloat8
};

// Every element type code must be representable within EltTypeMask, otherwise
// it would collide with the flag bits.
static_assert(static_cast<unsigned>(MFloat8) <=
                  static_cast<unsigned>(EltTypeMask),
              "EltType values must fit in EltTypeMask");

} // end namespace NeonTypeFlags
111
112	class NeonEmitter;
113
114	//===----------------------------------------------------------------------===//
115	// TypeSpec
116	//===----------------------------------------------------------------------===//
117
118	/// A TypeSpec is just a simple wrapper around a string, but gets its own type
119	/// for strong typing purposes.
120	///
121	/// A TypeSpec can be used to create a type.
122	class TypeSpec : public std::string {
123	public:
124	static std::vector<TypeSpec> fromTypeSpecs(StringRef Str) {
125	std::vector<TypeSpec> Ret;
126	TypeSpec Acc;
127	for (char I : Str.str()) {
128	if (islower(I)) {
129	Acc.push_back(c: I);
130	Ret.push_back(x: TypeSpec (Acc));
131	Acc.clear();
132	} else {
133	Acc.push_back(c: I);
134	}
135	}
136	return Ret;
137	}
138	};
139
140	//===----------------------------------------------------------------------===//
141	// Type
142	//===----------------------------------------------------------------------===//
143
144	/// A Type. Not much more to say here.
145	class Type {
146	private:
147	TypeSpec TS;
148
149	enum TypeKind { Void, Float, SInt, UInt, Poly, BFloat16, MFloat8, FPM };
150	TypeKind Kind;
151	bool Immediate, Constant, Pointer;
152	// ScalarForMangling and NoManglingQ are really not suited to live here as
153	// they are not related to the type. But they live in the TypeSpec (not the
154	// prototype), so this is really the only place to store them.
155	bool ScalarForMangling, NoManglingQ;
156	unsigned Bitwidth, ElementBitwidth, NumVectors;
157
158	public:
159	Type()
160	: Kind(Void), Immediate(false), Constant(false),
161	Pointer(false), ScalarForMangling(false), NoManglingQ(false),
162	Bitwidth(`0`), ElementBitwidth(`0`), NumVectors(`0`) {}
163
164	Type(TypeSpec TS, StringRef CharMods)
165	: TS (std::move(TS)), Kind(Void), Immediate(false),
166	Constant(false), Pointer(false), ScalarForMangling(false),
167	NoManglingQ(false), Bitwidth(`0`), ElementBitwidth(`0`), NumVectors(`0`) {
168	applyModifiers(Mods: CharMods);
169	}
170
171	/// Returns a type representing "void".
172	static Type getVoid() { return Type (); }
173
174	bool operator==(const Type &Other) const { return str() == Other.str(); }
175	bool operator!=(const Type &Other) const { return !operator==(Other); }
176
177	//
178	// Query functions
179	//
180	bool isScalarForMangling() const { return ScalarForMangling; }
181	bool noManglingQ() const { return NoManglingQ; }
182
183	bool isPointer() const { return Pointer; }
184	bool isValue() const { return !isVoid() && !isPointer(); }
185	bool isScalar() const { return isValue() && NumVectors == `0`; }
186	bool isVector() const { return isValue() && NumVectors > `0`; }
187	bool isConstPointer() const { return Constant; }
188	bool isFloating() const { return Kind == Float; }
189	bool isInteger() const { return Kind == SInt \|\| Kind == UInt; }
190	bool isPoly() const { return Kind == Poly; }
191	bool isSigned() const { return Kind == SInt; }
192	bool isImmediate() const { return Immediate; }
193	bool isFloat() const { return isFloating() && ElementBitwidth == `32`; }
194	bool isDouble() const { return isFloating() && ElementBitwidth == `64`; }
195	bool isHalf() const { return isFloating() && ElementBitwidth == `16`; }
196	bool isChar() const { return ElementBitwidth == `8`; }
197	bool isShort() const { return isInteger() && ElementBitwidth == `16`; }
198	bool isInt() const { return isInteger() && ElementBitwidth == `32`; }
199	bool isLong() const { return isInteger() && ElementBitwidth == `64`; }
200	bool isVoid() const { return Kind == Void; }
201	bool isBFloat16() const { return Kind == BFloat16; }
202	bool isMFloat8() const { return Kind == MFloat8; }
203	bool isFPM() const { return Kind == FPM; }
204	unsigned getNumElements() const { return Bitwidth / ElementBitwidth; }
205	unsigned getSizeInBits() const { return Bitwidth; }
206	unsigned getElementSizeInBits() const { return ElementBitwidth; }
207	unsigned getNumVectors() const { return NumVectors; }
208
209	//
210	// Mutator functions
211	//
212	void makeUnsigned() {
213	assert(!isVoid() && "not a potentially signed type");
214	Kind = UInt;
215	}
216	void makeSigned() {
217	assert(!isVoid() && "not a potentially signed type");
218	Kind = SInt;
219	}
220
221	void makeInteger(unsigned ElemWidth, bool Sign) {
222	assert(!isVoid() && "converting void to int probably not useful");
223	Kind = Sign ? SInt : UInt;
224	Immediate = false;
225	ElementBitwidth = ElemWidth;
226	}
227
228	void makeImmediate(unsigned ElemWidth) {
229	Kind = SInt;
230	Immediate = true;
231	ElementBitwidth = ElemWidth;
232	}
233
234	void makeScalar() {
235	Bitwidth = ElementBitwidth;
236	NumVectors = `0`;
237	}
238
239	void makeOneVector() {
240	assert(isVector());
241	NumVectors = `1`;
242	}
243
244	void make32BitElement() {
245	assert_with_loc(Assertion: Bitwidth > `32`, Str: "Not enough bits to make it 32!");
246	ElementBitwidth = `32`;
247	}
248
249	void doubleLanes() {
250	assert_with_loc(Assertion: Bitwidth != `128`, Str: "Can't get bigger than 128!");
251	Bitwidth = `128`;
252	}
253
254	void halveLanes() {
255	assert_with_loc(Assertion: Bitwidth != `64`, Str: "Can't get smaller than 64!");
256	Bitwidth = `64`;
257	}
258
259	/// Return the C string representation of a type, which is the typename
260	/// defined in stdint.h or arm_neon.h.
261	std::string str() const;
262
263	/// Return the string representation of a type, which is an encoded
264	/// string for passing to the BUILTIN() macro in Builtins.def.
265	std::string builtin_str() const;
266
267	/// Return the value in NeonTypeFlags for this type.
268	unsigned getNeonEnum() const;
269
270	/// Parse a type from a stdint.h or arm_neon.h typedef name,
271	/// for example uint32x2_t or int64_t.
272	static Type fromTypedefName(StringRef Name);
273
274	private:
275	/// Creates the type based on the typespec string in TS.
276	/// Sets "Quad" to true if the "Q" or "H" modifiers were
277	/// seen. This is needed by applyModifier as some modifiers
278	/// only take effect if the type size was changed by "Q" or "H".
279	void applyTypespec(bool &Quad);
280	/// Applies prototype modifiers to the type.
281	void applyModifiers(StringRef Mods);
282	};
283
284	//===----------------------------------------------------------------------===//
285	// Variable
286	//===----------------------------------------------------------------------===//
287
288	/// A variable is a simple class that just has a type and a name.
289	class Variable {
290	Type T;
291	std::string N;
292
293	public:
294	Variable() : T(Type::getVoid()) {}
295	Variable(Type T, std::string N) : T (std::move(T)), N (std::move(N)) {}
296
297	Type getType() const { return T; }
298	std::string getName() const { return "__" + N; }
299	};
300
301	//===----------------------------------------------------------------------===//
302	// Intrinsic
303	//===----------------------------------------------------------------------===//
304
305	/// The main grunt class. This represents an instantiation of an intrinsic with
306	/// a particular typespec and prototype.
307	class Intrinsic {
308	/// The Record this intrinsic was created from.
309	const Record *R;
310	/// The unmangled name.
311	std::string Name;
312	/// The input and output typespecs. InTS == OutTS except when
313	/// CartesianProductWith is non-empty - this is the case for vreinterpret.
314	TypeSpec OutTS, InTS;
315	/// The base class kind. Most intrinsics use ClassS, which has full type
316	/// info for integers (s32/u32). Some use ClassI, which doesn't care about
317	/// signedness (i32), while some (ClassB) have no type at all, only a width
318	/// (32).
319	ClassKind CK;
320	/// The list of DAGs for the body. May be empty, in which case we should
321	/// emit a builtin call.
322	const ListInit *Body;
323	/// The architectural ifdef guard.
324	std::string ArchGuard;
325	/// The architectural target() guard.
326	std::string TargetGuard;
327	/// Set if the Unavailable bit is 1. This means we don't generate a body,
328	/// just an "unavailable" attribute on a declaration.
329	bool IsUnavailable;
330	/// Is this intrinsic safe for big-endian? or does it need its arguments
331	/// reversing?
332	bool BigEndianSafe;
333
334	/// The types of return value [0] and parameters [1..].
335	std::vector<Type> Types;
336
337	SmallVector<ImmCheck, `2`> ImmChecks;
338	/// The index of the key type passed to CGBuiltin.cpp for polymorphic calls.
339	int PolymorphicKeyType;
340	/// The local variables defined.
341	std::map<std::string, Variable, std::less<>> Variables;
342	/// NeededEarly - set if any other intrinsic depends on this intrinsic.
343	bool NeededEarly;
344	/// UseMacro - set if we should implement using a macro or unset for a
345	/// function.
346	bool UseMacro;
347	/// The set of intrinsics that this intrinsic uses/requires.
348	std::set<Intrinsic *> Dependencies;
349	/// The "base type", which is Type('d', OutTS). InBaseType is only
350	/// different if CartesianProductWith is non-empty (for vreinterpret).
351	Type BaseType, InBaseType;
352	/// The return variable.
353	Variable RetVar;
354	/// A postfix to apply to every variable. Defaults to "".
355	std::string VariablePostfix;
356
357	NeonEmitter &Emitter;
358	std::stringstream OS;
359
360	bool isBigEndianSafe() const {
361	if (BigEndianSafe)
362	return true;
363
364	for (const auto &T : Types){
365	if (T.isVector() && T.getNumElements() > `1`)
366	return false;
367	}
368	return true;
369	}
370
371	public:
372	Intrinsic(const Record *R, StringRef Name, StringRef Proto, TypeSpec OutTS,
373	TypeSpec InTS, ClassKind CK, const ListInit *Body,
374	NeonEmitter &Emitter, StringRef ArchGuard, StringRef TargetGuard,
375	bool IsUnavailable, bool BigEndianSafe)
376	: R(R), Name(Name.str()), OutTS (OutTS), InTS (InTS), CK(CK), Body(Body),
377	ArchGuard(ArchGuard.str()), TargetGuard(TargetGuard.str()),
378	IsUnavailable(IsUnavailable), BigEndianSafe(BigEndianSafe),
379	PolymorphicKeyType(`0`), NeededEarly(false), UseMacro(false),
380	BaseType (OutTS, "."), InBaseType (InTS, "."), Emitter(Emitter) {
381	// Modify the TypeSpec per-argument to get a concrete Type, and create
382	// known variables for each.
383	// Types[0] is the return value.
384	unsigned Pos = `0`;
385	Types.emplace_back(args&: OutTS, args: getNextModifiers(Proto, Pos));
386	StringRef Mods = getNextModifiers(Proto, Pos);
387	while (!Mods.empty()) {
388	Types.emplace_back(args&: InTS, args&: Mods);
389	if (Mods.contains(C: `'!'`))
390	PolymorphicKeyType = Types.size() - `1`;
391
392	Mods = getNextModifiers(Proto, Pos);
393	}
394
395	for (const auto &Type : Types) {
396	// If this builtin takes an immediate argument, we need to #define it rather
397	// than use a standard declaration, so that SemaChecking can range check
398	// the immediate passed by the user.
399
400	// Pointer arguments need to use macros to avoid hiding aligned attributes
401	// from the pointer type.
402
403	// It is not permitted to pass or return an __fp16 by value, so intrinsics
404	// taking a scalar float16_t must be implemented as macros.
405	if (Type.isImmediate() \|\| Type.isPointer() \|\|
406	(Type.isScalar() && Type.isHalf()))
407	UseMacro = true;
408	}
409
410	int ArgIdx, Kind, TypeArgIdx;
411	for (const Record *I : R->getValueAsListOfDefs(FieldName: "ImmChecks")) {
412	unsigned EltSizeInBits = `0`, VecSizeInBits = `0`;
413
414	ArgIdx = I->getValueAsInt(FieldName: "ImmArgIdx");
415	TypeArgIdx = I->getValueAsInt(FieldName: "TypeContextArgIdx");
416	Kind = I->getValueAsDef(FieldName: "Kind")->getValueAsInt(FieldName: "Value");
417
418	assert((ArgIdx >= `0` && Kind >= `0`) &&
419	"ImmArgIdx and Kind must be nonnegative");
420
421	if (TypeArgIdx >= `0`) {
422	Type ContextType = getParamType(I: TypeArgIdx);
423
424	// Element size cannot be set for intrinscs that map to polymorphic
425	// builtins.
426	if (CK != ClassB)
427	EltSizeInBits = ContextType.getElementSizeInBits();
428
429	VecSizeInBits = ContextType.getSizeInBits();
430	}
431
432	ImmChecks.emplace_back(Args&: ArgIdx, Args&: Kind, Args&: EltSizeInBits, Args&: VecSizeInBits);
433	}
434	sort(Start: ImmChecks.begin(), End: ImmChecks.end(),
435	Comp: [](const ImmCheck &a, const ImmCheck &b) {
436	return a.getImmArgIdx() < b.getImmArgIdx();
437	}); // Sort for comparison with other intrinsics which map to the
438	// same builtin
439	}
440
441	/// Get the Record that this intrinsic is based off.
442	const Record getRecord() const* { return R; }
443	/// Get the set of Intrinsics that this intrinsic calls.
444	/// this is the set of immediate dependencies, NOT the
445	/// transitive closure.
446	const std::set<Intrinsic > &getDependencies() const* { return Dependencies; }
447	/// Get the architectural guard string (#ifdef).
448	std::string getArchGuard() const { return ArchGuard; }
449	std::string getTargetGuard() const { return TargetGuard; }
450	ArrayRef<ImmCheck> getImmChecks() const { return ImmChecks; }
451	/// Get the non-mangled name.
452	std::string getName() const { return Name; }
453
454	/// Return true if the intrinsic takes an immediate operand.
455	bool hasImmediate() const {
456	return any_of(Range: Types, P: [](const Type &T) { return T.isImmediate(); });
457	}
458
459	// Return if the supplied argument is an immediate
460	bool isArgImmediate(unsigned idx) const {
461	return Types [idx + `1`].isImmediate();
462	}
463
464	unsigned getNumParams() const { return Types.size() - `1`; }
465	Type getReturnType() const { return Types [`0`]; }
466	Type getParamType(unsigned I) const { return Types [I + `1`]; }
467	Type getBaseType() const { return BaseType; }
468	Type getPolymorphicKeyType() const { return Types [PolymorphicKeyType]; }
469
470	/// Return true if the prototype has a scalar argument.
471	bool protoHasScalar() const;
472
473	/// Return the index that parameter PIndex will sit at
474	/// in a generated function call. This is often just PIndex,
475	/// but may not be as things such as multiple-vector operands
476	/// and sret parameters need to be taken into account.
477	unsigned getGeneratedParamIdx(unsigned PIndex) {
478	unsigned Idx = `0`;
479	if (getReturnType().getNumVectors() > `1`)
480	// Multiple vectors are passed as sret.
481	++Idx;
482
483	for (unsigned I = `0`; I < PIndex; ++I)
484	Idx += std::max(a: `1U`, b: getParamType(I).getNumVectors());
485
486	return Idx;
487	}
488
489	bool hasBody() const { return Body && !Body->empty(); }
490
491	void setNeededEarly() { NeededEarly = true; }
492
493	bool operator<(const Intrinsic &Other) const {
494	// Sort lexicographically on a three-tuple (ArchGuard, TargetGuard, Name)
495	return std::tie(args: ArchGuard, args: TargetGuard, args: Name) <
496	std::tie(args: Other.ArchGuard, args: Other.TargetGuard, args: Other.Name);
497	}
498
499	ClassKind getClassKind(bool UseClassBIfScalar = false) {
500	if (UseClassBIfScalar && !protoHasScalar())
501	return ClassB;
502	return CK;
503	}
504
505	/// Return the name, mangled with type information.
506	/// If ForceClassS is true, use ClassS (u32/s32) instead
507	/// of the intrinsic's own type class.
508	std::string getMangledName(bool ForceClassS = false) const;
509	/// Return the type code for a builtin function call.
510	std::string getInstTypeCode(Type T, ClassKind CK) const;
511	/// Return the type string for a BUILTIN() macro in Builtins.def.
512	std::string getBuiltinTypeStr();
513
514	/// Generate the intrinsic, returning code.
515	std::string generate();
516	/// Perform type checking and populate the dependency graph, but
517	/// don't generate code yet.
518	void indexBody();
519
520	private:
521	StringRef getNextModifiers(StringRef Proto, unsigned &Pos) const;
522
523	std::string mangleName(std::string Name, ClassKind CK) const;
524
525	void initVariables();
526	std::string replaceParamsIn(std::string S);
527
528	void emitBodyAsBuiltinCall();
529
530	void generateImpl(bool ReverseArguments,
531	StringRef NamePrefix, StringRef CallPrefix);
532	void emitReturn();
533	void emitBody(StringRef CallPrefix);
534	void emitShadowedArgs();
535	void emitArgumentReversal();
536	void emitReturnVarDecl();
537	void emitReturnReversal();
538	void emitReverseVariable(Variable &Dest, Variable &Src);
539	void emitNewLine();
540	void emitClosingBrace();
541	void emitOpeningBrace();
542	void emitPrototype(StringRef NamePrefix);
543
544	class DagEmitter {
545	Intrinsic &Intr;
546	StringRef CallPrefix;
547
548	public:
549	DagEmitter(Intrinsic &Intr, StringRef CallPrefix) :
550	Intr(Intr), CallPrefix (CallPrefix) {
551	}
552	std::pair<Type, std::string> emitDagArg(const Init *Arg,
553	std::string ArgName);
554	std::pair<Type, std::string> emitDagSaveTemp(const DagInit *DI);
555	std::pair<Type, std::string> emitDagSplat(const DagInit *DI);
556	std::pair<Type, std::string> emitDagDup(const DagInit *DI);
557	std::pair<Type, std::string> emitDagDupTyped(const DagInit *DI);
558	std::pair<Type, std::string> emitDagShuffle(const DagInit *DI);
559	std::pair<Type, std::string> emitDagCast(const DagInit DI, bool* IsBitCast);
560	std::pair<Type, std::string> emitDagCall(const DagInit *DI,
561	bool MatchMangledName);
562	std::pair<Type, std::string> emitDagNameReplace(const DagInit *DI);
563	std::pair<Type, std::string> emitDagLiteral(const DagInit *DI);
564	std::pair<Type, std::string> emitDagOp(const DagInit *DI);
565	std::pair<Type, std::string> emitDag(const DagInit *DI);
566	};
567	};
568
569	//===----------------------------------------------------------------------===//
570	// NeonEmitter
571	//===----------------------------------------------------------------------===//
572
573	class NeonEmitter {
574	const RecordKeeper &Records;
575	DenseMap<const Record *, ClassKind> ClassMap;
576	std::map<std::string, std::deque<Intrinsic>, std::less<>> IntrinsicMap;
577	unsigned UniqueNumber;
578
579	void createIntrinsic(const Record R, SmallVectorImpl<Intrinsic > &Out);
580	void genBuiltinsDef(raw_ostream &OS, SmallVectorImpl<Intrinsic *> &Defs);
581	void genStreamingSVECompatibleList(raw_ostream &OS,
582	SmallVectorImpl<Intrinsic *> &Defs);
583	void genOverloadTypeCheckCode(raw_ostream &OS,
584	SmallVectorImpl<Intrinsic *> &Defs);
585	bool areRangeChecksCompatible(const ArrayRef<ImmCheck> ChecksA,
586	const ArrayRef<ImmCheck> ChecksB);
587	void genIntrinsicRangeCheckCode(raw_ostream &OS,
588	SmallVectorImpl<Intrinsic *> &Defs);
589
590	public:
591	/// Called by Intrinsic - this attempts to get an intrinsic that takes
592	/// the given types as arguments.
593	Intrinsic &getIntrinsic(StringRef Name, ArrayRef<Type> Types,
594	std::optional<std::string> MangledName);
595
596	/// Called by Intrinsic - returns a globally-unique number.
597	unsigned getUniqueNumber() { return UniqueNumber++; }
598
599	NeonEmitter(const RecordKeeper &R) : Records(R), UniqueNumber(`0`) {
600	const Record *SI = R.getClass(Name: "SInst");
601	const Record *II = R.getClass(Name: "IInst");
602	const Record *WI = R.getClass(Name: "WInst");
603	const Record *VI = R.getClass(Name: "VInst");
604	const Record *SOpI = R.getClass(Name: "SOpInst");
605	const Record *IOpI = R.getClass(Name: "IOpInst");
606	const Record *WOpI = R.getClass(Name: "WOpInst");
607	const Record *LOpI = R.getClass(Name: "LOpInst");
608	const Record *NoTestOpI = R.getClass(Name: "NoTestOpInst");
609
610	ClassMap [SI] = ClassS;
611	ClassMap [II] = ClassI;
612	ClassMap [WI] = ClassW;
613	ClassMap [VI] = ClassV;
614	ClassMap [SOpI] = ClassS;
615	ClassMap [IOpI] = ClassI;
616	ClassMap [WOpI] = ClassW;
617	ClassMap [LOpI] = ClassL;
618	ClassMap [NoTestOpI] = ClassNoTest;
619	}
620
621	// Emit arm_neon.h.inc
622	void run(raw_ostream &o);
623
624	// Emit arm_fp16.h.inc
625	void runFP16(raw_ostream &o);
626
627	// Emit arm_bf16.h.inc
628	void runBF16(raw_ostream &o);
629
630	void runVectorTypes(raw_ostream &o);
631
632	// Emit all the __builtin prototypes used in arm_neon.h, arm_fp16.h and
633	// arm_bf16.h
634	void runHeader(raw_ostream &o);
635	};
636
637	} // end anonymous namespace
638
639	//===----------------------------------------------------------------------===//
640	// Type implementation
641	//===----------------------------------------------------------------------===//
642
643	std::string Type::str() const {
644	if (isVoid())
645	return "void";
646	if (isFPM())
647	return "fpm_t";
648
649	std::string S;
650
651	if (isInteger() && !isSigned())
652	S += "u";
653
654	if (isPoly())
655	S += "poly";
656	else if (isFloating())
657	S += "float";
658	else if (isBFloat16())
659	S += "bfloat";
660	else if (isMFloat8())
661	S += "mfloat";
662	else
663	S += "int";
664
665	S += utostr(X: ElementBitwidth);
666	if (isVector())
667	S += "x" + utostr(X: getNumElements());
668	if (NumVectors > `1`)
669	S += "x" + utostr(X: NumVectors);
670	S += "_t";
671
672	if (Constant)
673	S += " const";
674	if (Pointer)
675	S += " *";
676
677	return S;
678	}
679
680	std::string Type::builtin_str() const {
681	std::string S;
682	if (isVoid())
683	return "v";
684
685	if (isPointer()) {
686	// All pointers are void pointers.
687	S = "v";
688	if (isConstPointer())
689	S += "C";
690	S += "*";
691	return S;
692	} else if (isInteger())
693	switch (ElementBitwidth) {
694	case `8`: S += "c"; break;
695	case `16`: S += "s"; break;
696	case `32`: S += "i"; break;
697	case `64`: S += "Wi"; break;
698	case `128`: S += "LLLi"; break;
699	default: llvm_unreachable("Unhandled case!");
700	}
701	else if (isBFloat16()) {
702	assert(ElementBitwidth == `16` && "BFloat16 can only be 16 bits");
703	S += "y";
704	} else if (isMFloat8()) {
705	assert(ElementBitwidth == `8` && "MFloat8 can only be 8 bits");
706	S += "m";
707	} else if (isFPM()) {
708	S += "UWi";
709	} else
710	switch (ElementBitwidth) {
711	case `16`: S += "h"; break;
712	case `32`: S += "f"; break;
713	case `64`: S += "d"; break;
714	default: llvm_unreachable("Unhandled case!");
715	}
716
717	// FIXME: NECESSARY???????????????????????????????????????????????????????????????????????
718	if (isChar() && !isPointer() && isSigned())
719	// Make chars explicitly signed.
720	S = "S" + S;
721	else if (isInteger() && !isSigned())
722	S = "U" + S;
723
724	// Constant indices are "int", but have the "constant expression" modifier.
725	if (isImmediate()) {
726	assert(isInteger() && isSigned());
727	S = "I" + S;
728	}
729
730	if (isScalar())
731	return S;
732
733	std::string Ret;
734	for (unsigned I = `0`; I < NumVectors; ++I)
735	Ret += "V" + utostr(X: getNumElements()) + S;
736
737	return Ret;
738	}
739
740	unsigned Type::getNeonEnum() const {
741	unsigned Addend;
742	switch (ElementBitwidth) {
743	case `8`: Addend = `0`; break;
744	case `16`: Addend = `1`; break;
745	case `32`: Addend = `2`; break;
746	case `64`: Addend = `3`; break;
747	case `128`: Addend = `4`; break;
748	default: llvm_unreachable("Unhandled element bitwidth!");
749	}
750
751	unsigned Base = (unsigned)NeonTypeFlags::Int8 + Addend;
752	if (isPoly()) {
753	// Adjustment needed because Poly32 doesn't exist.
754	if (Addend >= `2`)
755	--Addend;
756	Base = (unsigned)NeonTypeFlags::Poly8 + Addend;
757	}
758	if (isFloating()) {
759	assert(Addend != `0` && "Float8 doesn't exist!");
760	Base = (unsigned)NeonTypeFlags::Float16 + (Addend - `1`);
761	}
762
763	if (isBFloat16()) {
764	assert(Addend == `1` && "BFloat16 is only 16 bit");
765	Base = (unsigned)NeonTypeFlags::BFloat16;
766	}
767
768	if (isMFloat8()) {
769	Base = (unsigned)NeonTypeFlags::MFloat8;
770	}
771
772	if (Bitwidth == `128`)
773	Base \|= (unsigned)NeonTypeFlags::QuadFlag;
774	if (isInteger() && !isSigned())
775	Base \|= (unsigned)NeonTypeFlags::UnsignedFlag;
776
777	return Base;
778	}
779
780	Type Type::fromTypedefName(StringRef Name) {
781	Type T;
782	T.Kind = SInt;
783
784	if (Name.consume_front(Prefix: "u"))
785	T.Kind = UInt;
786
787	if (Name.consume_front(Prefix: "float")) {
788	T.Kind = Float;
789	} else if (Name.consume_front(Prefix: "poly")) {
790	T.Kind = Poly;
791	} else if (Name.consume_front(Prefix: "bfloat")) {
792	T.Kind = BFloat16;
793	} else if (Name.consume_front(Prefix: "mfloat")) {
794	T.Kind = MFloat8;
795	} else {
796	assert(Name.starts_with("int"));
797	Name = Name.drop_front(N: `3`);
798	}
799
800	unsigned I = `0`;
801	for (I = `0`; I < Name.size(); ++I) {
802	if (!isdigit(Name [I]))
803	break;
804	}
805	Name.substr(Start: `0`, N: I).getAsInteger(Radix: `10`, Result&: T.ElementBitwidth);
806	Name = Name.drop_front(N: I);
807
808	T.Bitwidth = T.ElementBitwidth;
809	T.NumVectors = `1`;
810
811	if (Name.consume_front(Prefix: "x")) {
812	unsigned I = `0`;
813	for (I = `0`; I < Name.size(); ++I) {
814	if (!isdigit(Name [I]))
815	break;
816	}
817	unsigned NumLanes;
818	Name.substr(Start: `0`, N: I).getAsInteger(Radix: `10`, Result&: NumLanes);
819	Name = Name.drop_front(N: I);
820	T.Bitwidth = T.ElementBitwidth * NumLanes;
821	} else {
822	// Was scalar.
823	T.NumVectors = `0`;
824	}
825	if (Name.consume_front(Prefix: "x")) {
826	unsigned I = `0`;
827	for (I = `0`; I < Name.size(); ++I) {
828	if (!isdigit(Name [I]))
829	break;
830	}
831	Name.substr(Start: `0`, N: I).getAsInteger(Radix: `10`, Result&: T.NumVectors);
832	Name = Name.drop_front(N: I);
833	}
834
835	assert(Name.starts_with("_t") && "Malformed typedef!");
836	return T;
837	}
838
839	void Type::applyTypespec(bool &Quad) {
840	std::string S = TS;
841	ScalarForMangling = false;
842	Kind = SInt;
843	ElementBitwidth = ~`0U`;
844	NumVectors = `1`;
845
846	for (char I : S) {
847	switch (I) {
848	case `'S'`:
849	ScalarForMangling = true;
850	break;
851	case `'H'`:
852	NoManglingQ = true;
853	Quad = true;
854	break;
855	case `'Q'`:
856	Quad = true;
857	break;
858	case `'P'`:
859	Kind = Poly;
860	break;
861	case `'U'`:
862	Kind = UInt;
863	break;
864	case `'c'`:
865	ElementBitwidth = `8`;
866	break;
867	case `'h'`:
868	Kind = Float;
869	[[fallthrough]];
870	case `'s'`:
871	ElementBitwidth = `16`;
872	break;
873	case `'f'`:
874	Kind = Float;
875	[[fallthrough]];
876	case `'i'`:
877	ElementBitwidth = `32`;
878	break;
879	case `'d'`:
880	Kind = Float;
881	[[fallthrough]];
882	case `'l'`:
883	ElementBitwidth = `64`;
884	break;
885	case `'k'`:
886	ElementBitwidth = `128`;
887	// Poly doesn't have a 128x1 type.
888	if (isPoly())
889	NumVectors = `0`;
890	break;
891	case `'b'`:
892	Kind = BFloat16;
893	ElementBitwidth = `16`;
894	break;
895	case `'m'`:
896	Kind = MFloat8;
897	ElementBitwidth = `8`;
898	break;
899	default:
900	llvm_unreachable("Unhandled type code!");
901	}
902	}
903	assert(ElementBitwidth != ~`0U` && "Bad element bitwidth!");
904
905	Bitwidth = Quad ? `128` : `64`;
906	}
907
908	void Type::applyModifiers(StringRef Mods) {
909	bool AppliedQuad = false;
910	applyTypespec(Quad&: AppliedQuad);
911
912	for (char Mod : Mods) {
913	switch (Mod) {
914	case `'.'`:
915	break;
916	case `'v'`:
917	Kind = Void;
918	break;
919	case `'S'`:
920	Kind = SInt;
921	break;
922	case `'U'`:
923	Kind = UInt;
924	break;
925	case `'B'`:
926	Kind = BFloat16;
927	ElementBitwidth = `16`;
928	break;
929	case `'F'`:
930	Kind = Float;
931	break;
932	case `'P'`:
933	Kind = Poly;
934	break;
935	case `'V'`:
936	Kind = FPM;
937	Bitwidth = ElementBitwidth = `64`;
938	NumVectors = `0`;
939	Immediate = Constant = Pointer = false;
940	ScalarForMangling = NoManglingQ = true;
941	break;
942	case `'>'`:
943	assert(ElementBitwidth < `128`);
944	ElementBitwidth *= `2`;
945	break;
946	case `'<'`:
947	assert(ElementBitwidth > `8`);
948	ElementBitwidth /= `2`;
949	break;
950	case `'1'`:
951	NumVectors = `0`;
952	break;
953	case `'2'`:
954	NumVectors = `2`;
955	break;
956	case `'3'`:
957	NumVectors = `3`;
958	break;
959	case `'4'`:
960	NumVectors = `4`;
961	break;
962	case `'*'`:
963	Pointer = true;
964	break;
965	case `'c'`:
966	Constant = true;
967	break;
968	case `'Q'`:
969	Bitwidth = `128`;
970	break;
971	case `'q'`:
972	Bitwidth = `64`;
973	break;
974	case `'I'`:
975	Kind = SInt;
976	ElementBitwidth = Bitwidth = `32`;
977	NumVectors = `0`;
978	Immediate = true;
979	break;
980	case `'p'`:
981	if (isPoly())
982	Kind = UInt;
983	break;
984	case `'!'`:
985	// Key type, handled elsewhere.
986	break;
987	default:
988	llvm_unreachable("Unhandled character!");
989	}
990	}
991	}
992
993	//===----------------------------------------------------------------------===//
994	// Intrinsic implementation
995	//===----------------------------------------------------------------------===//
996
997	StringRef Intrinsic::getNextModifiers(StringRef Proto, unsigned &Pos) const {
998	if (Proto.size() == Pos)
999	return StringRef ();
1000	else if (Proto [Pos] != `'('`)
1001	return Proto.substr(Start: Pos++, N: `1`);
1002
1003	size_t Start = Pos + `1`;
1004	size_t End = Proto.find(C: `')'`, From: Start);
1005	assert_with_loc(Assertion: End != StringRef::npos, Str: "unmatched modifier group paren");
1006	Pos = End + `1`;
1007	return Proto.slice(Start, End);
1008	}
1009
1010	std::string Intrinsic::getInstTypeCode(Type T, ClassKind CK) const {
1011	char typeCode = `'\0'`;
1012	bool printNumber = true;
1013
1014	if (CK == ClassB && TargetGuard == "neon")
1015	return "";
1016
1017	if (this->CK == ClassV)
1018	return "";
1019
1020	if (T.isBFloat16())
1021	return "bf16";
1022
1023	if (T.isMFloat8())
1024	return "mf8";
1025
1026	if (T.isPoly())
1027	typeCode = `'p'`;
1028	else if (T.isInteger())
1029	typeCode = T.isSigned() ? `'s'` : `'u'`;
1030	else
1031	typeCode = `'f'`;
1032
1033	if (CK == ClassI) {
1034	switch (typeCode) {
1035	default:
1036	break;
1037	case `'s'`:
1038	case `'u'`:
1039	case `'p'`:
1040	typeCode = `'i'`;
1041	break;
1042	}
1043	}
1044	if (CK == ClassB && TargetGuard == "neon") {
1045	typeCode = `'\0'`;
1046	}
1047
1048	std::string S;
1049	if (typeCode != `'\0'`)
1050	S.push_back(c: typeCode);
1051	if (printNumber)
1052	S += utostr(X: T.getElementSizeInBits());
1053
1054	return S;
1055	}
1056
1057	std::string Intrinsic::getBuiltinTypeStr() {
1058	ClassKind LocalCK = getClassKind(UseClassBIfScalar: true);
1059	std::string S;
1060
1061	Type RetT = getReturnType();
1062	if ((LocalCK == ClassI \|\| LocalCK == ClassW) && RetT.isScalar() &&
1063	!RetT.isFloating() && !RetT.isBFloat16() && !RetT.isMFloat8())
1064	RetT.makeInteger(ElemWidth: RetT.getElementSizeInBits(), Sign: false);
1065
1066	// Since the return value must be one type, return a vector type of the
1067	// appropriate width which we will bitcast. An exception is made for
1068	// returning structs of 2, 3, or 4 vectors which are returned in a sret-like
1069	// fashion, storing them to a pointer arg.
1070	if (RetT.getNumVectors() > `1`) {
1071	S += "vv"; // void result with void* first argument*
1072	} else {
1073	if (RetT.isPoly())
1074	RetT.makeInteger(ElemWidth: RetT.getElementSizeInBits(), Sign: false);
1075	if (!RetT.isScalar() && RetT.isInteger() && !RetT.isSigned())
1076	RetT.makeSigned();
1077
1078	if (LocalCK == ClassB && RetT.isValue() && !RetT.isScalar())
1079	// Cast to vector of 8-bit elements.
1080	RetT.makeInteger(ElemWidth: `8`, Sign: true);
1081
1082	S += RetT.builtin_str();
1083	}
1084
1085	for (unsigned I = `0`; I < getNumParams(); ++I) {
1086	Type T = getParamType(I);
1087	if (T.isPoly())
1088	T.makeInteger(ElemWidth: T.getElementSizeInBits(), Sign: false);
1089
1090	if (LocalCK == ClassB && !T.isScalar())
1091	T.makeInteger(ElemWidth: `8`, Sign: true);
1092	// Halves always get converted to 8-bit elements.
1093	if (T.isHalf() && T.isVector() && !T.isScalarForMangling())
1094	T.makeInteger(ElemWidth: `8`, Sign: true);
1095
1096	if (LocalCK == ClassI && T.isInteger())
1097	T.makeSigned();
1098
1099	if (isArgImmediate(idx: I))
1100	T.makeImmediate(ElemWidth: `32`);
1101
1102	S += T.builtin_str();
1103	}
1104
1105	// Extra constant integer to hold type class enum for this function, e.g. s8
1106	if (LocalCK == ClassB)
1107	S += "i";
1108
1109	return S;
1110	}
1111
1112	std::string Intrinsic::getMangledName(bool ForceClassS) const {
1113	// Check if the prototype has a scalar operand with the type of the vector
1114	// elements. If not, bitcasting the args will take care of arg checking.
1115	// The actual signedness etc. will be taken care of with special enums.
1116	ClassKind LocalCK = CK;
1117	if (!protoHasScalar())
1118	LocalCK = ClassB;
1119
1120	return mangleName(Name, CK: ForceClassS ? ClassS : LocalCK);
1121	}
1122
1123	std::string Intrinsic::mangleName(std::string Name, ClassKind LocalCK) const {
1124	std::string typeCode = getInstTypeCode(T: BaseType, CK: LocalCK);
1125	std::string S = Name;
1126
1127	if (Name == "vcvt_f16_f32" \|\| Name == "vcvt_f32_f16" \|\|
1128	Name == "vcvt_f32_f64" \|\| Name == "vcvt_f64_f32" \|\|
1129	Name == "vcvt_f32_bf16")
1130	return Name;
1131
1132	if (!typeCode.empty()) {
1133	// If the name ends with _xN (N = 2,3,4), insert the typeCode before _xN.
1134	if (Name.size() >= `3` && isdigit(Name.back()) &&
1135	Name [Name.length() - `2`] == `'x'` && Name [Name.length() - `3`] == `'_'`)
1136	S.insert(pos1: S.length() - `3`, str: "_" + typeCode);
1137	else
1138	S += "_" + typeCode;
1139	}
1140
1141	if (BaseType != InBaseType) {
1142	// A reinterpret - out the input base type at the end.
1143	S += "_" + getInstTypeCode(T: InBaseType, CK: LocalCK);
1144	}
1145
1146	if (LocalCK == ClassB && TargetGuard == "neon")
1147	S += "_v";
1148
1149	// Insert a 'q' before the first '_' character so that it ends up before
1150	// _lane or _n on vector-scalar operations.
1151	if (BaseType.getSizeInBits() == `128` && !BaseType.noManglingQ()) {
1152	size_t Pos = S.find(c: `'_'`);
1153	S.insert(pos: Pos, s: "q");
1154	}
1155
1156	char Suffix = `'\0'`;
1157	if (BaseType.isScalarForMangling()) {
1158	switch (BaseType.getElementSizeInBits()) {
1159	case `8`: Suffix = `'b'`; break;
1160	case `16`: Suffix = `'h'`; break;
1161	case `32`: Suffix = `'s'`; break;
1162	case `64`: Suffix = `'d'`; break;
1163	default: llvm_unreachable("Bad suffix!");
1164	}
1165	}
1166	if (Suffix != `'\0'`) {
1167	size_t Pos = S.find(c: `'_'`);
1168	S.insert(pos: Pos, s: &Suffix, n: `1`);
1169	}
1170
1171	return S;
1172	}
1173
1174	std::string Intrinsic::replaceParamsIn(std::string S) {
1175	while (S.find(c: `'$'`) != std::string::npos) {
1176	size_t Pos = S.find(c: `'$'`);
1177	size_t End = Pos + `1`;
1178	while (isalpha(S [End]))
1179	++End;
1180
1181	std::string VarName = S.substr(pos: Pos + `1`, n: End - Pos - `1`);
1182	assert_with_loc(Assertion: Variables.find(x: VarName) != Variables.end(),
1183	Str: "Variable not defined!");
1184	S.replace(pos: Pos, n: End - Pos, str: Variables.find(x: VarName)->second.getName());
1185	}
1186
1187	return S;
1188	}
1189
1190	void Intrinsic::initVariables() {
1191	Variables.clear();
1192
1193	// Modify the TypeSpec per-argument to get a concrete Type, and create
1194	// known variables for each.
1195	for (unsigned I = `1`; I < Types.size(); ++I) {
1196	char NameC = `'0'` + (I - `1`);
1197	std::string Name = "p";
1198	Name.push_back(c: NameC);
1199
1200	Variables [Name] = Variable (Types [I], Name + VariablePostfix);
1201	}
1202	RetVar = Variable (Types [`0`], "ret" + VariablePostfix);
1203	}
1204
1205	void Intrinsic::emitPrototype(StringRef NamePrefix) {
1206	if (UseMacro) {
1207	OS << "#define ";
1208	} else {
1209	OS << "__ai ";
1210	if (TargetGuard != "")
1211	OS << "__attribute__((target(\"" << TargetGuard << "\"))) ";
1212	OS << Types [`0`].str() << " ";
1213	}
1214
1215	OS << NamePrefix.str() << mangleName(Name, LocalCK: ClassS) << "(";
1216
1217	for (unsigned I = `0`; I < getNumParams(); ++I) {
1218	if (I != `0`)
1219	OS << ", ";
1220
1221	char NameC = `'0'` + I;
1222	std::string Name = "p";
1223	Name.push_back(c: NameC);
1224	assert(Variables.find(Name) != Variables.end());
1225	Variable &V = Variables [Name];
1226
1227	if (!UseMacro)
1228	OS << V.getType().str() << " ";
1229	OS << V.getName();
1230	}
1231
1232	OS << ")";
1233	}
1234
1235	void Intrinsic::emitOpeningBrace() {
1236	if (UseMacro)
1237	OS << " __extension__ ({";
1238	else
1239	OS << " {";
1240	emitNewLine();
1241	}
1242
1243	void Intrinsic::emitClosingBrace() {
1244	if (UseMacro)
1245	OS << "})";
1246	else
1247	OS << "}";
1248	}
1249
1250	void Intrinsic::emitNewLine() {
1251	if (UseMacro)
1252	OS << " \\\n";
1253	else
1254	OS << "\n";
1255	}
1256
1257	void Intrinsic::emitReverseVariable(Variable &Dest, Variable &Src) {
1258	if (Dest.getType().getNumVectors() > `1`) {
1259	emitNewLine();
1260
1261	for (unsigned K = `0`; K < Dest.getType().getNumVectors(); ++K) {
1262	OS << " " << Dest.getName() << ".val[" << K << "] = "
1263	<< "__builtin_shufflevector(" << Src.getName() << ".val[" << K << "], "
1264	<< Src.getName() << ".val[" << K << "], __lane_reverse_"
1265	<< Dest.getType().getSizeInBits() << "_"
1266	<< Dest.getType().getElementSizeInBits() << ");";
1267	emitNewLine();
1268	}
1269	} else {
1270	OS << " " << Dest.getName() << " = __builtin_shufflevector("
1271	<< Src.getName() << ", " << Src.getName() << ", __lane_reverse_"
1272	<< Dest.getType().getSizeInBits() << "_"
1273	<< Dest.getType().getElementSizeInBits() << ");";
1274	emitNewLine();
1275	}
1276	}
1277
1278	void Intrinsic::emitArgumentReversal() {
1279	if (isBigEndianSafe())
1280	return;
1281
1282	// Reverse all vector arguments.
1283	for (unsigned I = `0`; I < getNumParams(); ++I) {
1284	std::string Name = "p" + utostr(X: I);
1285	std::string NewName = "rev" + utostr(X: I);
1286
1287	Variable &V = Variables [Name];
1288	Variable NewV(V.getType(), NewName + VariablePostfix);
1289
1290	if (!NewV.getType().isVector() \|\| NewV.getType().getNumElements() == `1`)
1291	continue;
1292
1293	OS << " " << NewV.getType().str() << " " << NewV.getName() << ";";
1294	emitReverseVariable(Dest&: NewV, Src&: V);
1295	V = NewV;
1296	}
1297	}
1298
1299	void Intrinsic::emitReturnVarDecl() {
1300	assert(RetVar.getType() == Types[`0`]);
1301	// Create a return variable, if we're not void.
1302	if (!RetVar.getType().isVoid()) {
1303	OS << " " << RetVar.getType().str() << " " << RetVar.getName() << ";";
1304	emitNewLine();
1305	}
1306	}
1307
1308	void Intrinsic::emitReturnReversal() {
1309	if (isBigEndianSafe())
1310	return;
1311	if (!getReturnType().isVector() \|\| getReturnType().isVoid() \|\|
1312	getReturnType().getNumElements() == `1`)
1313	return;
1314	emitReverseVariable(Dest&: RetVar, Src&: RetVar);
1315	}
1316
1317	void Intrinsic::emitShadowedArgs() {
1318	// Macro arguments are not type-checked like inline function arguments,
1319	// so assign them to local temporaries to get the right type checking.
1320	if (!UseMacro)
1321	return;
1322
1323	for (unsigned I = `0`; I < getNumParams(); ++I) {
1324	// Do not create a temporary for an immediate argument.
1325	// That would defeat the whole point of using a macro!
1326	if (getParamType(I).isImmediate())
1327	continue;
1328	// Do not create a temporary for pointer arguments. The input
1329	// pointer may have an alignment hint.
1330	if (getParamType(I).isPointer())
1331	continue;
1332
1333	std::string Name = "p" + utostr(X: I);
1334
1335	assert(Variables.find(Name) != Variables.end());
1336	Variable &V = Variables [Name];
1337
1338	std::string NewName = "s" + utostr(X: I);
1339	Variable V2(V.getType(), NewName + VariablePostfix);
1340
1341	OS << " " << V2.getType().str() << " " << V2.getName() << " = "
1342	<< V.getName() << ";";
1343	emitNewLine();
1344
1345	V = V2;
1346	}
1347	}
1348
1349	bool Intrinsic::protoHasScalar() const {
1350	return any_of(Range: Types,
1351	P: [](const Type &T) { return T.isScalar() && !T.isImmediate(); });
1352	}
1353
1354	void Intrinsic::emitBodyAsBuiltinCall() {
1355	std::string S;
1356
1357	// If this builtin returns a struct 2, 3, or 4 vectors, pass it as an implicit
1358	// sret-like argument.
1359	bool SRet = getReturnType().getNumVectors() >= `2`;
1360
1361	StringRef N = Name;
1362	ClassKind LocalCK = CK;
1363	if (!protoHasScalar())
1364	LocalCK = ClassB;
1365
1366	if (!getReturnType().isVoid() && !SRet)
1367	S += "__builtin_bit_cast(" + RetVar.getType().str() + ", ";
1368
1369	S += "__builtin_neon_" + mangleName(Name: std::string (N), LocalCK) + "(";
1370
1371	if (SRet)
1372	S += "&" + RetVar.getName() + ", ";
1373
1374	for (unsigned I = `0`; I < getNumParams(); ++I) {
1375	Variable &V = Variables ["p" + utostr(X: I)];
1376	Type T = V.getType();
1377
1378	// Handle multiple-vector values specially, emitting each subvector as an
1379	// argument to the builtin.
1380	if (T.getNumVectors() > `1`) {
1381	// Check if an explicit cast is needed.
1382	std::string Cast;
1383	if (LocalCK == ClassB) {
1384	Type T2 = T;
1385	T2.makeOneVector();
1386	T2.makeInteger(ElemWidth: `8`, /Sign=/true);
1387	Cast = "__builtin_bit_cast(" + T2.str() + ", ";
1388	}
1389
1390	for (unsigned J = `0`; J < T.getNumVectors(); ++J)
1391	S += Cast + V.getName() + ".val[" + utostr(X: J) + "]" +
1392	(Cast.empty() ? ", " : "), ");
1393	continue;
1394	}
1395
1396	std::string Arg = V.getName();
1397	Type CastToType = T;
1398
1399	// Check if an explicit cast is needed.
1400	if (CastToType.isVector()) {
1401	if (LocalCK == ClassB \|\| (T.isHalf() && !T.isScalarForMangling())) {
1402	CastToType.makeInteger(ElemWidth: `8`, Sign: true);
1403	Arg = "__builtin_bit_cast(" + CastToType.str() + ", " + Arg + ")";
1404	} else if (LocalCK == ClassI) {
1405	if (CastToType.isInteger()) {
1406	CastToType.makeSigned();
1407	Arg = "__builtin_bit_cast(" + CastToType.str() + ", " + Arg + ")";
1408	}
1409	}
1410	}
1411
1412	S += Arg + ", ";
1413	}
1414
1415	// Extra constant integer to hold type class enum for this function, e.g. s8
1416	if (getClassKind(UseClassBIfScalar: true) == ClassB) {
1417	S += utostr(X: getPolymorphicKeyType().getNeonEnum());
1418	} else {
1419	// Remove extraneous ", ".
1420	S.pop_back();
1421	S.pop_back();
1422	}
1423
1424	if (!getReturnType().isVoid() && !SRet)
1425	S += ")";
1426	S += ");";
1427
1428	std::string RetExpr;
1429	if (!SRet && !RetVar.getType().isVoid())
1430	RetExpr = RetVar.getName() + " = ";
1431
1432	OS << " " << RetExpr << S;
1433	emitNewLine();
1434	}
1435
1436	void Intrinsic::emitBody(StringRef CallPrefix) {
1437	std::vector<std::string> Lines;
1438
1439	if (!Body \|\| Body->empty()) {
1440	// Nothing specific to output - must output a builtin.
1441	emitBodyAsBuiltinCall();
1442	return;
1443	}
1444
1445	// We have a list of "things to output". The last should be returned.
1446	for (auto *I : Body->getElements()) {
1447	if (const auto *SI = dyn_cast<StringInit>(Val: I)) {
1448	Lines.push_back(x: replaceParamsIn(S: SI->getAsString()));
1449	} else if (const auto *DI = dyn_cast<DagInit>(Val: I)) {
1450	DagEmitter DE(*this, CallPrefix);
1451	Lines.push_back(x: DE.emitDag(DI).second + ";");
1452	}
1453	}
1454
1455	assert(!Lines.empty() && "Empty def?");
1456	if (!RetVar.getType().isVoid())
1457	Lines.back().insert(pos1: `0`, str: RetVar.getName() + " = ");
1458
1459	for (auto &L : Lines) {
1460	OS << " " << L;
1461	emitNewLine();
1462	}
1463	}
1464
1465	void Intrinsic::emitReturn() {
1466	if (RetVar.getType().isVoid())
1467	return;
1468	if (UseMacro)
1469	OS << " " << RetVar.getName() << ";";
1470	else
1471	OS << " return " << RetVar.getName() << ";";
1472	emitNewLine();
1473	}
1474
1475	std::pair<Type, std::string> Intrinsic::DagEmitter::emitDag(const DagInit *DI) {
1476	// At this point we should only be seeing a def.
1477	const DefInit *DefI = cast<DefInit>(Val: DI->getOperator());
1478	std::string Op = DefI->getAsString();
1479
1480	if (Op == "cast" \|\| Op == "bitcast")
1481	return emitDagCast(DI, IsBitCast: Op == "bitcast");
1482	if (Op == "shuffle")
1483	return emitDagShuffle(DI);
1484	if (Op == "dup")
1485	return emitDagDup(DI);
1486	if (Op == "dup_typed")
1487	return emitDagDupTyped(DI);
1488	if (Op == "splat")
1489	return emitDagSplat(DI);
1490	if (Op == "save_temp")
1491	return emitDagSaveTemp(DI);
1492	if (Op == "op")
1493	return emitDagOp(DI);
1494	if (Op == "call" \|\| Op == "call_mangled")
1495	return emitDagCall(DI, MatchMangledName: Op == "call_mangled");
1496	if (Op == "name_replace")
1497	return emitDagNameReplace(DI);
1498	if (Op == "literal")
1499	return emitDagLiteral(DI);
1500	assert_with_loc(Assertion: false, Str: "Unknown operation!");
1501	return std::make_pair(x: Type::getVoid(), y: "");
1502	}
1503
1504	std::pair<Type, std::string>
1505	Intrinsic::DagEmitter::emitDagOp(const DagInit *DI) {
1506	std::string Op = cast<StringInit>(Val: DI->getArg(Num: `0`))->getAsUnquotedString();
1507	if (DI->getNumArgs() == `2`) {
1508	// Unary op.
1509	std::pair<Type, std::string> R =
1510	emitDagArg(Arg: DI->getArg(Num: `1`), ArgName: std::string (DI->getArgNameStr(Num: `1`)));
1511	return std::make_pair(x&: R.first, y: Op + R.second);
1512	} else {
1513	assert(DI->getNumArgs() == `3` && "Can only handle unary and binary ops!");
1514	std::pair<Type, std::string> R1 =
1515	emitDagArg(Arg: DI->getArg(Num: `1`), ArgName: std::string (DI->getArgNameStr(Num: `1`)));
1516	std::pair<Type, std::string> R2 =
1517	emitDagArg(Arg: DI->getArg(Num: `2`), ArgName: std::string (DI->getArgNameStr(Num: `2`)));
1518	assert_with_loc(Assertion: R1.first == R2.first, Str: "Argument type mismatch!");
1519	return std::make_pair(x&: R1.first, y: R1.second + " " + Op + " " + R2.second);
1520	}
1521	}
1522
1523	std::pair<Type, std::string>
1524	Intrinsic::DagEmitter::emitDagCall(const DagInit DI, bool* MatchMangledName) {
1525	std::vector<Type> Types;
1526	std::vector<std::string> Values;
1527	for (unsigned I = `0`; I < DI->getNumArgs() - `1`; ++I) {
1528	std::pair<Type, std::string> R =
1529	emitDagArg(Arg: DI->getArg(Num: I + `1`), ArgName: std::string (DI->getArgNameStr(Num: I + `1`)));
1530	Types.push_back(x: R.first);
1531	Values.push_back(x: R.second);
1532	}
1533
1534	// Look up the called intrinsic.
1535	std::string N;
1536	if (const auto *SI = dyn_cast<StringInit>(Val: DI->getArg(Num: `0`)))
1537	N = SI->getAsUnquotedString();
1538	else
1539	N = emitDagArg(Arg: DI->getArg(Num: `0`), ArgName: "").second;
1540	std::optional<std::string> MangledName;
1541	if (MatchMangledName) {
1542	if (Intr.getRecord()->getValueAsString(FieldName: "Name").contains(Other: "laneq"))
1543	N += "q";
1544	MangledName = Intr.mangleName(Name: N, LocalCK: ClassS);
1545	}
1546	Intrinsic &Callee = Intr.Emitter.getIntrinsic(Name: N, Types, MangledName);
1547
1548	// Make sure the callee is known as an early def.
1549	Callee.setNeededEarly();
1550	Intr.Dependencies.insert(x: &Callee);
1551
1552	// Now create the call itself.
1553	std::string S;
1554	if (!Callee.isBigEndianSafe())
1555	S += CallPrefix.str();
1556	S += Callee.getMangledName(ForceClassS: true) + "(";
1557	for (unsigned I = `0`; I < DI->getNumArgs() - `1`; ++I) {
1558	if (I != `0`)
1559	S += ", ";
1560	S += Values [I];
1561	}
1562	S += ")";
1563
1564	return std::make_pair(x: Callee.getReturnType(), y&: S);
1565	}
1566
1567	std::pair<Type, std::string>
1568	Intrinsic::DagEmitter::emitDagCast(const DagInit DI, bool* IsBitCast) {
1569	// (cast MOD VAL) -> cast VAL to type given by MOD.*
1570	std::pair<Type, std::string> R =
1571	emitDagArg(Arg: DI->getArg(Num: DI->getNumArgs() - `1`),
1572	ArgName: std::string (DI->getArgNameStr(Num: DI->getNumArgs() - `1`)));
1573	Type castToType = R.first;
1574	for (unsigned ArgIdx = `0`; ArgIdx < DI->getNumArgs() - `1`; ++ArgIdx) {
1575
1576	// MOD can take several forms:
1577	// 1. $X - take the type of parameter / variable X.
1578	// 2. The value "R" - take the type of the return type.
1579	// 3. a type string
1580	// 4. The value "U" or "S" to switch the signedness.
1581	// 5. The value "H" or "D" to half or double the bitwidth.
1582	// 6. The value "8" to convert to 8-bit (signed) integer lanes.
1583	if (!DI->getArgNameStr(Num: ArgIdx).empty()) {
1584	assert_with_loc(Assertion: Intr.Variables.find(x: DI->getArgNameStr(Num: ArgIdx)) !=
1585	Intr.Variables.end(),
1586	Str: "Variable not found");
1587	castToType =
1588	Intr.Variables [std::string (DI->getArgNameStr(Num: ArgIdx))].getType();
1589	} else {
1590	const auto *SI = dyn_cast<StringInit>(Val: DI->getArg(Num: ArgIdx));
1591	assert_with_loc(Assertion: SI, Str: "Expected string type or $Name for cast type");
1592
1593	if (SI->getAsUnquotedString() == "R") {
1594	castToType = Intr.getReturnType();
1595	} else if (SI->getAsUnquotedString() == "U") {
1596	castToType.makeUnsigned();
1597	} else if (SI->getAsUnquotedString() == "S") {
1598	castToType.makeSigned();
1599	} else if (SI->getAsUnquotedString() == "H") {
1600	castToType.halveLanes();
1601	} else if (SI->getAsUnquotedString() == "D") {
1602	castToType.doubleLanes();
1603	} else if (SI->getAsUnquotedString() == "8") {
1604	castToType.makeInteger(ElemWidth: `8`, Sign: true);
1605	} else if (SI->getAsUnquotedString() == "32") {
1606	castToType.make32BitElement();
1607	} else {
1608	castToType = Type::fromTypedefName(Name: SI->getAsUnquotedString());
1609	assert_with_loc(Assertion: !castToType.isVoid(), Str: "Unknown typedef");
1610	}
1611	}
1612	}
1613
1614	std::string S;
1615	if (IsBitCast)
1616	S = "__builtin_bit_cast(" + castToType.str() + ", " + R.second + ")";
1617	else
1618	S = "(" + castToType.str() + ")(" + R.second + ")";
1619
1620	return std::make_pair(x&: castToType, y&: S);
1621	}
1622
1623	std::pair<Type, std::string>
1624	Intrinsic::DagEmitter::emitDagShuffle(const DagInit *DI) {
1625	// See the documentation in arm_neon.td for a description of these operators.
1626	class LowHalf : public SetTheory::Operator {
1627	public:
1628	void apply(SetTheory &ST, const DagInit *Expr, SetTheory::RecSet &Elts,
1629	ArrayRef<SMLoc> Loc) override {
1630	SetTheory::RecSet Elts2;
1631	ST.evaluate(begin: Expr->arg_begin(), end: Expr->arg_end(), Elts&: Elts2, Loc);
1632	Elts.insert(Start: Elts2.begin(), End: Elts2.begin() + (Elts2.size() / `2`));
1633	}
1634	};
1635
1636	class HighHalf : public SetTheory::Operator {
1637	public:
1638	void apply(SetTheory &ST, const DagInit *Expr, SetTheory::RecSet &Elts,
1639	ArrayRef<SMLoc> Loc) override {
1640	SetTheory::RecSet Elts2;
1641	ST.evaluate(begin: Expr->arg_begin(), end: Expr->arg_end(), Elts&: Elts2, Loc);
1642	Elts.insert(Start: Elts2.begin() + (Elts2.size() / `2`), End: Elts2.end());
1643	}
1644	};
1645
1646	class Rev : public SetTheory::Operator {
1647	unsigned ElementSize;
1648
1649	public:
1650	Rev(unsigned ElementSize) : ElementSize(ElementSize) {}
1651
1652	void apply(SetTheory &ST, const DagInit *Expr, SetTheory::RecSet &Elts,
1653	ArrayRef<SMLoc> Loc) override {
1654	SetTheory::RecSet Elts2;
1655	ST.evaluate(begin: Expr->arg_begin() + `1`, end: Expr->arg_end(), Elts&: Elts2, Loc);
1656
1657	int64_t VectorSize = cast<IntInit>(Val: Expr->getArg(Num: `0`))->getValue();
1658	VectorSize /= ElementSize;
1659
1660	std::vector<const Record *> Revved;
1661	for (unsigned VI = `0`; VI < Elts2.size(); VI += VectorSize) {
1662	for (int LI = VectorSize - `1`; LI >= `0`; --LI) {
1663	Revved.push_back(x: Elts2 [VI + LI]);
1664	}
1665	}
1666
1667	Elts.insert_range(R&: Revved);
1668	}
1669	};
1670
1671	class MaskExpander : public SetTheory::Expander {
1672	unsigned N;
1673
1674	public:
1675	MaskExpander(unsigned N) : N(N) {}
1676
1677	void expand(SetTheory &ST, const Record *R,
1678	SetTheory::RecSet &Elts) override {
1679	unsigned Addend = `0`;
1680	if (R->getName() == "mask0")
1681	Addend = `0`;
1682	else if (R->getName() == "mask1")
1683	Addend = N;
1684	else
1685	return;
1686	for (unsigned I = `0`; I < N; ++I)
1687	Elts.insert(X: R->getRecords().getDef(Name: "sv" + utostr(X: I + Addend)));
1688	}
1689	};
1690
1691	// (shuffle arg1, arg2, sequence)
1692	std::pair<Type, std::string> Arg1 =
1693	emitDagArg(Arg: DI->getArg(Num: `0`), ArgName: std::string (DI->getArgNameStr(Num: `0`)));
1694	std::pair<Type, std::string> Arg2 =
1695	emitDagArg(Arg: DI->getArg(Num: `1`), ArgName: std::string (DI->getArgNameStr(Num: `1`)));
1696	assert_with_loc(Assertion: Arg1.first == Arg2.first,
1697	Str: "Different types in arguments to shuffle!");
1698
1699	SetTheory ST;
1700	SetTheory::RecSet Elts;
1701	ST.addOperator(Name: "lowhalf", std::make_unique<LowHalf>());
1702	ST.addOperator(Name: "highhalf", std::make_unique<HighHalf>());
1703	ST.addOperator(Name: "rev",
1704	std::make_unique<Rev>(args: Arg1.first.getElementSizeInBits()));
1705	ST.addExpander(ClassName: "MaskExpand",
1706	std::make_unique<MaskExpander>(args: Arg1.first.getNumElements()));
1707	ST.evaluate(Expr: DI->getArg(Num: `2`), Elts, Loc: {});
1708
1709	std::string S = "__builtin_shufflevector(" + Arg1.second + ", " + Arg2.second;
1710	for (auto &E : Elts) {
1711	StringRef Name = E->getName();
1712	assert_with_loc(Assertion: Name.starts_with(Prefix: "sv"),
1713	Str: "Incorrect element kind in shuffle mask!");
1714	S += ", " + Name.drop_front(N: `2`).str();
1715	}
1716	S += ")";
1717
1718	// Recalculate the return type - the shuffle may have halved or doubled it.
1719	Type T(Arg1.first);
1720	if (Elts.size() > T.getNumElements()) {
1721	assert_with_loc(
1722	Assertion: Elts.size() == T.getNumElements() * `2`,
1723	Str: "Can only double or half the number of elements in a shuffle!");
1724	T.doubleLanes();
1725	} else if (Elts.size() < T.getNumElements()) {
1726	assert_with_loc(
1727	Assertion: Elts.size() == T.getNumElements() / `2`,
1728	Str: "Can only double or half the number of elements in a shuffle!");
1729	T.halveLanes();
1730	}
1731
1732	return std::make_pair(x&: T, y&: S);
1733	}
1734
1735	std::pair<Type, std::string>
1736	Intrinsic::DagEmitter::emitDagDup(const DagInit *DI) {
1737	assert_with_loc(Assertion: DI->getNumArgs() == `1`, Str: "dup() expects one argument");
1738	std::pair<Type, std::string> A =
1739	emitDagArg(Arg: DI->getArg(Num: `0`), ArgName: std::string (DI->getArgNameStr(Num: `0`)));
1740	assert_with_loc(Assertion: A.first.isScalar(), Str: "dup() expects a scalar argument");
1741
1742	Type T = Intr.getBaseType();
1743	assert_with_loc(Assertion: T.isVector(), Str: "dup() used but default type is scalar!");
1744	std::string S = "(" + T.str() + ") {";
1745	for (unsigned I = `0`; I < T.getNumElements(); ++I) {
1746	if (I != `0`)
1747	S += ", ";
1748	S += A.second;
1749	}
1750	S += "}";
1751
1752	return std::make_pair(x&: T, y&: S);
1753	}
1754
1755	std::pair<Type, std::string>
1756	Intrinsic::DagEmitter::emitDagDupTyped(const DagInit *DI) {
1757	assert_with_loc(Assertion: DI->getNumArgs() == `2`, Str: "dup_typed() expects two arguments");
1758	std::pair<Type, std::string> B =
1759	emitDagArg(Arg: DI->getArg(Num: `1`), ArgName: std::string (DI->getArgNameStr(Num: `1`)));
1760	assert_with_loc(Assertion: B.first.isScalar(),
1761	Str: "dup_typed() requires a scalar as the second argument");
1762	Type T;
1763	// If the type argument is a constant string, construct the type directly.
1764	if (const auto *SI = dyn_cast<StringInit>(Val: DI->getArg(Num: `0`))) {
1765	T = Type::fromTypedefName(Name: SI->getAsUnquotedString());
1766	assert_with_loc(Assertion: !T.isVoid(), Str: "Unknown typedef");
1767	} else
1768	T = emitDagArg(Arg: DI->getArg(Num: `0`), ArgName: std::string (DI->getArgNameStr(Num: `0`))).first;
1769
1770	assert_with_loc(Assertion: T.isVector(), Str: "dup_typed() used but target type is scalar!");
1771	std::string S = "(" + T.str() + ") {";
1772	for (unsigned I = `0`; I < T.getNumElements(); ++I) {
1773	if (I != `0`)
1774	S += ", ";
1775	S += B.second;
1776	}
1777	S += "}";
1778
1779	return std::make_pair(x&: T, y&: S);
1780	}
1781
1782	std::pair<Type, std::string>
1783	Intrinsic::DagEmitter::emitDagSplat(const DagInit *DI) {
1784	assert_with_loc(Assertion: DI->getNumArgs() == `2`, Str: "splat() expects two arguments");
1785	std::pair<Type, std::string> A =
1786	emitDagArg(Arg: DI->getArg(Num: `0`), ArgName: std::string (DI->getArgNameStr(Num: `0`)));
1787	std::pair<Type, std::string> B =
1788	emitDagArg(Arg: DI->getArg(Num: `1`), ArgName: std::string (DI->getArgNameStr(Num: `1`)));
1789
1790	assert_with_loc(Assertion: B.first.isScalar(),
1791	Str: "splat() requires a scalar int as the second argument");
1792
1793	std::string S = "__builtin_shufflevector(" + A.second + ", " + A.second;
1794	for (unsigned I = `0`; I < Intr.getBaseType().getNumElements(); ++I) {
1795	S += ", " + B.second;
1796	}
1797	S += ")";
1798
1799	return std::make_pair(x: Intr.getBaseType(), y&: S);
1800	}
1801
1802	std::pair<Type, std::string>
1803	Intrinsic::DagEmitter::emitDagSaveTemp(const DagInit *DI) {
1804	assert_with_loc(Assertion: DI->getNumArgs() == `2`, Str: "save_temp() expects two arguments");
1805	std::pair<Type, std::string> A =
1806	emitDagArg(Arg: DI->getArg(Num: `1`), ArgName: std::string (DI->getArgNameStr(Num: `1`)));
1807
1808	assert_with_loc(Assertion: !A.first.isVoid(),
1809	Str: "Argument to save_temp() must have non-void type!");
1810
1811	std::string N = std::string (DI->getArgNameStr(Num: `0`));
1812	assert_with_loc(Assertion: !N.empty(),
1813	Str: "save_temp() expects a name as the first argument");
1814
1815	auto [It, Inserted] =
1816	Intr.Variables.try_emplace(k: N, args&: A.first, args: N + Intr.VariablePostfix);
1817	assert_with_loc(Assertion: Inserted, Str: "Variable already defined!");
1818
1819	std::string S = A.first.str() + " " + It ->second.getName() + " = " + A.second;
1820
1821	return std::make_pair(x: Type::getVoid(), y&: S);
1822	}
1823
1824	std::pair<Type, std::string>
1825	Intrinsic::DagEmitter::emitDagNameReplace(const DagInit *DI) {
1826	std::string S = Intr.Name;
1827
1828	assert_with_loc(Assertion: DI->getNumArgs() == `2`, Str: "name_replace requires 2 arguments!");
1829	std::string ToReplace = cast<StringInit>(Val: DI->getArg(Num: `0`))->getAsUnquotedString();
1830	std::string ReplaceWith = cast<StringInit>(Val: DI->getArg(Num: `1`))->getAsUnquotedString();
1831
1832	size_t Idx = S.find(str: ToReplace);
1833
1834	assert_with_loc(Assertion: Idx != std::string::npos, Str: "name should contain '" + ToReplace + "'!");
1835	S.replace(pos: Idx, n: ToReplace.size(), str: ReplaceWith);
1836
1837	return std::make_pair(x: Type::getVoid(), y&: S);
1838	}
1839
1840	std::pair<Type, std::string>
1841	Intrinsic::DagEmitter::emitDagLiteral(const DagInit *DI) {
1842	std::string Ty = cast<StringInit>(Val: DI->getArg(Num: `0`))->getAsUnquotedString();
1843	std::string Value = cast<StringInit>(Val: DI->getArg(Num: `1`))->getAsUnquotedString();
1844	return std::make_pair(x: Type::fromTypedefName(Name: Ty), y&: Value);
1845	}
1846
1847	std::pair<Type, std::string>
1848	Intrinsic::DagEmitter::emitDagArg(const Init *Arg, std::string ArgName) {
1849	if (!ArgName.empty()) {
1850	assert_with_loc(Assertion: !Arg->isComplete(),
1851	Str: "Arguments must either be DAGs or names, not both!");
1852	assert_with_loc(Assertion: Intr.Variables.find(x: ArgName) != Intr.Variables.end(),
1853	Str: "Variable not defined!");
1854	Variable &V = Intr.Variables [ArgName];
1855	return std::make_pair(x: V.getType(), y: V.getName());
1856	}
1857
1858	assert(Arg && "Neither ArgName nor Arg?!");
1859	const auto *DI = dyn_cast<DagInit>(Val: Arg);
1860	assert_with_loc(Assertion: DI, Str: "Arguments must either be DAGs or names!");
1861
1862	return emitDag(DI);
1863	}
1864
1865	std::string Intrinsic::generate() {
1866	// Avoid duplicated code for big and little endian
1867	if (isBigEndianSafe()) {
1868	generateImpl(ReverseArguments: false, NamePrefix: "", CallPrefix: "");
1869	return OS.str();
1870	}
1871	// Little endian intrinsics are simple and don't require any argument
1872	// swapping.
1873	OS << "#ifdef __LITTLE_ENDIAN__\n";
1874
1875	generateImpl(ReverseArguments: false, NamePrefix: "", CallPrefix: "");
1876
1877	OS << "#else\n";
1878
1879	// Big endian intrinsics are more complex. The user intended these intrinsics
1880	// to operate on a vector "as-if" loaded by LDR (for AArch64), VLDR (for
1881	// 64-bit vectors on AArch32), or VLDM (for 128-bit vectors on AArch32) but
1882	// we load as-if LD1 (for AArch64) or VLD1 (for AArch32). So we should swap
1883	// all arguments and swap the return value too.
1884	//
1885	// If we call sub-intrinsics, we should call a version that does
1886	// not re-swap the arguments!
1887	generateImpl(ReverseArguments: true, NamePrefix: "", CallPrefix: "__noswap_");
1888
1889	// If we're needed early, create a non-swapping variant for
1890	// big-endian.
1891	if (NeededEarly) {
1892	generateImpl(ReverseArguments: false, NamePrefix: "__noswap_", CallPrefix: "__noswap_");
1893	}
1894	OS << "#endif\n\n";
1895
1896	return OS.str();
1897	}
1898
1899	void Intrinsic::generateImpl(bool ReverseArguments,
1900	StringRef NamePrefix, StringRef CallPrefix) {
1901	CurrentRecord = R;
1902
1903	// If we call a macro, our local variables may be corrupted due to
1904	// lack of proper lexical scoping. So, add a globally unique postfix
1905	// to every variable.
1906	//
1907	// indexBody() should have set up the Dependencies set by now.
1908	for (auto *I : Dependencies)
1909	if (I->UseMacro) {
1910	VariablePostfix = "_" + utostr(X: Emitter.getUniqueNumber());
1911	break;
1912	}
1913
1914	initVariables();
1915
1916	emitPrototype(NamePrefix);
1917
1918	if (IsUnavailable) {
1919	OS << " __attribute__((unavailable));";
1920	} else {
1921	emitOpeningBrace();
1922	// Emit return variable declaration first as to not trigger
1923	// -Wdeclaration-after-statement.
1924	emitReturnVarDecl();
1925	emitShadowedArgs();
1926	if (ReverseArguments)
1927	emitArgumentReversal();
1928	emitBody(CallPrefix);
1929	if (ReverseArguments)
1930	emitReturnReversal();
1931	emitReturn();
1932	emitClosingBrace();
1933	}
1934	OS << "\n";
1935
1936	CurrentRecord = nullptr;
1937	}
1938
1939	void Intrinsic::indexBody() {
1940	CurrentRecord = R;
1941
1942	initVariables();
1943	// Emit return variable declaration first as to not trigger
1944	// -Wdeclaration-after-statement.
1945	emitReturnVarDecl();
1946	emitBody(CallPrefix: "");
1947	OS.str(s: "");
1948
1949	CurrentRecord = nullptr;
1950	}
1951
1952	//===----------------------------------------------------------------------===//
1953	// NeonEmitter implementation
1954	//===----------------------------------------------------------------------===//
1955
1956	Intrinsic &NeonEmitter::getIntrinsic(StringRef Name, ArrayRef<Type> Types,
1957	std::optional<std::string> MangledName) {
1958	// First, look up the name in the intrinsic map.
1959	assert_with_loc(Assertion: IntrinsicMap.find(x: Name) != IntrinsicMap.end(),
1960	Str: ("Intrinsic '" + Name + "' not found!").str());
1961	auto &V = IntrinsicMap.find(x: Name)->second;
1962	std::vector<Intrinsic *> GoodVec;
1963
1964	// Create a string to print if we end up failing.
1965	std::string ErrMsg = "looking up intrinsic '" + Name.str() + "(";
1966	for (unsigned I = `0`; I < Types.size(); ++I) {
1967	if (I != `0`)
1968	ErrMsg += ", ";
1969	ErrMsg += Types [I].str();
1970	}
1971	ErrMsg += ")'\n";
1972	ErrMsg += "Available overloads:\n";
1973
1974	// Now, look through each intrinsic implementation and see if the types are
1975	// compatible.
1976	for (auto &I : V) {
1977	ErrMsg += " - " + I.getReturnType().str() + " " + I.getMangledName();
1978	ErrMsg += "(";
1979	for (unsigned A = `0`; A < I.getNumParams(); ++A) {
1980	if (A != `0`)
1981	ErrMsg += ", ";
1982	ErrMsg += I.getParamType(I: A).str();
1983	}
1984	ErrMsg += ")\n";
1985
1986	if (MangledName && MangledName != I.getMangledName(ForceClassS: true))
1987	continue;
1988
1989	if (I.getNumParams() != Types.size())
1990	continue;
1991
1992	unsigned ArgNum = `0`;
1993	bool MatchingArgumentTypes = all_of(Range&: Types, P: [&](const auto &Type) {
1994	return Type == I.getParamType(I: ArgNum++);
1995	});
1996
1997	if (MatchingArgumentTypes)
1998	GoodVec.push_back(x: &I);
1999	}
2000
2001	assert_with_loc(Assertion: !GoodVec.empty(),
2002	Str: "No compatible intrinsic found - " + ErrMsg);
2003	assert_with_loc(Assertion: GoodVec.size() == `1`, Str: "Multiple overloads found - " + ErrMsg);
2004
2005	return *GoodVec.front();
2006	}
2007
2008	void NeonEmitter::createIntrinsic(const Record *R,
2009	SmallVectorImpl<Intrinsic *> &Out) {
2010	std::string Name = std::string (R->getValueAsString(FieldName: "Name"));
2011	std::string Proto = std::string (R->getValueAsString(FieldName: "Prototype"));
2012	std::string Types = std::string (R->getValueAsString(FieldName: "Types"));
2013	const Record *OperationRec = R->getValueAsDef(FieldName: "Operation");
2014	bool BigEndianSafe = R->getValueAsBit(FieldName: "BigEndianSafe");
2015	std::string ArchGuard = std::string (R->getValueAsString(FieldName: "ArchGuard"));
2016	std::string TargetGuard = std::string (R->getValueAsString(FieldName: "TargetGuard"));
2017	bool IsUnavailable = OperationRec->getValueAsBit(FieldName: "Unavailable");
2018	std::string CartesianProductWith = std::string (R->getValueAsString(FieldName: "CartesianProductWith"));
2019
2020	// Set the global current record. This allows assert_with_loc to produce
2021	// decent location information even when highly nested.
2022	CurrentRecord = R;
2023
2024	const ListInit *Body = OperationRec->getValueAsListInit(FieldName: "Ops");
2025
2026	std::vector<TypeSpec> TypeSpecs = TypeSpec::fromTypeSpecs(Str: Types);
2027
2028	ClassKind CK = ClassNone;
2029	if (!R->getDirectSuperClasses().empty())
2030	CK = ClassMap [R->getDirectSuperClasses()[`0`].first];
2031
2032	std::vector<std::pair<TypeSpec, TypeSpec>> NewTypeSpecs;
2033	if (!CartesianProductWith.empty()) {
2034	std::vector<TypeSpec> ProductTypeSpecs = TypeSpec::fromTypeSpecs(Str: CartesianProductWith);
2035	for (auto TS : TypeSpecs) {
2036	Type DefaultT(TS, ".");
2037	for (auto SrcTS : ProductTypeSpecs) {
2038	Type DefaultSrcT(SrcTS, ".");
2039	if (TS == SrcTS \|\|
2040	DefaultSrcT.getSizeInBits() != DefaultT.getSizeInBits())
2041	continue;
2042	NewTypeSpecs.push_back(x: std::make_pair(x&: TS, y&: SrcTS));
2043	}
2044	}
2045	} else {
2046	for (auto TS : TypeSpecs) {
2047	NewTypeSpecs.push_back(x: std::make_pair(x&: TS, y&: TS));
2048	}
2049	}
2050
2051	sort(C&: NewTypeSpecs);
2052	NewTypeSpecs.erase(first: llvm::unique(R&: NewTypeSpecs), last: NewTypeSpecs.end());
2053	auto &Entry = IntrinsicMap [Name];
2054
2055	for (auto &I : NewTypeSpecs) {
2056
2057	// MFloat8 type is only available on AArch64. If encountered set ArchGuard
2058	// correctly.
2059	std::string NewArchGuard = ArchGuard;
2060	if (Type (I.first, ".").isMFloat8()) {
2061	if (NewArchGuard.empty()) {
2062	NewArchGuard = "defined(__aarch64__)";
2063	} else if (NewArchGuard.find(s: "defined(__aarch64__)") ==
2064	std::string::npos) {
2065	NewArchGuard = "defined(__aarch64__) && (" + NewArchGuard + ")";
2066	}
2067	}
2068	Entry.emplace_back(args&: R, args&: Name, args&: Proto, args&: I.first, args&: I.second, args&: CK, args&: Body, args&: *this,
2069	args&: NewArchGuard, args&: TargetGuard, args&: IsUnavailable, args&: BigEndianSafe);
2070	Out.push_back(Elt: &Entry.back());
2071	}
2072
2073	CurrentRecord = nullptr;
2074	}
2075
2076	/// genBuiltinsDef: Generate the builtin infos, checking for unique builtin
2077	/// declarations.
2078	void NeonEmitter::genBuiltinsDef(raw_ostream &OS,
2079	SmallVectorImpl<Intrinsic *> &Defs) {
2080	// We only want to emit a builtin once, and in order of its name.
2081	std::map<std::string, Intrinsic *> Builtins;
2082
2083	llvm::StringToOffsetTable Table;
2084	Table.GetOrAddStringOffset(Str: "");
2085	Table.GetOrAddStringOffset(Str: "n");
2086
2087	for (auto *Def : Defs) {
2088	if (Def->hasBody())
2089	continue;
2090
2091	if (Builtins.insert(x: {Def->getMangledName(), Def}).second) {
2092	Table.GetOrAddStringOffset(Str: Def->getMangledName());
2093	Table.GetOrAddStringOffset(Str: Def->getBuiltinTypeStr());
2094	Table.GetOrAddStringOffset(Str: Def->getTargetGuard());
2095	}
2096	}
2097
2098	OS << "#ifdef GET_NEON_BUILTIN_ENUMERATORS\n";
2099	for (const auto &[Name, Def] : Builtins) {
2100	OS << " BI__builtin_neon_" << Name << ",\n";
2101	}
2102	OS << "#endif // GET_NEON_BUILTIN_ENUMERATORS\n\n";
2103
2104	OS << "#ifdef GET_NEON_BUILTIN_STR_TABLE\n";
2105	Table.EmitStringTableDef(OS, Name: "BuiltinStrings");
2106	OS << "#endif // GET_NEON_BUILTIN_STR_TABLE\n\n";
2107
2108	OS << "#ifdef GET_NEON_BUILTIN_INFOS\n";
2109	for (const auto &[Name, Def] : Builtins) {
2110	OS << " Builtin::Info{Builtin::Info::StrOffsets{"
2111	<< Table.GetStringOffset(Str: Def->getMangledName()) << " /* "
2112	<< Def->getMangledName() << " */, ";
2113	OS << Table.GetStringOffset(Str: Def->getBuiltinTypeStr()) << " /* "
2114	<< Def->getBuiltinTypeStr() << " */, ";
2115	OS << Table.GetStringOffset(Str: "n") << " /* n */, ";
2116	OS << Table.GetStringOffset(Str: Def->getTargetGuard()) << " /* "
2117	<< Def->getTargetGuard() << " */}, ";
2118	OS << "HeaderDesc::NO_HEADER, ALL_LANGUAGES},\n";
2119	}
2120	OS << "#endif // GET_NEON_BUILTIN_INFOS\n\n";
2121	}
2122
2123	void NeonEmitter::genStreamingSVECompatibleList(
2124	raw_ostream &OS, SmallVectorImpl<Intrinsic *> &Defs) {
2125	OS << "#ifdef GET_NEON_STREAMING_COMPAT_FLAG\n";
2126
2127	std::set<std::string> Emitted;
2128	for (auto *Def : Defs) {
2129	// If the def has a body (that is, it has Operation DAGs), it won't call
2130	// __builtin_neon_ so we don't need to generate a definition for it.*
2131	if (Def->hasBody())
2132	continue;
2133
2134	std::string Name = Def->getMangledName();
2135	if (Emitted.find(x: Name) != Emitted.end())
2136	continue;
2137
2138	// FIXME: We should make exceptions here for some NEON builtins that are
2139	// permitted in streaming mode.
2140	OS << "case NEON::BI__builtin_neon_" << Name
2141	<< ": BuiltinType = ArmNonStreaming; break;\n";
2142	Emitted.insert(x: Name);
2143	}
2144	OS << "#endif\n\n";
2145	}
2146
2147	/// Generate the ARM and AArch64 overloaded type checking code for
2148	/// SemaChecking.cpp, checking for unique builtin declarations.
2149	void NeonEmitter::genOverloadTypeCheckCode(raw_ostream &OS,
2150	SmallVectorImpl<Intrinsic *> &Defs) {
2151	OS << "#ifdef GET_NEON_OVERLOAD_CHECK\n";
2152
2153	// We record each overload check line before emitting because subsequent Inst
2154	// definitions may extend the number of permitted types (i.e. augment the
2155	// Mask). Use std::map to avoid sorting the table by hash number.
2156	struct OverloadInfo {
2157	uint64_t Mask = `0ULL`;
2158	int PtrArgNum = `0`;
2159	bool HasConstPtr = false;
2160	OverloadInfo() = default;
2161	};
2162	std::map<std::string, OverloadInfo> OverloadMap;
2163
2164	for (auto *Def : Defs) {
2165	// If the def has a body (that is, it has Operation DAGs), it won't call
2166	// __builtin_neon_ so we don't need to generate a definition for it.*
2167	if (Def->hasBody())
2168	continue;
2169	// Functions which have a scalar argument cannot be overloaded, no need to
2170	// check them if we are emitting the type checking code.
2171	if (Def->protoHasScalar())
2172	continue;
2173
2174	uint64_t Mask = `0ULL`;
2175	Mask \|= `1ULL` << Def->getPolymorphicKeyType().getNeonEnum();
2176
2177	// Check if the function has a pointer or const pointer argument.
2178	int PtrArgNum = -`1`;
2179	bool HasConstPtr = false;
2180	for (unsigned I = `0`; I < Def->getNumParams(); ++I) {
2181	const auto &Type = Def->getParamType(I);
2182	if (Type.isPointer()) {
2183	PtrArgNum = I;
2184	HasConstPtr = Type.isConstPointer();
2185	}
2186	}
2187
2188	// For sret builtins, adjust the pointer argument index.
2189	if (PtrArgNum >= `0` && Def->getReturnType().getNumVectors() > `1`)
2190	PtrArgNum += `1`;
2191
2192	std::string Name = Def->getName();
2193	// Omit type checking for the pointer arguments of vld1_lane, vld1_dup,
2194	// vst1_lane, vldap1_lane, and vstl1_lane intrinsics. Using a pointer to
2195	// the vector element type with one of those operations causes codegen to
2196	// select an aligned load/store instruction. If you want an unaligned
2197	// operation, the pointer argument needs to have less alignment than element
2198	// type, so just accept any pointer type.
2199	if (Name == "vld1_lane" \|\| Name == "vld1_dup" \|\| Name == "vst1_lane" \|\|
2200	Name == "vldap1_lane" \|\| Name == "vstl1_lane") {
2201	PtrArgNum = -`1`;
2202	HasConstPtr = false;
2203	}
2204
2205	if (Mask) {
2206	OverloadInfo &OI = OverloadMap [Def->getMangledName()];
2207	OI.Mask \|= Mask;
2208	OI.PtrArgNum \|= PtrArgNum;
2209	OI.HasConstPtr = HasConstPtr;
2210	}
2211	}
2212
2213	for (auto &I : OverloadMap) {
2214	OverloadInfo &OI = I.second;
2215
2216	OS << "case NEON::BI__builtin_neon_" << I.first << ": ";
2217	OS << "mask = 0x" << Twine::utohexstr(Val: OI.Mask) << "ULL";
2218	if (OI.PtrArgNum >= `0`)
2219	OS << "; PtrArgNum = " << OI.PtrArgNum;
2220	if (OI.HasConstPtr)
2221	OS << "; HasConstPtr = true";
2222	OS << "; break;\n";
2223	}
2224	OS << "#endif\n\n";
2225	}
2226
2227	inline bool
2228	NeonEmitter::areRangeChecksCompatible(const ArrayRef<ImmCheck> ChecksA,
2229	const ArrayRef<ImmCheck> ChecksB) {
2230	// If multiple intrinsics map to the same builtin, we must ensure that the
2231	// intended range checks performed in SemaArm.cpp do not contradict each
2232	// other, as these are emitted once per-buitlin.
2233	//
2234	// The arguments to be checked and type of each check to be performed must be
2235	// the same. The element types may differ as they will be resolved
2236	// per-intrinsic as overloaded types by SemaArm.cpp, though the vector sizes
2237	// are not and so must be the same.
2238	bool compat =
2239	std::equal(first1: ChecksA.begin(), last1: ChecksA.end(), first2: ChecksB.begin(), last2: ChecksB.end(),
2240	binary_pred: [](const auto &A, const auto &B) {
2241	return A.getImmArgIdx() == B.getImmArgIdx() &&
2242	A.getKind() == B.getKind() &&
2243	A.getVecSizeInBits() == B.getVecSizeInBits();
2244	});
2245
2246	return compat;
2247	}
2248
2249	void NeonEmitter::genIntrinsicRangeCheckCode(
2250	raw_ostream &OS, SmallVectorImpl<Intrinsic *> &Defs) {
2251	std::unordered_map<std::string, ArrayRef<ImmCheck>> Emitted;
2252
2253	OS << "#ifdef GET_NEON_IMMEDIATE_CHECK\n";
2254	for (auto &Def : Defs) {
2255	// If the Def has a body (operation DAGs), it is not a __builtin_neon_
2256	if (Def->hasBody() \|\| !Def->hasImmediate())
2257	continue;
2258
2259	// Sorted by immediate argument index
2260	ArrayRef<ImmCheck> Checks = Def->getImmChecks();
2261
2262	auto [It, Inserted] = Emitted.try_emplace(k: Def->getMangledName(), args&: Checks);
2263	if (!Inserted) {
2264	assert(areRangeChecksCompatible(Checks, It ->second) &&
2265	"Neon intrinsics with incompatible immediate range checks cannot "
2266	"share a builtin.");
2267	continue; // Ensure this is emitted only once
2268	}
2269
2270	// Emit builtin's range checks
2271	OS << "case NEON::BI__builtin_neon_" << Def->getMangledName() << ":\n";
2272	for (const auto &Check : Checks) {
2273	OS << " ImmChecks.emplace_back(" << Check.getImmArgIdx() << ", "
2274	<< Check.getKind() << ", " << Check.getElementSizeInBits() << ", "
2275	<< Check.getVecSizeInBits() << ");\n"
2276	<< " break;\n";
2277	}
2278	}
2279
2280	OS << "#endif\n\n";
2281	}
2282
2283	/// runHeader - Emit a file with sections defining:
2284	/// 1. the NEON section of BuiltinsARM.def and BuiltinsAArch64.def.
2285	/// 2. the SemaChecking code for the type overload checking.
2286	/// 3. the SemaChecking code for validation of intrinsic immediate arguments.
2287	void NeonEmitter::runHeader(raw_ostream &OS) {
2288	SmallVector<Intrinsic *, `128`> Defs;
2289	for (const Record *R : Records.getAllDerivedDefinitions(ClassName: "Inst"))
2290	createIntrinsic(R, Out&: Defs);
2291
2292	// Generate shared BuiltinsXXX.def
2293	genBuiltinsDef(OS, Defs);
2294
2295	// Generate ARM overloaded type checking code for SemaChecking.cpp
2296	genOverloadTypeCheckCode(OS, Defs);
2297
2298	genStreamingSVECompatibleList(OS, Defs);
2299
2300	// Generate ARM range checking code for shift/lane immediates.
2301	genIntrinsicRangeCheckCode(OS, Defs);
2302	}
2303
2304	static void emitNeonTypeDefs(const std::string& types, raw_ostream &OS) {
2305	std::string TypedefTypes(types);
2306	std::vector<TypeSpec> TDTypeVec = TypeSpec::fromTypeSpecs(Str: TypedefTypes);
2307
2308	// Emit vector typedefs.
2309	bool InIfdef = false;
2310	for (auto &TS : TDTypeVec) {
2311	bool IsA64 = false;
2312	Type T(TS, ".");
2313	if (T.isDouble() \|\| T.isMFloat8())
2314	IsA64 = true;
2315
2316	if (InIfdef && !IsA64) {
2317	OS << "#endif\n";
2318	InIfdef = false;
2319	}
2320	if (!InIfdef && IsA64) {
2321	OS << "#if defined(__aarch64__) \|\| defined(__arm64ec__)\n";
2322	InIfdef = true;
2323	}
2324
2325	if (T.isPoly())
2326	OS << "typedef __attribute__((neon_polyvector_type(";
2327	else
2328	OS << "typedef __attribute__((neon_vector_type(";
2329
2330	Type T2 = T;
2331	T2.makeScalar();
2332	OS << T.getNumElements();
2333	OS << "))) " << T2.str();
2334	OS << " " << T.str() << ";\n";
2335	}
2336	if (InIfdef)
2337	OS << "#endif\n";
2338	OS << "\n";
2339
2340	// Emit struct typedefs.
2341	InIfdef = false;
2342	for (unsigned NumMembers = `2`; NumMembers <= `4`; ++NumMembers) {
2343	for (auto &TS : TDTypeVec) {
2344	bool IsA64 = false;
2345	Type T(TS, ".");
2346	if (T.isDouble() \|\| T.isMFloat8())
2347	IsA64 = true;
2348
2349	if (InIfdef && !IsA64) {
2350	OS << "#endif\n";
2351	InIfdef = false;
2352	}
2353	if (!InIfdef && IsA64) {
2354	OS << "#if defined(__aarch64__) \|\| defined(__arm64ec__)\n";
2355	InIfdef = true;
2356	}
2357
2358	const char Mods[] = { static_cast<char>(`'2'` + (NumMembers - `2`)), `0`};
2359	Type VT(TS, Mods);
2360	OS << "typedef struct " << VT.str() << " {\n";
2361	OS << " " << T.str() << " val";
2362	OS << "[" << NumMembers << "]";
2363	OS << ";\n} ";
2364	OS << VT.str() << ";\n";
2365	OS << "\n";
2366	}
2367	}
2368	if (InIfdef)
2369	OS << "#endif\n";
2370	}
2371
2372	/// run - Read the records in arm_neon.td and output arm_neon.h. arm_neon.h
2373	/// is comprised of type definitions and function declarations.
2374	void NeonEmitter::run(raw_ostream &OS) {
2375	OS << "/*===---- arm_neon.h - ARM Neon intrinsics "
2376	"------------------------------"
2377	"---===\n"
2378	" *\n"
2379	" * Permission is hereby granted, free of charge, to any person "
2380	"obtaining "
2381	"a copy\n"
2382	" * of this software and associated documentation files (the "
2383	"\"Software\"),"
2384	" to deal\n"
2385	" * in the Software without restriction, including without limitation "
2386	"the "
2387	"rights\n"
2388	" * to use, copy, modify, merge, publish, distribute, sublicense, "
2389	"and/or sell\n"
2390	" * copies of the Software, and to permit persons to whom the Software "
2391	"is\n"
2392	" * furnished to do so, subject to the following conditions:\n"
2393	" *\n"
2394	" * The above copyright notice and this permission notice shall be "
2395	"included in\n"
2396	" * all copies or substantial portions of the Software.\n"
2397	" *\n"
2398	" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, "
2399	"EXPRESS OR\n"
2400	" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF "
2401	"MERCHANTABILITY,\n"
2402	" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT "
2403	"SHALL THE\n"
2404	" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR "
2405	"OTHER\n"
2406	" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, "
2407	"ARISING FROM,\n"
2408	" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER "
2409	"DEALINGS IN\n"
2410	" * THE SOFTWARE.\n"
2411	" *\n"
2412	" *===-----------------------------------------------------------------"
2413	"---"
2414	"---===\n"
2415	" */\n\n";
2416
2417	OS << "#ifndef __ARM_NEON_H\n";
2418	OS << "#define __ARM_NEON_H\n\n";
2419
2420	OS << "#ifndef __ARM_FP\n";
2421	OS << "#error \"NEON intrinsics not available with the soft-float ABI. "
2422	"Please use -mfloat-abi=softfp or -mfloat-abi=hard\"\n";
2423	OS << "#else\n\n";
2424
2425	OS << "#include <stdint.h>\n\n";
2426
2427	OS << "#include <arm_bf16.h>\n";
2428
2429	OS << "#include <arm_vector_types.h>\n";
2430
2431	// For now, signedness of polynomial types depends on target
2432	OS << "#if defined(__aarch64__) \|\| defined(__arm64ec__)\n";
2433	OS << "typedef uint8_t poly8_t;\n";
2434	OS << "typedef uint16_t poly16_t;\n";
2435	OS << "typedef uint64_t poly64_t;\n";
2436	OS << "typedef __uint128_t poly128_t;\n";
2437	OS << "#else\n";
2438	OS << "typedef int8_t poly8_t;\n";
2439	OS << "typedef int16_t poly16_t;\n";
2440	OS << "typedef int64_t poly64_t;\n";
2441	OS << "#endif\n";
2442	emitNeonTypeDefs(types: "PcQPcPsQPsPlQPl", OS);
2443
2444	OS << "#define __ai static __inline__ __attribute__((__always_inline__, "
2445	"__nodebug__))\n\n";
2446
2447	// Shufflevector arguments lists for endian-swapping vectors for big-endian
2448	// targets. For AArch64, we need to reverse every lane in the vector, but for
2449	// AArch32 we need to reverse the lanes within each 64-bit chunk of the
2450	// vector. The naming convention here is __lane_reverse_<n>_<m>, where <n> is
2451	// the length of the vector in bits, and <m> is length of each lane in bits.
2452	OS << "#if !defined(__LITTLE_ENDIAN__)\n";
2453	OS << "#if defined(__aarch64__) \|\| defined(__arm64ec__)\n";
2454	OS << "#define __lane_reverse_64_32 1,0\n";
2455	OS << "#define __lane_reverse_64_16 3,2,1,0\n";
2456	OS << "#define __lane_reverse_64_8 7,6,5,4,3,2,1,0\n";
2457	OS << "#define __lane_reverse_128_64 1,0\n";
2458	OS << "#define __lane_reverse_128_32 3,2,1,0\n";
2459	OS << "#define __lane_reverse_128_16 7,6,5,4,3,2,1,0\n";
2460	OS << "#define __lane_reverse_128_8 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0\n";
2461	OS << "#else\n";
2462	OS << "#define __lane_reverse_64_32 1,0\n";
2463	OS << "#define __lane_reverse_64_16 3,2,1,0\n";
2464	OS << "#define __lane_reverse_64_8 7,6,5,4,3,2,1,0\n";
2465	OS << "#define __lane_reverse_128_64 0,1\n";
2466	OS << "#define __lane_reverse_128_32 1,0,3,2\n";
2467	OS << "#define __lane_reverse_128_16 3,2,1,0,7,6,5,4\n";
2468	OS << "#define __lane_reverse_128_8 7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8\n";
2469	OS << "#endif\n";
2470	OS << "#endif\n";
2471
2472	SmallVector<Intrinsic *, `128`> Defs;
2473	for (const Record *R : Records.getAllDerivedDefinitions(ClassName: "Inst"))
2474	createIntrinsic(R, Out&: Defs);
2475
2476	for (auto *I : Defs)
2477	I->indexBody();
2478
2479	stable_sort(Range&: Defs, C: deref<std::less<>>());
2480
2481	// Only emit a def when its requirements have been met.
2482	// FIXME: This loop could be made faster, but it's fast enough for now.
2483	bool MadeProgress = true;
2484	std::string InGuard;
2485	while (!Defs.empty() && MadeProgress) {
2486	MadeProgress = false;
2487
2488	for (SmallVector<Intrinsic *, `128`>::iterator I = Defs.begin();
2489	I != Defs.end(); /No step/) {
2490	bool DependenciesSatisfied = true;
2491	for (auto II : (I)->getDependencies()) {
2492	if (is_contained(Range&: Defs, Element: II))
2493	DependenciesSatisfied = false;
2494	}
2495	if (!DependenciesSatisfied) {
2496	// Try the next one.
2497	++I;
2498	continue;
2499	}
2500
2501	// Emit #endif/#if pair if needed.
2502	if ((*I)->getArchGuard() != InGuard) {
2503	if (!InGuard.empty())
2504	OS << "#endif\n";
2505	InGuard = (*I)->getArchGuard();
2506	if (!InGuard.empty())
2507	OS << "#if " << InGuard << "\n";
2508	}
2509
2510	// Actually generate the intrinsic code.
2511	OS << (*I)->generate();
2512
2513	MadeProgress = true;
2514	I = Defs.erase(CI: I);
2515	}
2516	}
2517	assert(Defs.empty() && "Some requirements were not satisfied!");
2518	if (!InGuard.empty())
2519	OS << "#endif\n";
2520
2521	OS << "\n";
2522	OS << "#undef __ai\n\n";
2523	OS << "#endif /* if !defined(__ARM_NEON) */\n";
2524	OS << "#endif /* ifndef __ARM_FP */\n";
2525	}
2526
2527	/// run - Read the records in arm_fp16.td and output arm_fp16.h. arm_fp16.h
2528	/// is comprised of type definitions and function declarations.
2529	void NeonEmitter::runFP16(raw_ostream &OS) {
2530	OS << "/*===---- arm_fp16.h - ARM FP16 intrinsics "
2531	"------------------------------"
2532	"---===\n"
2533	" *\n"
2534	" * Permission is hereby granted, free of charge, to any person "
2535	"obtaining a copy\n"
2536	" * of this software and associated documentation files (the "
2537	"\"Software\"), to deal\n"
2538	" * in the Software without restriction, including without limitation "
2539	"the rights\n"
2540	" * to use, copy, modify, merge, publish, distribute, sublicense, "
2541	"and/or sell\n"
2542	" * copies of the Software, and to permit persons to whom the Software "
2543	"is\n"
2544	" * furnished to do so, subject to the following conditions:\n"
2545	" *\n"
2546	" * The above copyright notice and this permission notice shall be "
2547	"included in\n"
2548	" * all copies or substantial portions of the Software.\n"
2549	" *\n"
2550	" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, "
2551	"EXPRESS OR\n"
2552	" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF "
2553	"MERCHANTABILITY,\n"
2554	" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT "
2555	"SHALL THE\n"
2556	" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR "
2557	"OTHER\n"
2558	" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, "
2559	"ARISING FROM,\n"
2560	" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER "
2561	"DEALINGS IN\n"
2562	" * THE SOFTWARE.\n"
2563	" *\n"
2564	" *===-----------------------------------------------------------------"
2565	"---"
2566	"---===\n"
2567	" */\n\n";
2568
2569	OS << "#ifndef __ARM_FP16_H\n";
2570	OS << "#define __ARM_FP16_H\n\n";
2571
2572	OS << "#include <stdint.h>\n\n";
2573
2574	OS << "typedef __fp16 float16_t;\n";
2575
2576	OS << "#define __ai static __inline__ __attribute__((__always_inline__, "
2577	"__nodebug__))\n\n";
2578
2579	SmallVector<Intrinsic *, `128`> Defs;
2580	for (const Record *R : Records.getAllDerivedDefinitions(ClassName: "Inst"))
2581	createIntrinsic(R, Out&: Defs);
2582
2583	for (auto *I : Defs)
2584	I->indexBody();
2585
2586	stable_sort(Range&: Defs, C: deref<std::less<>>());
2587
2588	// Only emit a def when its requirements have been met.
2589	// FIXME: This loop could be made faster, but it's fast enough for now.
2590	bool MadeProgress = true;
2591	std::string InGuard;
2592	while (!Defs.empty() && MadeProgress) {
2593	MadeProgress = false;
2594
2595	for (SmallVector<Intrinsic *, `128`>::iterator I = Defs.begin();
2596	I != Defs.end(); /No step/) {
2597	bool DependenciesSatisfied = true;
2598	for (auto II : (I)->getDependencies()) {
2599	if (is_contained(Range&: Defs, Element: II))
2600	DependenciesSatisfied = false;
2601	}
2602	if (!DependenciesSatisfied) {
2603	// Try the next one.
2604	++I;
2605	continue;
2606	}
2607
2608	// Emit #endif/#if pair if needed.
2609	if ((*I)->getArchGuard() != InGuard) {
2610	if (!InGuard.empty())
2611	OS << "#endif\n";
2612	InGuard = (*I)->getArchGuard();
2613	if (!InGuard.empty())
2614	OS << "#if " << InGuard << "\n";
2615	}
2616
2617	// Actually generate the intrinsic code.
2618	OS << (*I)->generate();
2619
2620	MadeProgress = true;
2621	I = Defs.erase(CI: I);
2622	}
2623	}
2624	assert(Defs.empty() && "Some requirements were not satisfied!");
2625	if (!InGuard.empty())
2626	OS << "#endif\n";
2627
2628	OS << "\n";
2629	OS << "#undef __ai\n\n";
2630	OS << "#endif /* __ARM_FP16_H */\n";
2631	}
2632
2633	void NeonEmitter::runVectorTypes(raw_ostream &OS) {
2634	OS << "/*===---- arm_vector_types - ARM vector type "
2635	"------===\n"
2636	" *\n"
2637	" *\n"
2638	" * Part of the LLVM Project, under the Apache License v2.0 with LLVM "
2639	"Exceptions.\n"
2640	" * See https://llvm.org/LICENSE.txt for license information.\n"
2641	" * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception\n"
2642	" *\n"
2643	" *===-----------------------------------------------------------------"
2644	"------===\n"
2645	" */\n\n";
2646	OS << "#if !defined(__ARM_NEON_H) && !defined(__ARM_SVE_H)\n";
2647	OS << "#error \"This file should not be used standalone. Please include"
2648	" arm_neon.h or arm_sve.h instead\"\n\n";
2649	OS << "#endif\n";
2650	OS << "#ifndef __ARM_NEON_TYPES_H\n";
2651	OS << "#define __ARM_NEON_TYPES_H\n";
2652	OS << "typedef float float32_t;\n";
2653	OS << "typedef __fp16 float16_t;\n";
2654
2655	OS << "#if defined(__aarch64__) \|\| defined(__arm64ec__)\n";
2656	OS << "typedef __mfp8 mfloat8_t;\n";
2657	OS << "typedef double float64_t;\n";
2658	OS << "#endif\n\n";
2659
2660	OS << R"(
2661	typedef uint64_t fpm_t;
2662
2663	enum __ARM_FPM_FORMAT { __ARM_FPM_E5M2, __ARM_FPM_E4M3 };
2664
2665	enum __ARM_FPM_OVERFLOW { __ARM_FPM_INFNAN, __ARM_FPM_SATURATE };
2666
2667	static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__))
2668	__arm_fpm_init(void) {
2669	return 0;
2670	}
2671
2672	static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__))
2673	__arm_set_fpm_src1_format(fpm_t __fpm, enum __ARM_FPM_FORMAT __format) {
2674	return (__fpm & ~7ull) \| (fpm_t)__format;
2675	}
2676
2677	static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__))
2678	__arm_set_fpm_src2_format(fpm_t __fpm, enum __ARM_FPM_FORMAT __format) {
2679	return (__fpm & ~0x38ull) \| ((fpm_t)__format << 3u);
2680	}
2681
2682	static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__))
2683	__arm_set_fpm_dst_format(fpm_t __fpm, enum __ARM_FPM_FORMAT __format) {
2684	return (__fpm & ~0x1c0ull) \| ((fpm_t)__format << 6u);
2685	}
2686
2687	static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__))
2688	__arm_set_fpm_overflow_mul(fpm_t __fpm, enum __ARM_FPM_OVERFLOW __behaviour) {
2689	return (__fpm & ~0x4000ull) \| ((fpm_t)__behaviour << 14u);
2690	}
2691
2692	static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__))
2693	__arm_set_fpm_overflow_cvt(fpm_t __fpm, enum __ARM_FPM_OVERFLOW __behaviour) {
2694	return (__fpm & ~0x8000ull) \| ((fpm_t)__behaviour << 15u);
2695	}
2696
2697	static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__))
2698	__arm_set_fpm_lscale(fpm_t __fpm, uint64_t __scale) {
2699	return (__fpm & ~0x7f0000ull) \| (__scale << 16u);
2700	}
2701
2702	static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__))
2703	__arm_set_fpm_nscale(fpm_t __fpm, int64_t __scale) {
2704	return (__fpm & ~0xff000000ull) \| (((fpm_t)__scale & 0xffu) << 24u);
2705	}
2706
2707	static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__))
2708	__arm_set_fpm_lscale2(fpm_t __fpm, uint64_t __scale) {
2709	return (uint32_t)__fpm \| (__scale << 32u);
2710	}
2711
2712	)";
2713
2714	emitNeonTypeDefs(types: "cQcsQsiQilQlUcQUcUsQUsUiQUiUlQUlmQmhQhfQfdQd", OS);
2715
2716	emitNeonTypeDefs(types: "bQb", OS);
2717	OS << "#endif // __ARM_NEON_TYPES_H\n";
2718	}
2719
2720	void NeonEmitter::runBF16(raw_ostream &OS) {
2721	OS << "/*===---- arm_bf16.h - ARM BF16 intrinsics "
2722	"-----------------------------------===\n"
2723	" *\n"
2724	" *\n"
2725	" * Part of the LLVM Project, under the Apache License v2.0 with LLVM "
2726	"Exceptions.\n"
2727	" * See https://llvm.org/LICENSE.txt for license information.\n"
2728	" * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception\n"
2729	" *\n"
2730	" *===-----------------------------------------------------------------"
2731	"------===\n"
2732	" */\n\n";
2733
2734	OS << "#ifndef __ARM_BF16_H\n";
2735	OS << "#define __ARM_BF16_H\n\n";
2736
2737	OS << "typedef __bf16 bfloat16_t;\n";
2738
2739	OS << "#define __ai static __inline__ __attribute__((__always_inline__, "
2740	"__nodebug__))\n\n";
2741
2742	SmallVector<Intrinsic *, `128`> Defs;
2743	for (const Record *R : Records.getAllDerivedDefinitions(ClassName: "Inst"))
2744	createIntrinsic(R, Out&: Defs);
2745
2746	for (auto *I : Defs)
2747	I->indexBody();
2748
2749	stable_sort(Range&: Defs, C: deref<std::less<>>());
2750
2751	// Only emit a def when its requirements have been met.
2752	// FIXME: This loop could be made faster, but it's fast enough for now.
2753	bool MadeProgress = true;
2754	std::string InGuard;
2755	while (!Defs.empty() && MadeProgress) {
2756	MadeProgress = false;
2757
2758	for (SmallVector<Intrinsic *, `128`>::iterator I = Defs.begin();
2759	I != Defs.end(); /No step/) {
2760	bool DependenciesSatisfied = true;
2761	for (auto II : (I)->getDependencies()) {
2762	if (is_contained(Range&: Defs, Element: II))
2763	DependenciesSatisfied = false;
2764	}
2765	if (!DependenciesSatisfied) {
2766	// Try the next one.
2767	++I;
2768	continue;
2769	}
2770
2771	// Emit #endif/#if pair if needed.
2772	if ((*I)->getArchGuard() != InGuard) {
2773	if (!InGuard.empty())
2774	OS << "#endif\n";
2775	InGuard = (*I)->getArchGuard();
2776	if (!InGuard.empty())
2777	OS << "#if " << InGuard << "\n";
2778	}
2779
2780	// Actually generate the intrinsic code.
2781	OS << (*I)->generate();
2782
2783	MadeProgress = true;
2784	I = Defs.erase(CI: I);
2785	}
2786	}
2787	assert(Defs.empty() && "Some requirements were not satisfied!");
2788	if (!InGuard.empty())
2789	OS << "#endif\n";
2790
2791	OS << "\n";
2792	OS << "#undef __ai\n\n";
2793
2794	OS << "#endif\n";
2795	}
2796
2797	void clang::EmitNeon(const RecordKeeper &Records, raw_ostream &OS) {
2798	NeonEmitter (Records).run(OS);
2799	}
2800
2801	void clang::EmitFP16(const RecordKeeper &Records, raw_ostream &OS) {
2802	NeonEmitter (Records).runFP16(OS);
2803	}
2804
2805	void clang::EmitBF16(const RecordKeeper &Records, raw_ostream &OS) {
2806	NeonEmitter (Records).runBF16(OS);
2807	}
2808
2809	void clang::EmitNeonSema(const RecordKeeper &Records, raw_ostream &OS) {
2810	NeonEmitter (Records).runHeader(OS);
2811	}
2812
2813	void clang::EmitVectorTypes(const RecordKeeper &Records, raw_ostream &OS) {
2814	NeonEmitter (Records).runVectorTypes(OS);
2815	}
2816
/// Entry point for Neon test generation. That feature is no longer
/// implemented: the function ignores both arguments and reaching it is
/// treated as a programming error.
void clang::EmitNeonTest(const RecordKeeper &Records, raw_ostream &OS) {
  llvm_unreachable("Neon test generation no longer implemented!");
}
2820

source code of clang/utils/TableGen/NeonEmitter.cpp