//===-- NeonEmitter.cpp - Generate arm_neon.h for use with clang ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This tablegen backend is responsible for emitting arm_neon.h, which includes
// a declaration and definition of each function specified by the ARM NEON
// compiler interface. See ARM document DUI0348B.
//
// Each NEON instruction is implemented in terms of 1 or more functions which
// are suffixed with the element type of the input vectors. Functions may be
// implemented in terms of generic vector operations such as +, *, -, etc. or
// by calling a __builtin_-prefixed function which will be handled by clang's
// CodeGen library.
//
// Additional validation code can be generated by this file when runHeader() is
// called, rather than the normal run() entry point.
//
// See also the documentation in include/clang/Basic/arm_neon.td.
//
//===----------------------------------------------------------------------===//
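
// As a rough illustration of what this backend emits (an illustrative sketch,
// not verbatim output), a simple generated intrinsic looks like:
//
//   __ai uint8x8_t vadd_u8(uint8x8_t __p0, uint8x8_t __p1) {
//     uint8x8_t __ret;
//     __ret = __p0 + __p1;
//     return __ret;
//   }
//
// More complex intrinsics instead expand to a call to a __builtin_neon_*
// function that clang's CodeGen library lowers to the right instruction.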

#include "TableGenBackends.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/TableGen/AArch64ImmCheck.h"
#include "llvm/TableGen/Error.h"
#include "llvm/TableGen/Record.h"
#include "llvm/TableGen/SetTheory.h"
#include "llvm/TableGen/StringToOffsetTable.h"
#include <algorithm>
#include <cassert>
#include <cctype>
#include <cstddef>
#include <cstdint>
#include <deque>
#include <map>
#include <optional>
#include <set>
#include <sstream>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

using namespace llvm;

namespace {

// While globals are generally bad, this one allows us to perform assertions
// liberally and somehow still trace them back to the def they indirectly
// came from.
static const Record *CurrentRecord = nullptr;
static void assert_with_loc(bool Assertion, const std::string &Str) {
  if (!Assertion) {
    if (CurrentRecord)
      PrintFatalError(CurrentRecord->getLoc(), Str);
    else
      PrintFatalError(Str);
  }
}
72
73enum ClassKind {
74 ClassNone,
75 ClassI, // generic integer instruction, e.g., "i8" suffix
76 ClassS, // signed/unsigned/poly, e.g., "s8", "u8" or "p8" suffix
77 ClassW, // width-specific instruction, e.g., "8" suffix
78 ClassV, // void-suffix instruction, no suffix
79 ClassB, // bitcast arguments with enum argument to specify type
  ClassL,     // Logical instructions which are op instructions, but for
              // which we must not emit any suffix in our tests.
  ClassNoTest // Instructions which we do not test, since they are not
              // true instructions.
85};
86
87/// NeonTypeFlags - Flags to identify the types for overloaded Neon
88/// builtins. These must be kept in sync with the flags in
89/// include/clang/Basic/TargetBuiltins.h.
90namespace NeonTypeFlags {
91
92enum { EltTypeMask = 0xf, UnsignedFlag = 0x10, QuadFlag = 0x20 };
93
94enum EltType {
95 Int8,
96 Int16,
97 Int32,
98 Int64,
99 Poly8,
100 Poly16,
101 Poly64,
102 Poly128,
103 Float16,
104 Float32,
105 Float64,
106 BFloat16,
107 MFloat8
108};
109
110} // end namespace NeonTypeFlags
111
112class NeonEmitter;
113
114//===----------------------------------------------------------------------===//
115// TypeSpec
116//===----------------------------------------------------------------------===//
117
118/// A TypeSpec is just a simple wrapper around a string, but gets its own type
119/// for strong typing purposes.
120///
121/// A TypeSpec can be used to create a type.
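///
/// For example (an illustrative application of the parsing rule below, not a
/// string taken from arm_neon.td): fromTypeSpecs("csQf") yields the three
/// TypeSpecs "c", "s" and "Qf", because uppercase modifier characters
/// accumulate until a lowercase base-type character terminates each spec.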
122class TypeSpec : public std::string {
123public:
124 static std::vector<TypeSpec> fromTypeSpecs(StringRef Str) {
125 std::vector<TypeSpec> Ret;
126 TypeSpec Acc;
127 for (char I : Str.str()) {
128 if (islower(I)) {
129 Acc.push_back(c: I);
130 Ret.push_back(x: TypeSpec(Acc));
131 Acc.clear();
132 } else {
133 Acc.push_back(c: I);
134 }
135 }
136 return Ret;
137 }
138};
139
140//===----------------------------------------------------------------------===//
141// Type
142//===----------------------------------------------------------------------===//
143
144/// A Type. Not much more to say here.
145class Type {
146private:
147 TypeSpec TS;
148
149 enum TypeKind { Void, Float, SInt, UInt, Poly, BFloat16, MFloat8, FPM };
150 TypeKind Kind;
151 bool Immediate, Constant, Pointer;
152 // ScalarForMangling and NoManglingQ are really not suited to live here as
153 // they are not related to the type. But they live in the TypeSpec (not the
154 // prototype), so this is really the only place to store them.
155 bool ScalarForMangling, NoManglingQ;
156 unsigned Bitwidth, ElementBitwidth, NumVectors;
157
158public:
159 Type()
160 : Kind(Void), Immediate(false), Constant(false),
161 Pointer(false), ScalarForMangling(false), NoManglingQ(false),
162 Bitwidth(0), ElementBitwidth(0), NumVectors(0) {}
163
164 Type(TypeSpec TS, StringRef CharMods)
165 : TS(std::move(TS)), Kind(Void), Immediate(false),
166 Constant(false), Pointer(false), ScalarForMangling(false),
167 NoManglingQ(false), Bitwidth(0), ElementBitwidth(0), NumVectors(0) {
168 applyModifiers(Mods: CharMods);
169 }
170
171 /// Returns a type representing "void".
172 static Type getVoid() { return Type(); }
173
174 bool operator==(const Type &Other) const { return str() == Other.str(); }
175 bool operator!=(const Type &Other) const { return !operator==(Other); }
176
177 //
178 // Query functions
179 //
180 bool isScalarForMangling() const { return ScalarForMangling; }
181 bool noManglingQ() const { return NoManglingQ; }
182
183 bool isPointer() const { return Pointer; }
184 bool isValue() const { return !isVoid() && !isPointer(); }
185 bool isScalar() const { return isValue() && NumVectors == 0; }
186 bool isVector() const { return isValue() && NumVectors > 0; }
187 bool isConstPointer() const { return Constant; }
188 bool isFloating() const { return Kind == Float; }
189 bool isInteger() const { return Kind == SInt || Kind == UInt; }
190 bool isPoly() const { return Kind == Poly; }
191 bool isSigned() const { return Kind == SInt; }
192 bool isImmediate() const { return Immediate; }
193 bool isFloat() const { return isFloating() && ElementBitwidth == 32; }
194 bool isDouble() const { return isFloating() && ElementBitwidth == 64; }
195 bool isHalf() const { return isFloating() && ElementBitwidth == 16; }
196 bool isChar() const { return ElementBitwidth == 8; }
197 bool isShort() const { return isInteger() && ElementBitwidth == 16; }
198 bool isInt() const { return isInteger() && ElementBitwidth == 32; }
199 bool isLong() const { return isInteger() && ElementBitwidth == 64; }
200 bool isVoid() const { return Kind == Void; }
201 bool isBFloat16() const { return Kind == BFloat16; }
202 bool isMFloat8() const { return Kind == MFloat8; }
203 bool isFPM() const { return Kind == FPM; }
204 unsigned getNumElements() const { return Bitwidth / ElementBitwidth; }
205 unsigned getSizeInBits() const { return Bitwidth; }
206 unsigned getElementSizeInBits() const { return ElementBitwidth; }
207 unsigned getNumVectors() const { return NumVectors; }
208
209 //
210 // Mutator functions
211 //
212 void makeUnsigned() {
213 assert(!isVoid() && "not a potentially signed type");
214 Kind = UInt;
215 }
216 void makeSigned() {
217 assert(!isVoid() && "not a potentially signed type");
218 Kind = SInt;
219 }
220
221 void makeInteger(unsigned ElemWidth, bool Sign) {
222 assert(!isVoid() && "converting void to int probably not useful");
223 Kind = Sign ? SInt : UInt;
224 Immediate = false;
225 ElementBitwidth = ElemWidth;
226 }
227
228 void makeImmediate(unsigned ElemWidth) {
229 Kind = SInt;
230 Immediate = true;
231 ElementBitwidth = ElemWidth;
232 }
233
234 void makeScalar() {
235 Bitwidth = ElementBitwidth;
236 NumVectors = 0;
237 }
238
239 void makeOneVector() {
240 assert(isVector());
241 NumVectors = 1;
242 }
243
244 void make32BitElement() {
245 assert_with_loc(Assertion: Bitwidth > 32, Str: "Not enough bits to make it 32!");
246 ElementBitwidth = 32;
247 }
248
249 void doubleLanes() {
250 assert_with_loc(Assertion: Bitwidth != 128, Str: "Can't get bigger than 128!");
251 Bitwidth = 128;
252 }
253
254 void halveLanes() {
255 assert_with_loc(Assertion: Bitwidth != 64, Str: "Can't get smaller than 64!");
256 Bitwidth = 64;
257 }
258
259 /// Return the C string representation of a type, which is the typename
260 /// defined in stdint.h or arm_neon.h.
261 std::string str() const;
262
263 /// Return the string representation of a type, which is an encoded
264 /// string for passing to the BUILTIN() macro in Builtins.def.
265 std::string builtin_str() const;
266
267 /// Return the value in NeonTypeFlags for this type.
268 unsigned getNeonEnum() const;
269
270 /// Parse a type from a stdint.h or arm_neon.h typedef name,
271 /// for example uint32x2_t or int64_t.
272 static Type fromTypedefName(StringRef Name);
273
274private:
275 /// Creates the type based on the typespec string in TS.
276 /// Sets "Quad" to true if the "Q" or "H" modifiers were
277 /// seen. This is needed by applyModifier as some modifiers
278 /// only take effect if the type size was changed by "Q" or "H".
279 void applyTypespec(bool &Quad);
280 /// Applies prototype modifiers to the type.
281 void applyModifiers(StringRef Mods);
282};
283
284//===----------------------------------------------------------------------===//
285// Variable
286//===----------------------------------------------------------------------===//
287
288/// A variable is a simple class that just has a type and a name.
289class Variable {
290 Type T;
291 std::string N;
292
293public:
294 Variable() : T(Type::getVoid()) {}
295 Variable(Type T, std::string N) : T(std::move(T)), N(std::move(N)) {}
296
297 Type getType() const { return T; }
298 std::string getName() const { return "__" + N; }
299};
300
301//===----------------------------------------------------------------------===//
302// Intrinsic
303//===----------------------------------------------------------------------===//
304
305/// The main grunt class. This represents an instantiation of an intrinsic with
306/// a particular typespec and prototype.
307class Intrinsic {
308 /// The Record this intrinsic was created from.
309 const Record *R;
310 /// The unmangled name.
311 std::string Name;
312 /// The input and output typespecs. InTS == OutTS except when
313 /// CartesianProductWith is non-empty - this is the case for vreinterpret.
314 TypeSpec OutTS, InTS;
315 /// The base class kind. Most intrinsics use ClassS, which has full type
316 /// info for integers (s32/u32). Some use ClassI, which doesn't care about
317 /// signedness (i32), while some (ClassB) have no type at all, only a width
318 /// (32).
319 ClassKind CK;
320 /// The list of DAGs for the body. May be empty, in which case we should
321 /// emit a builtin call.
322 const ListInit *Body;
323 /// The architectural ifdef guard.
324 std::string ArchGuard;
325 /// The architectural target() guard.
326 std::string TargetGuard;
327 /// Set if the Unavailable bit is 1. This means we don't generate a body,
328 /// just an "unavailable" attribute on a declaration.
329 bool IsUnavailable;
330 /// Is this intrinsic safe for big-endian? or does it need its arguments
331 /// reversing?
332 bool BigEndianSafe;
333
334 /// The types of return value [0] and parameters [1..].
335 std::vector<Type> Types;
336
337 SmallVector<ImmCheck, 2> ImmChecks;
338 /// The index of the key type passed to CGBuiltin.cpp for polymorphic calls.
339 int PolymorphicKeyType;
340 /// The local variables defined.
341 std::map<std::string, Variable, std::less<>> Variables;
342 /// NeededEarly - set if any other intrinsic depends on this intrinsic.
343 bool NeededEarly;
344 /// UseMacro - set if we should implement using a macro or unset for a
345 /// function.
346 bool UseMacro;
347 /// The set of intrinsics that this intrinsic uses/requires.
348 std::set<Intrinsic *> Dependencies;
349 /// The "base type", which is Type('d', OutTS). InBaseType is only
350 /// different if CartesianProductWith is non-empty (for vreinterpret).
351 Type BaseType, InBaseType;
352 /// The return variable.
353 Variable RetVar;
354 /// A postfix to apply to every variable. Defaults to "".
355 std::string VariablePostfix;
356
357 NeonEmitter &Emitter;
358 std::stringstream OS;
359
360 bool isBigEndianSafe() const {
361 if (BigEndianSafe)
362 return true;
363
364 for (const auto &T : Types){
365 if (T.isVector() && T.getNumElements() > 1)
366 return false;
367 }
368 return true;
369 }
370
371public:
372 Intrinsic(const Record *R, StringRef Name, StringRef Proto, TypeSpec OutTS,
373 TypeSpec InTS, ClassKind CK, const ListInit *Body,
374 NeonEmitter &Emitter, StringRef ArchGuard, StringRef TargetGuard,
375 bool IsUnavailable, bool BigEndianSafe)
376 : R(R), Name(Name.str()), OutTS(OutTS), InTS(InTS), CK(CK), Body(Body),
377 ArchGuard(ArchGuard.str()), TargetGuard(TargetGuard.str()),
378 IsUnavailable(IsUnavailable), BigEndianSafe(BigEndianSafe),
379 PolymorphicKeyType(0), NeededEarly(false), UseMacro(false),
380 BaseType(OutTS, "."), InBaseType(InTS, "."), Emitter(Emitter) {
381 // Modify the TypeSpec per-argument to get a concrete Type, and create
382 // known variables for each.
383 // Types[0] is the return value.
384 unsigned Pos = 0;
385 Types.emplace_back(args&: OutTS, args: getNextModifiers(Proto, Pos));
386 StringRef Mods = getNextModifiers(Proto, Pos);
387 while (!Mods.empty()) {
388 Types.emplace_back(args&: InTS, args&: Mods);
389 if (Mods.contains(C: '!'))
390 PolymorphicKeyType = Types.size() - 1;
391
392 Mods = getNextModifiers(Proto, Pos);
393 }
394
395 for (const auto &Type : Types) {
396 // If this builtin takes an immediate argument, we need to #define it rather
397 // than use a standard declaration, so that SemaChecking can range check
398 // the immediate passed by the user.
399
400 // Pointer arguments need to use macros to avoid hiding aligned attributes
401 // from the pointer type.
402
403 // It is not permitted to pass or return an __fp16 by value, so intrinsics
404 // taking a scalar float16_t must be implemented as macros.
405 if (Type.isImmediate() || Type.isPointer() ||
406 (Type.isScalar() && Type.isHalf()))
407 UseMacro = true;
408 }
409
410 int ArgIdx, Kind, TypeArgIdx;
411 for (const Record *I : R->getValueAsListOfDefs(FieldName: "ImmChecks")) {
412 unsigned EltSizeInBits = 0, VecSizeInBits = 0;
413
414 ArgIdx = I->getValueAsInt(FieldName: "ImmArgIdx");
415 TypeArgIdx = I->getValueAsInt(FieldName: "TypeContextArgIdx");
416 Kind = I->getValueAsDef(FieldName: "Kind")->getValueAsInt(FieldName: "Value");
417
418 assert((ArgIdx >= 0 && Kind >= 0) &&
419 "ImmArgIdx and Kind must be nonnegative");
420
421 if (TypeArgIdx >= 0) {
422 Type ContextType = getParamType(I: TypeArgIdx);
423
        // Element size cannot be set for intrinsics that map to polymorphic
        // builtins.
426 if (CK != ClassB)
427 EltSizeInBits = ContextType.getElementSizeInBits();
428
429 VecSizeInBits = ContextType.getSizeInBits();
430 }
431
432 ImmChecks.emplace_back(Args&: ArgIdx, Args&: Kind, Args&: EltSizeInBits, Args&: VecSizeInBits);
433 }
434 sort(Start: ImmChecks.begin(), End: ImmChecks.end(),
435 Comp: [](const ImmCheck &a, const ImmCheck &b) {
436 return a.getImmArgIdx() < b.getImmArgIdx();
437 }); // Sort for comparison with other intrinsics which map to the
438 // same builtin
439 }
440
441 /// Get the Record that this intrinsic is based off.
442 const Record *getRecord() const { return R; }
  /// Get the set of Intrinsics that this intrinsic calls.
  /// This is the set of immediate dependencies, NOT the
  /// transitive closure.
446 const std::set<Intrinsic *> &getDependencies() const { return Dependencies; }
447 /// Get the architectural guard string (#ifdef).
448 std::string getArchGuard() const { return ArchGuard; }
449 std::string getTargetGuard() const { return TargetGuard; }
450 ArrayRef<ImmCheck> getImmChecks() const { return ImmChecks; }
451 /// Get the non-mangled name.
452 std::string getName() const { return Name; }
453
454 /// Return true if the intrinsic takes an immediate operand.
455 bool hasImmediate() const {
456 return any_of(Range: Types, P: [](const Type &T) { return T.isImmediate(); });
457 }
458
  /// Return true if the supplied argument is an immediate.
460 bool isArgImmediate(unsigned idx) const {
461 return Types[idx + 1].isImmediate();
462 }
463
464 unsigned getNumParams() const { return Types.size() - 1; }
465 Type getReturnType() const { return Types[0]; }
466 Type getParamType(unsigned I) const { return Types[I + 1]; }
467 Type getBaseType() const { return BaseType; }
468 Type getPolymorphicKeyType() const { return Types[PolymorphicKeyType]; }
469
470 /// Return true if the prototype has a scalar argument.
471 bool protoHasScalar() const;
472
473 /// Return the index that parameter PIndex will sit at
474 /// in a generated function call. This is often just PIndex,
475 /// but may not be as things such as multiple-vector operands
476 /// and sret parameters need to be taken into account.
477 unsigned getGeneratedParamIdx(unsigned PIndex) {
478 unsigned Idx = 0;
479 if (getReturnType().getNumVectors() > 1)
480 // Multiple vectors are passed as sret.
481 ++Idx;
482
483 for (unsigned I = 0; I < PIndex; ++I)
484 Idx += std::max(a: 1U, b: getParamType(I).getNumVectors());
485
486 return Idx;
487 }
488
489 bool hasBody() const { return Body && !Body->empty(); }
490
491 void setNeededEarly() { NeededEarly = true; }
492
493 bool operator<(const Intrinsic &Other) const {
494 // Sort lexicographically on a three-tuple (ArchGuard, TargetGuard, Name)
495 return std::tie(args: ArchGuard, args: TargetGuard, args: Name) <
496 std::tie(args: Other.ArchGuard, args: Other.TargetGuard, args: Other.Name);
497 }
498
499 ClassKind getClassKind(bool UseClassBIfScalar = false) {
500 if (UseClassBIfScalar && !protoHasScalar())
501 return ClassB;
502 return CK;
503 }
504
505 /// Return the name, mangled with type information.
506 /// If ForceClassS is true, use ClassS (u32/s32) instead
507 /// of the intrinsic's own type class.
508 std::string getMangledName(bool ForceClassS = false) const;
509 /// Return the type code for a builtin function call.
510 std::string getInstTypeCode(Type T, ClassKind CK) const;
511 /// Return the type string for a BUILTIN() macro in Builtins.def.
512 std::string getBuiltinTypeStr();
513
514 /// Generate the intrinsic, returning code.
515 std::string generate();
516 /// Perform type checking and populate the dependency graph, but
517 /// don't generate code yet.
518 void indexBody();
519
520private:
521 StringRef getNextModifiers(StringRef Proto, unsigned &Pos) const;
522
523 std::string mangleName(std::string Name, ClassKind CK) const;
524
525 void initVariables();
526 std::string replaceParamsIn(std::string S);
527
528 void emitBodyAsBuiltinCall();
529
530 void generateImpl(bool ReverseArguments,
531 StringRef NamePrefix, StringRef CallPrefix);
532 void emitReturn();
533 void emitBody(StringRef CallPrefix);
534 void emitShadowedArgs();
535 void emitArgumentReversal();
536 void emitReturnVarDecl();
537 void emitReturnReversal();
538 void emitReverseVariable(Variable &Dest, Variable &Src);
539 void emitNewLine();
540 void emitClosingBrace();
541 void emitOpeningBrace();
542 void emitPrototype(StringRef NamePrefix);
543
544 class DagEmitter {
545 Intrinsic &Intr;
546 StringRef CallPrefix;
547
548 public:
549 DagEmitter(Intrinsic &Intr, StringRef CallPrefix) :
550 Intr(Intr), CallPrefix(CallPrefix) {
551 }
552 std::pair<Type, std::string> emitDagArg(const Init *Arg,
553 std::string ArgName);
554 std::pair<Type, std::string> emitDagSaveTemp(const DagInit *DI);
555 std::pair<Type, std::string> emitDagSplat(const DagInit *DI);
556 std::pair<Type, std::string> emitDagDup(const DagInit *DI);
557 std::pair<Type, std::string> emitDagDupTyped(const DagInit *DI);
558 std::pair<Type, std::string> emitDagShuffle(const DagInit *DI);
559 std::pair<Type, std::string> emitDagCast(const DagInit *DI, bool IsBitCast);
560 std::pair<Type, std::string> emitDagCall(const DagInit *DI,
561 bool MatchMangledName);
562 std::pair<Type, std::string> emitDagNameReplace(const DagInit *DI);
563 std::pair<Type, std::string> emitDagLiteral(const DagInit *DI);
564 std::pair<Type, std::string> emitDagOp(const DagInit *DI);
565 std::pair<Type, std::string> emitDag(const DagInit *DI);
566 };
567};
568
569//===----------------------------------------------------------------------===//
570// NeonEmitter
571//===----------------------------------------------------------------------===//
572
573class NeonEmitter {
574 const RecordKeeper &Records;
575 DenseMap<const Record *, ClassKind> ClassMap;
576 std::map<std::string, std::deque<Intrinsic>, std::less<>> IntrinsicMap;
577 unsigned UniqueNumber;
578
579 void createIntrinsic(const Record *R, SmallVectorImpl<Intrinsic *> &Out);
580 void genBuiltinsDef(raw_ostream &OS, SmallVectorImpl<Intrinsic *> &Defs);
581 void genStreamingSVECompatibleList(raw_ostream &OS,
582 SmallVectorImpl<Intrinsic *> &Defs);
583 void genOverloadTypeCheckCode(raw_ostream &OS,
584 SmallVectorImpl<Intrinsic *> &Defs);
585 bool areRangeChecksCompatible(const ArrayRef<ImmCheck> ChecksA,
586 const ArrayRef<ImmCheck> ChecksB);
587 void genIntrinsicRangeCheckCode(raw_ostream &OS,
588 SmallVectorImpl<Intrinsic *> &Defs);
589
590public:
591 /// Called by Intrinsic - this attempts to get an intrinsic that takes
592 /// the given types as arguments.
593 Intrinsic &getIntrinsic(StringRef Name, ArrayRef<Type> Types,
594 std::optional<std::string> MangledName);
595
596 /// Called by Intrinsic - returns a globally-unique number.
597 unsigned getUniqueNumber() { return UniqueNumber++; }
598
599 NeonEmitter(const RecordKeeper &R) : Records(R), UniqueNumber(0) {
600 const Record *SI = R.getClass(Name: "SInst");
601 const Record *II = R.getClass(Name: "IInst");
602 const Record *WI = R.getClass(Name: "WInst");
603 const Record *VI = R.getClass(Name: "VInst");
604 const Record *SOpI = R.getClass(Name: "SOpInst");
605 const Record *IOpI = R.getClass(Name: "IOpInst");
606 const Record *WOpI = R.getClass(Name: "WOpInst");
607 const Record *LOpI = R.getClass(Name: "LOpInst");
608 const Record *NoTestOpI = R.getClass(Name: "NoTestOpInst");
609
610 ClassMap[SI] = ClassS;
611 ClassMap[II] = ClassI;
612 ClassMap[WI] = ClassW;
613 ClassMap[VI] = ClassV;
614 ClassMap[SOpI] = ClassS;
615 ClassMap[IOpI] = ClassI;
616 ClassMap[WOpI] = ClassW;
617 ClassMap[LOpI] = ClassL;
618 ClassMap[NoTestOpI] = ClassNoTest;
619 }
620
621 // Emit arm_neon.h.inc
622 void run(raw_ostream &o);
623
624 // Emit arm_fp16.h.inc
625 void runFP16(raw_ostream &o);
626
627 // Emit arm_bf16.h.inc
628 void runBF16(raw_ostream &o);
629
630 void runVectorTypes(raw_ostream &o);
631
632 // Emit all the __builtin prototypes used in arm_neon.h, arm_fp16.h and
633 // arm_bf16.h
634 void runHeader(raw_ostream &o);
635};
636
637} // end anonymous namespace
638
639//===----------------------------------------------------------------------===//
640// Type implementation
641//===----------------------------------------------------------------------===//
642
643std::string Type::str() const {
644 if (isVoid())
645 return "void";
646 if (isFPM())
647 return "fpm_t";
648
649 std::string S;
650
651 if (isInteger() && !isSigned())
652 S += "u";
653
654 if (isPoly())
655 S += "poly";
656 else if (isFloating())
657 S += "float";
658 else if (isBFloat16())
659 S += "bfloat";
660 else if (isMFloat8())
661 S += "mfloat";
662 else
663 S += "int";
664
665 S += utostr(X: ElementBitwidth);
666 if (isVector())
667 S += "x" + utostr(X: getNumElements());
668 if (NumVectors > 1)
669 S += "x" + utostr(X: NumVectors);
670 S += "_t";
671
672 if (Constant)
673 S += " const";
674 if (Pointer)
675 S += " *";
676
677 return S;
678}
679
680std::string Type::builtin_str() const {
681 std::string S;
682 if (isVoid())
683 return "v";
684
685 if (isPointer()) {
686 // All pointers are void pointers.
687 S = "v";
688 if (isConstPointer())
689 S += "C";
690 S += "*";
691 return S;
692 } else if (isInteger())
693 switch (ElementBitwidth) {
694 case 8: S += "c"; break;
695 case 16: S += "s"; break;
696 case 32: S += "i"; break;
697 case 64: S += "Wi"; break;
698 case 128: S += "LLLi"; break;
699 default: llvm_unreachable("Unhandled case!");
700 }
701 else if (isBFloat16()) {
702 assert(ElementBitwidth == 16 && "BFloat16 can only be 16 bits");
703 S += "y";
704 } else if (isMFloat8()) {
705 assert(ElementBitwidth == 8 && "MFloat8 can only be 8 bits");
706 S += "m";
707 } else if (isFPM()) {
708 S += "UWi";
709 } else
710 switch (ElementBitwidth) {
711 case 16: S += "h"; break;
712 case 32: S += "f"; break;
713 case 64: S += "d"; break;
714 default: llvm_unreachable("Unhandled case!");
715 }
716
717 // FIXME: NECESSARY???????????????????????????????????????????????????????????????????????
718 if (isChar() && !isPointer() && isSigned())
719 // Make chars explicitly signed.
720 S = "S" + S;
721 else if (isInteger() && !isSigned())
722 S = "U" + S;
723
724 // Constant indices are "int", but have the "constant expression" modifier.
725 if (isImmediate()) {
726 assert(isInteger() && isSigned());
727 S = "I" + S;
728 }
729
730 if (isScalar())
731 return S;
732
733 std::string Ret;
734 for (unsigned I = 0; I < NumVectors; ++I)
735 Ret += "V" + utostr(X: getNumElements()) + S;
736
737 return Ret;
738}
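
// Illustrative examples of the encoding above (derived from the switch logic,
// not an exhaustive list): int32x2_t -> "V2i", uint16x8_t -> "V8Us",
// float16x4_t -> "V4h", a const void pointer -> "vC*", and a 32-bit signed
// immediate -> "Ii".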
739
740unsigned Type::getNeonEnum() const {
741 unsigned Addend;
742 switch (ElementBitwidth) {
743 case 8: Addend = 0; break;
744 case 16: Addend = 1; break;
745 case 32: Addend = 2; break;
746 case 64: Addend = 3; break;
747 case 128: Addend = 4; break;
748 default: llvm_unreachable("Unhandled element bitwidth!");
749 }
750
751 unsigned Base = (unsigned)NeonTypeFlags::Int8 + Addend;
752 if (isPoly()) {
753 // Adjustment needed because Poly32 doesn't exist.
754 if (Addend >= 2)
755 --Addend;
756 Base = (unsigned)NeonTypeFlags::Poly8 + Addend;
757 }
758 if (isFloating()) {
759 assert(Addend != 0 && "Float8 doesn't exist!");
760 Base = (unsigned)NeonTypeFlags::Float16 + (Addend - 1);
761 }
762
763 if (isBFloat16()) {
764 assert(Addend == 1 && "BFloat16 is only 16 bit");
765 Base = (unsigned)NeonTypeFlags::BFloat16;
766 }
767
768 if (isMFloat8()) {
769 Base = (unsigned)NeonTypeFlags::MFloat8;
770 }
771
772 if (Bitwidth == 128)
773 Base |= (unsigned)NeonTypeFlags::QuadFlag;
774 if (isInteger() && !isSigned())
775 Base |= (unsigned)NeonTypeFlags::UnsignedFlag;
776
777 return Base;
778}
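
// A worked example of the encoding above (illustrative): uint16x8_t maps to
// Int16 (1) | UnsignedFlag (0x10) | QuadFlag (0x20) = 0x31, while float32x2_t
// maps to Float32 (9) with neither flag set.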
779
780Type Type::fromTypedefName(StringRef Name) {
781 Type T;
782 T.Kind = SInt;
783
784 if (Name.consume_front(Prefix: "u"))
785 T.Kind = UInt;
786
787 if (Name.consume_front(Prefix: "float")) {
788 T.Kind = Float;
789 } else if (Name.consume_front(Prefix: "poly")) {
790 T.Kind = Poly;
791 } else if (Name.consume_front(Prefix: "bfloat")) {
792 T.Kind = BFloat16;
793 } else if (Name.consume_front(Prefix: "mfloat")) {
794 T.Kind = MFloat8;
795 } else {
796 assert(Name.starts_with("int"));
797 Name = Name.drop_front(N: 3);
798 }
799
800 unsigned I = 0;
801 for (I = 0; I < Name.size(); ++I) {
802 if (!isdigit(Name[I]))
803 break;
804 }
805 Name.substr(Start: 0, N: I).getAsInteger(Radix: 10, Result&: T.ElementBitwidth);
806 Name = Name.drop_front(N: I);
807
808 T.Bitwidth = T.ElementBitwidth;
809 T.NumVectors = 1;
810
811 if (Name.consume_front(Prefix: "x")) {
812 unsigned I = 0;
813 for (I = 0; I < Name.size(); ++I) {
814 if (!isdigit(Name[I]))
815 break;
816 }
817 unsigned NumLanes;
818 Name.substr(Start: 0, N: I).getAsInteger(Radix: 10, Result&: NumLanes);
819 Name = Name.drop_front(N: I);
820 T.Bitwidth = T.ElementBitwidth * NumLanes;
821 } else {
822 // Was scalar.
823 T.NumVectors = 0;
824 }
825 if (Name.consume_front(Prefix: "x")) {
826 unsigned I = 0;
827 for (I = 0; I < Name.size(); ++I) {
828 if (!isdigit(Name[I]))
829 break;
830 }
831 Name.substr(Start: 0, N: I).getAsInteger(Radix: 10, Result&: T.NumVectors);
832 Name = Name.drop_front(N: I);
833 }
834
835 assert(Name.starts_with("_t") && "Malformed typedef!");
836 return T;
837}
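
// Illustrative examples of the parsing above: "uint32x2_t" becomes an
// unsigned type with ElementBitwidth 32, Bitwidth 64 and NumVectors 1;
// "int8x16x2_t" additionally gets NumVectors 2; and "float16_t", having no
// "xN" suffix, is left as a scalar with NumVectors 0.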
838
839void Type::applyTypespec(bool &Quad) {
840 std::string S = TS;
841 ScalarForMangling = false;
842 Kind = SInt;
843 ElementBitwidth = ~0U;
844 NumVectors = 1;
845
846 for (char I : S) {
847 switch (I) {
848 case 'S':
849 ScalarForMangling = true;
850 break;
851 case 'H':
852 NoManglingQ = true;
853 Quad = true;
854 break;
855 case 'Q':
856 Quad = true;
857 break;
858 case 'P':
859 Kind = Poly;
860 break;
861 case 'U':
862 Kind = UInt;
863 break;
864 case 'c':
865 ElementBitwidth = 8;
866 break;
867 case 'h':
868 Kind = Float;
869 [[fallthrough]];
870 case 's':
871 ElementBitwidth = 16;
872 break;
873 case 'f':
874 Kind = Float;
875 [[fallthrough]];
876 case 'i':
877 ElementBitwidth = 32;
878 break;
879 case 'd':
880 Kind = Float;
881 [[fallthrough]];
882 case 'l':
883 ElementBitwidth = 64;
884 break;
885 case 'k':
886 ElementBitwidth = 128;
887 // Poly doesn't have a 128x1 type.
888 if (isPoly())
889 NumVectors = 0;
890 break;
891 case 'b':
892 Kind = BFloat16;
893 ElementBitwidth = 16;
894 break;
895 case 'm':
896 Kind = MFloat8;
897 ElementBitwidth = 8;
898 break;
899 default:
900 llvm_unreachable("Unhandled type code!");
901 }
902 }
903 assert(ElementBitwidth != ~0U && "Bad element bitwidth!");
904
905 Bitwidth = Quad ? 128 : 64;
906}
907
908void Type::applyModifiers(StringRef Mods) {
909 bool AppliedQuad = false;
910 applyTypespec(Quad&: AppliedQuad);
911
912 for (char Mod : Mods) {
913 switch (Mod) {
914 case '.':
915 break;
916 case 'v':
917 Kind = Void;
918 break;
919 case 'S':
920 Kind = SInt;
921 break;
922 case 'U':
923 Kind = UInt;
924 break;
925 case 'B':
926 Kind = BFloat16;
927 ElementBitwidth = 16;
928 break;
929 case 'F':
930 Kind = Float;
931 break;
932 case 'P':
933 Kind = Poly;
934 break;
935 case 'V':
936 Kind = FPM;
937 Bitwidth = ElementBitwidth = 64;
938 NumVectors = 0;
939 Immediate = Constant = Pointer = false;
940 ScalarForMangling = NoManglingQ = true;
941 break;
942 case '>':
943 assert(ElementBitwidth < 128);
944 ElementBitwidth *= 2;
945 break;
946 case '<':
947 assert(ElementBitwidth > 8);
948 ElementBitwidth /= 2;
949 break;
950 case '1':
951 NumVectors = 0;
952 break;
953 case '2':
954 NumVectors = 2;
955 break;
956 case '3':
957 NumVectors = 3;
958 break;
959 case '4':
960 NumVectors = 4;
961 break;
962 case '*':
963 Pointer = true;
964 break;
965 case 'c':
966 Constant = true;
967 break;
968 case 'Q':
969 Bitwidth = 128;
970 break;
971 case 'q':
972 Bitwidth = 64;
973 break;
974 case 'I':
975 Kind = SInt;
976 ElementBitwidth = Bitwidth = 32;
977 NumVectors = 0;
978 Immediate = true;
979 break;
980 case 'p':
981 if (isPoly())
982 Kind = UInt;
983 break;
984 case '!':
985 // Key type, handled elsewhere.
986 break;
987 default:
988 llvm_unreachable("Unhandled character!");
989 }
990 }
991}
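
// A small worked example of the two routines above (illustrative only): the
// typespec "Qf" describes a 128-bit vector of 32-bit floats, so the "."
// modifier yields float32x4_t, the "1" modifier collapses it to the scalar
// float32_t, and the "I" modifier replaces it with a 32-bit signed immediate
// regardless of the typespec.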
992
993//===----------------------------------------------------------------------===//
994// Intrinsic implementation
995//===----------------------------------------------------------------------===//
996
997StringRef Intrinsic::getNextModifiers(StringRef Proto, unsigned &Pos) const {
998 if (Proto.size() == Pos)
999 return StringRef();
1000 else if (Proto[Pos] != '(')
1001 return Proto.substr(Start: Pos++, N: 1);
1002
1003 size_t Start = Pos + 1;
1004 size_t End = Proto.find(C: ')', From: Start);
1005 assert_with_loc(Assertion: End != StringRef::npos, Str: "unmatched modifier group paren");
1006 Pos = End + 1;
1007 return Proto.slice(Start, End);
1008}
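
// For example (an invented prototype string, purely to illustrate the parsing
// above): with Proto == "(<<Q)dd", the first call returns the group "<<Q" and
// advances Pos past the closing paren; subsequent calls return "d" and "d"
// one character at a time, and then an empty StringRef.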
1009
1010std::string Intrinsic::getInstTypeCode(Type T, ClassKind CK) const {
1011 char typeCode = '\0';
1012 bool printNumber = true;
1013
1014 if (CK == ClassB && TargetGuard == "neon")
1015 return "";
1016
1017 if (this->CK == ClassV)
1018 return "";
1019
1020 if (T.isBFloat16())
1021 return "bf16";
1022
1023 if (T.isMFloat8())
1024 return "mf8";
1025
1026 if (T.isPoly())
1027 typeCode = 'p';
1028 else if (T.isInteger())
1029 typeCode = T.isSigned() ? 's' : 'u';
1030 else
1031 typeCode = 'f';
1032
1033 if (CK == ClassI) {
1034 switch (typeCode) {
1035 default:
1036 break;
1037 case 's':
1038 case 'u':
1039 case 'p':
1040 typeCode = 'i';
1041 break;
1042 }
1043 }
1044 if (CK == ClassB && TargetGuard == "neon") {
1045 typeCode = '\0';
1046 }
1047
1048 std::string S;
1049 if (typeCode != '\0')
1050 S.push_back(c: typeCode);
1051 if (printNumber)
1052 S += utostr(X: T.getElementSizeInBits());
1053
1054 return S;
1055}
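
// Illustrative results of the mapping above: a signed 32-bit integer type
// yields "s32" under ClassS but "i32" under ClassI, poly8 yields "p8",
// 32-bit float yields "f32", and bfloat16/mfloat8 are special-cased to
// "bf16"/"mf8".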
1056
1057std::string Intrinsic::getBuiltinTypeStr() {
1058 ClassKind LocalCK = getClassKind(UseClassBIfScalar: true);
1059 std::string S;
1060
1061 Type RetT = getReturnType();
1062 if ((LocalCK == ClassI || LocalCK == ClassW) && RetT.isScalar() &&
1063 !RetT.isFloating() && !RetT.isBFloat16() && !RetT.isMFloat8())
1064 RetT.makeInteger(ElemWidth: RetT.getElementSizeInBits(), Sign: false);
1065
1066 // Since the return value must be one type, return a vector type of the
1067 // appropriate width which we will bitcast. An exception is made for
1068 // returning structs of 2, 3, or 4 vectors which are returned in a sret-like
1069 // fashion, storing them to a pointer arg.
1070 if (RetT.getNumVectors() > 1) {
1071 S += "vv*"; // void result with void* first argument
1072 } else {
1073 if (RetT.isPoly())
1074 RetT.makeInteger(ElemWidth: RetT.getElementSizeInBits(), Sign: false);
1075 if (!RetT.isScalar() && RetT.isInteger() && !RetT.isSigned())
1076 RetT.makeSigned();
1077
1078 if (LocalCK == ClassB && RetT.isValue() && !RetT.isScalar())
1079 // Cast to vector of 8-bit elements.
1080 RetT.makeInteger(ElemWidth: 8, Sign: true);
1081
1082 S += RetT.builtin_str();
1083 }
1084
1085 for (unsigned I = 0; I < getNumParams(); ++I) {
1086 Type T = getParamType(I);
1087 if (T.isPoly())
1088 T.makeInteger(ElemWidth: T.getElementSizeInBits(), Sign: false);
1089
1090 if (LocalCK == ClassB && !T.isScalar())
1091 T.makeInteger(ElemWidth: 8, Sign: true);
1092 // Halves always get converted to 8-bit elements.
1093 if (T.isHalf() && T.isVector() && !T.isScalarForMangling())
1094 T.makeInteger(ElemWidth: 8, Sign: true);
1095
1096 if (LocalCK == ClassI && T.isInteger())
1097 T.makeSigned();
1098
1099 if (isArgImmediate(idx: I))
1100 T.makeImmediate(ElemWidth: 32);
1101
1102 S += T.builtin_str();
1103 }
1104
1105 // Extra constant integer to hold type class enum for this function, e.g. s8
1106 if (LocalCK == ClassB)
1107 S += "i";
1108
1109 return S;
1110}
1111
1112std::string Intrinsic::getMangledName(bool ForceClassS) const {
1113 // Check if the prototype has a scalar operand with the type of the vector
1114 // elements. If not, bitcasting the args will take care of arg checking.
1115 // The actual signedness etc. will be taken care of with special enums.
1116 ClassKind LocalCK = CK;
1117 if (!protoHasScalar())
1118 LocalCK = ClassB;
1119
1120 return mangleName(Name, CK: ForceClassS ? ClassS : LocalCK);
1121}
1122
1123std::string Intrinsic::mangleName(std::string Name, ClassKind LocalCK) const {
1124 std::string typeCode = getInstTypeCode(T: BaseType, CK: LocalCK);
1125 std::string S = Name;
1126
1127 if (Name == "vcvt_f16_f32" || Name == "vcvt_f32_f16" ||
1128 Name == "vcvt_f32_f64" || Name == "vcvt_f64_f32" ||
1129 Name == "vcvt_f32_bf16")
1130 return Name;
1131
1132 if (!typeCode.empty()) {
1133 // If the name ends with _xN (N = 2,3,4), insert the typeCode before _xN.
1134 if (Name.size() >= 3 && isdigit(Name.back()) &&
1135 Name[Name.length() - 2] == 'x' && Name[Name.length() - 3] == '_')
1136 S.insert(pos1: S.length() - 3, str: "_" + typeCode);
1137 else
1138 S += "_" + typeCode;
1139 }
1140
1141 if (BaseType != InBaseType) {
    // A reinterpret - output the input base type at the end.
1143 S += "_" + getInstTypeCode(T: InBaseType, CK: LocalCK);
1144 }
1145
1146 if (LocalCK == ClassB && TargetGuard == "neon")
1147 S += "_v";
1148
1149 // Insert a 'q' before the first '_' character so that it ends up before
1150 // _lane or _n on vector-scalar operations.
1151 if (BaseType.getSizeInBits() == 128 && !BaseType.noManglingQ()) {
1152 size_t Pos = S.find(c: '_');
1153 S.insert(pos: Pos, s: "q");
1154 }
1155
1156 char Suffix = '\0';
1157 if (BaseType.isScalarForMangling()) {
1158 switch (BaseType.getElementSizeInBits()) {
1159 case 8: Suffix = 'b'; break;
1160 case 16: Suffix = 'h'; break;
1161 case 32: Suffix = 's'; break;
1162 case 64: Suffix = 'd'; break;
1163 default: llvm_unreachable("Bad suffix!");
1164 }
1165 }
1166 if (Suffix != '\0') {
1167 size_t Pos = S.find(c: '_');
1168 S.insert(pos: Pos, s: &Suffix, n: 1);
1169 }
1170
1171 return S;
1172}
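
// For example (names chosen only to illustrate the rules above): "vadd" with
// an int32x2_t base type mangles to "vadd_s32" under ClassS, and with an
// int32x4_t base type the 'q' is inserted before the first underscore,
// giving "vaddq_s32".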
1173
1174std::string Intrinsic::replaceParamsIn(std::string S) {
1175 while (S.find(c: '$') != std::string::npos) {
1176 size_t Pos = S.find(c: '$');
1177 size_t End = Pos + 1;
1178 while (isalpha(S[End]))
1179 ++End;
1180
1181 std::string VarName = S.substr(pos: Pos + 1, n: End - Pos - 1);
1182 assert_with_loc(Assertion: Variables.find(x: VarName) != Variables.end(),
1183 Str: "Variable not defined!");
1184 S.replace(pos: Pos, n: End - Pos, str: Variables.find(x: VarName)->second.getName());
1185 }
1186
1187 return S;
1188}
1189
1190void Intrinsic::initVariables() {
1191 Variables.clear();
1192
1193 // Modify the TypeSpec per-argument to get a concrete Type, and create
1194 // known variables for each.
1195 for (unsigned I = 1; I < Types.size(); ++I) {
1196 char NameC = '0' + (I - 1);
1197 std::string Name = "p";
1198 Name.push_back(c: NameC);
1199
1200 Variables[Name] = Variable(Types[I], Name + VariablePostfix);
1201 }
1202 RetVar = Variable(Types[0], "ret" + VariablePostfix);
1203}
1204
1205void Intrinsic::emitPrototype(StringRef NamePrefix) {
1206 if (UseMacro) {
1207 OS << "#define ";
1208 } else {
1209 OS << "__ai ";
1210 if (TargetGuard != "")
1211 OS << "__attribute__((target(\"" << TargetGuard << "\"))) ";
1212 OS << Types[0].str() << " ";
1213 }
1214
1215 OS << NamePrefix.str() << mangleName(Name, LocalCK: ClassS) << "(";
1216
1217 for (unsigned I = 0; I < getNumParams(); ++I) {
1218 if (I != 0)
1219 OS << ", ";
1220
1221 char NameC = '0' + I;
1222 std::string Name = "p";
1223 Name.push_back(c: NameC);
1224 assert(Variables.find(Name) != Variables.end());
1225 Variable &V = Variables[Name];
1226
1227 if (!UseMacro)
1228 OS << V.getType().str() << " ";
1229 OS << V.getName();
1230 }
1231
1232 OS << ")";
1233}
1234
1235void Intrinsic::emitOpeningBrace() {
1236 if (UseMacro)
1237 OS << " __extension__ ({";
1238 else
1239 OS << " {";
1240 emitNewLine();
1241}
1242
1243void Intrinsic::emitClosingBrace() {
1244 if (UseMacro)
1245 OS << "})";
1246 else
1247 OS << "}";
1248}
1249
1250void Intrinsic::emitNewLine() {
1251 if (UseMacro)
1252 OS << " \\\n";
1253 else
1254 OS << "\n";
1255}
1256
1257void Intrinsic::emitReverseVariable(Variable &Dest, Variable &Src) {
1258 if (Dest.getType().getNumVectors() > 1) {
1259 emitNewLine();
1260
1261 for (unsigned K = 0; K < Dest.getType().getNumVectors(); ++K) {
1262 OS << " " << Dest.getName() << ".val[" << K << "] = "
1263 << "__builtin_shufflevector(" << Src.getName() << ".val[" << K << "], "
1264 << Src.getName() << ".val[" << K << "], __lane_reverse_"
1265 << Dest.getType().getSizeInBits() << "_"
1266 << Dest.getType().getElementSizeInBits() << ");";
1267 emitNewLine();
1268 }
1269 } else {
1270 OS << " " << Dest.getName() << " = __builtin_shufflevector("
1271 << Src.getName() << ", " << Src.getName() << ", __lane_reverse_"
1272 << Dest.getType().getSizeInBits() << "_"
1273 << Dest.getType().getElementSizeInBits() << ");";
1274 emitNewLine();
1275 }
1276}
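
// The emitted reversal is a lane shuffle. For a single 64-bit vector of
// 32-bit elements the generated line looks roughly like:
//   __rev0 = __builtin_shufflevector(__p0, __p0, __lane_reverse_64_32);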
1277
1278void Intrinsic::emitArgumentReversal() {
1279 if (isBigEndianSafe())
1280 return;
1281
1282 // Reverse all vector arguments.
1283 for (unsigned I = 0; I < getNumParams(); ++I) {
1284 std::string Name = "p" + utostr(X: I);
1285 std::string NewName = "rev" + utostr(X: I);
1286
1287 Variable &V = Variables[Name];
1288 Variable NewV(V.getType(), NewName + VariablePostfix);
1289
1290 if (!NewV.getType().isVector() || NewV.getType().getNumElements() == 1)
1291 continue;
1292
1293 OS << " " << NewV.getType().str() << " " << NewV.getName() << ";";
1294 emitReverseVariable(Dest&: NewV, Src&: V);
1295 V = NewV;
1296 }
1297}
1298
1299void Intrinsic::emitReturnVarDecl() {
1300 assert(RetVar.getType() == Types[0]);
1301 // Create a return variable, if we're not void.
1302 if (!RetVar.getType().isVoid()) {
1303 OS << " " << RetVar.getType().str() << " " << RetVar.getName() << ";";
1304 emitNewLine();
1305 }
1306}
1307
1308void Intrinsic::emitReturnReversal() {
1309 if (isBigEndianSafe())
1310 return;
1311 if (!getReturnType().isVector() || getReturnType().isVoid() ||
1312 getReturnType().getNumElements() == 1)
1313 return;
1314 emitReverseVariable(Dest&: RetVar, Src&: RetVar);
1315}
1316
1317void Intrinsic::emitShadowedArgs() {
1318 // Macro arguments are not type-checked like inline function arguments,
1319 // so assign them to local temporaries to get the right type checking.
1320 if (!UseMacro)
1321 return;
1322
1323 for (unsigned I = 0; I < getNumParams(); ++I) {
1324 // Do not create a temporary for an immediate argument.
1325 // That would defeat the whole point of using a macro!
1326 if (getParamType(I).isImmediate())
1327 continue;
1328 // Do not create a temporary for pointer arguments. The input
1329 // pointer may have an alignment hint.
1330 if (getParamType(I).isPointer())
1331 continue;
1332
1333 std::string Name = "p" + utostr(X: I);
1334
1335 assert(Variables.find(Name) != Variables.end());
1336 Variable &V = Variables[Name];
1337
1338 std::string NewName = "s" + utostr(X: I);
1339 Variable V2(V.getType(), NewName + VariablePostfix);
1340
1341 OS << " " << V2.getType().str() << " " << V2.getName() << " = "
1342 << V.getName() << ";";
1343 emitNewLine();
1344
1345 V = V2;
1346 }
1347}
1348
1349bool Intrinsic::protoHasScalar() const {
1350 return any_of(Range: Types,
1351 P: [](const Type &T) { return T.isScalar() && !T.isImmediate(); });
1352}
1353
1354void Intrinsic::emitBodyAsBuiltinCall() {
1355 std::string S;
1356
  // If this builtin returns a struct of 2, 3, or 4 vectors, pass it as an
  // implicit sret-like argument.
1359 bool SRet = getReturnType().getNumVectors() >= 2;
1360
1361 StringRef N = Name;
1362 ClassKind LocalCK = CK;
1363 if (!protoHasScalar())
1364 LocalCK = ClassB;
1365
1366 if (!getReturnType().isVoid() && !SRet)
1367 S += "__builtin_bit_cast(" + RetVar.getType().str() + ", ";
1368
1369 S += "__builtin_neon_" + mangleName(Name: std::string(N), LocalCK) + "(";
1370
1371 if (SRet)
1372 S += "&" + RetVar.getName() + ", ";
1373
1374 for (unsigned I = 0; I < getNumParams(); ++I) {
1375 Variable &V = Variables["p" + utostr(X: I)];
1376 Type T = V.getType();
1377
1378 // Handle multiple-vector values specially, emitting each subvector as an
1379 // argument to the builtin.
1380 if (T.getNumVectors() > 1) {
1381 // Check if an explicit cast is needed.
1382 std::string Cast;
1383 if (LocalCK == ClassB) {
1384 Type T2 = T;
1385 T2.makeOneVector();
1386 T2.makeInteger(ElemWidth: 8, /*Sign=*/true);
1387 Cast = "__builtin_bit_cast(" + T2.str() + ", ";
1388 }
1389
1390 for (unsigned J = 0; J < T.getNumVectors(); ++J)
1391 S += Cast + V.getName() + ".val[" + utostr(X: J) + "]" +
1392 (Cast.empty() ? ", " : "), ");
1393 continue;
1394 }
1395
1396 std::string Arg = V.getName();
1397 Type CastToType = T;
1398
1399 // Check if an explicit cast is needed.
1400 if (CastToType.isVector()) {
1401 if (LocalCK == ClassB || (T.isHalf() && !T.isScalarForMangling())) {
1402 CastToType.makeInteger(ElemWidth: 8, Sign: true);
1403 Arg = "__builtin_bit_cast(" + CastToType.str() + ", " + Arg + ")";
1404 } else if (LocalCK == ClassI) {
1405 if (CastToType.isInteger()) {
1406 CastToType.makeSigned();
1407 Arg = "__builtin_bit_cast(" + CastToType.str() + ", " + Arg + ")";
1408 }
1409 }
1410 }
1411
1412 S += Arg + ", ";
1413 }
1414
1415 // Extra constant integer to hold type class enum for this function, e.g. s8
1416 if (getClassKind(UseClassBIfScalar: true) == ClassB) {
1417 S += utostr(X: getPolymorphicKeyType().getNeonEnum());
1418 } else {
1419 // Remove extraneous ", ".
1420 S.pop_back();
1421 S.pop_back();
1422 }
1423
1424 if (!getReturnType().isVoid() && !SRet)
1425 S += ")";
1426 S += ");";
1427
1428 std::string RetExpr;
1429 if (!SRet && !RetVar.getType().isVoid())
1430 RetExpr = RetVar.getName() + " = ";
1431
1432 OS << " " << RetExpr << S;
1433 emitNewLine();
1434}
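
// As an illustrative sketch of the output (the exact casts depend on the
// class kind), a polymorphic ClassB intrinsic such as vqaddq_s32 would be
// emitted roughly as:
//   __ret = __builtin_bit_cast(int32x4_t, __builtin_neon_vqaddq_v(
//       __builtin_bit_cast(int8x16_t, __p0),
//       __builtin_bit_cast(int8x16_t, __p1), 34));
// where the trailing 34 is the NeonTypeFlags encoding for a quad vector of
// signed 32-bit elements.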
1435
1436void Intrinsic::emitBody(StringRef CallPrefix) {
1437 std::vector<std::string> Lines;
1438
1439 if (!Body || Body->empty()) {
1440 // Nothing specific to output - must output a builtin.
1441 emitBodyAsBuiltinCall();
1442 return;
1443 }
1444
1445 // We have a list of "things to output". The last should be returned.
1446 for (auto *I : Body->getElements()) {
1447 if (const auto *SI = dyn_cast<StringInit>(Val: I)) {
1448 Lines.push_back(x: replaceParamsIn(S: SI->getAsString()));
1449 } else if (const auto *DI = dyn_cast<DagInit>(Val: I)) {
1450 DagEmitter DE(*this, CallPrefix);
1451 Lines.push_back(x: DE.emitDag(DI).second + ";");
1452 }
1453 }
1454
1455 assert(!Lines.empty() && "Empty def?");
1456 if (!RetVar.getType().isVoid())
1457 Lines.back().insert(pos1: 0, str: RetVar.getName() + " = ");
1458
1459 for (auto &L : Lines) {
1460 OS << " " << L;
1461 emitNewLine();
1462 }
1463}
1464
1465void Intrinsic::emitReturn() {
1466 if (RetVar.getType().isVoid())
1467 return;
1468 if (UseMacro)
1469 OS << " " << RetVar.getName() << ";";
1470 else
1471 OS << " return " << RetVar.getName() << ";";
1472 emitNewLine();
1473}
1474
1475std::pair<Type, std::string> Intrinsic::DagEmitter::emitDag(const DagInit *DI) {
1476 // At this point we should only be seeing a def.
1477 const DefInit *DefI = cast<DefInit>(Val: DI->getOperator());
1478 std::string Op = DefI->getAsString();
1479
1480 if (Op == "cast" || Op == "bitcast")
1481 return emitDagCast(DI, IsBitCast: Op == "bitcast");
1482 if (Op == "shuffle")
1483 return emitDagShuffle(DI);
1484 if (Op == "dup")
1485 return emitDagDup(DI);
1486 if (Op == "dup_typed")
1487 return emitDagDupTyped(DI);
1488 if (Op == "splat")
1489 return emitDagSplat(DI);
1490 if (Op == "save_temp")
1491 return emitDagSaveTemp(DI);
1492 if (Op == "op")
1493 return emitDagOp(DI);
1494 if (Op == "call" || Op == "call_mangled")
1495 return emitDagCall(DI, MatchMangledName: Op == "call_mangled");
1496 if (Op == "name_replace")
1497 return emitDagNameReplace(DI);
1498 if (Op == "literal")
1499 return emitDagLiteral(DI);
1500 assert_with_loc(Assertion: false, Str: "Unknown operation!");
1501 return std::make_pair(x: Type::getVoid(), y: "");
1502}
1503
1504std::pair<Type, std::string>
1505Intrinsic::DagEmitter::emitDagOp(const DagInit *DI) {
1506 std::string Op = cast<StringInit>(Val: DI->getArg(Num: 0))->getAsUnquotedString();
1507 if (DI->getNumArgs() == 2) {
1508 // Unary op.
1509 std::pair<Type, std::string> R =
1510 emitDagArg(Arg: DI->getArg(Num: 1), ArgName: std::string(DI->getArgNameStr(Num: 1)));
1511 return std::make_pair(x&: R.first, y: Op + R.second);
1512 } else {
1513 assert(DI->getNumArgs() == 3 && "Can only handle unary and binary ops!");
1514 std::pair<Type, std::string> R1 =
1515 emitDagArg(Arg: DI->getArg(Num: 1), ArgName: std::string(DI->getArgNameStr(Num: 1)));
1516 std::pair<Type, std::string> R2 =
1517 emitDagArg(Arg: DI->getArg(Num: 2), ArgName: std::string(DI->getArgNameStr(Num: 2)));
1518 assert_with_loc(Assertion: R1.first == R2.first, Str: "Argument type mismatch!");
1519 return std::make_pair(x&: R1.first, y: R1.second + " " + Op + " " + R2.second);
1520 }
1521}
1522
1523std::pair<Type, std::string>
1524Intrinsic::DagEmitter::emitDagCall(const DagInit *DI, bool MatchMangledName) {
1525 std::vector<Type> Types;
1526 std::vector<std::string> Values;
1527 for (unsigned I = 0; I < DI->getNumArgs() - 1; ++I) {
1528 std::pair<Type, std::string> R =
1529 emitDagArg(Arg: DI->getArg(Num: I + 1), ArgName: std::string(DI->getArgNameStr(Num: I + 1)));
1530 Types.push_back(x: R.first);
1531 Values.push_back(x: R.second);
1532 }
1533
1534 // Look up the called intrinsic.
1535 std::string N;
1536 if (const auto *SI = dyn_cast<StringInit>(Val: DI->getArg(Num: 0)))
1537 N = SI->getAsUnquotedString();
1538 else
1539 N = emitDagArg(Arg: DI->getArg(Num: 0), ArgName: "").second;
1540 std::optional<std::string> MangledName;
1541 if (MatchMangledName) {
1542 if (Intr.getRecord()->getValueAsString(FieldName: "Name").contains(Other: "laneq"))
1543 N += "q";
1544 MangledName = Intr.mangleName(Name: N, LocalCK: ClassS);
1545 }
1546 Intrinsic &Callee = Intr.Emitter.getIntrinsic(Name: N, Types, MangledName);
1547
1548 // Make sure the callee is known as an early def.
1549 Callee.setNeededEarly();
1550 Intr.Dependencies.insert(x: &Callee);
1551
1552 // Now create the call itself.
1553 std::string S;
1554 if (!Callee.isBigEndianSafe())
1555 S += CallPrefix.str();
1556 S += Callee.getMangledName(ForceClassS: true) + "(";
1557 for (unsigned I = 0; I < DI->getNumArgs() - 1; ++I) {
1558 if (I != 0)
1559 S += ", ";
1560 S += Values[I];
1561 }
1562 S += ")";
1563
1564 return std::make_pair(x: Callee.getReturnType(), y&: S);
1565}
1566
1567std::pair<Type, std::string>
1568Intrinsic::DagEmitter::emitDagCast(const DagInit *DI, bool IsBitCast) {
1569 // (cast MOD* VAL) -> cast VAL to type given by MOD.
1570 std::pair<Type, std::string> R =
1571 emitDagArg(Arg: DI->getArg(Num: DI->getNumArgs() - 1),
1572 ArgName: std::string(DI->getArgNameStr(Num: DI->getNumArgs() - 1)));
1573 Type castToType = R.first;
1574 for (unsigned ArgIdx = 0; ArgIdx < DI->getNumArgs() - 1; ++ArgIdx) {
1575
1576 // MOD can take several forms:
1577 // 1. $X - take the type of parameter / variable X.
1578 // 2. The value "R" - take the type of the return type.
1579 // 3. a type string
1580 // 4. The value "U" or "S" to switch the signedness.
1581 // 5. The value "H" or "D" to half or double the bitwidth.
    // 6. The value "8" to convert to 8-bit (signed) integer lanes, or the
    //    value "32" to convert to 32-bit lanes.
1583 if (!DI->getArgNameStr(Num: ArgIdx).empty()) {
1584 assert_with_loc(Assertion: Intr.Variables.find(x: DI->getArgNameStr(Num: ArgIdx)) !=
1585 Intr.Variables.end(),
1586 Str: "Variable not found");
1587 castToType =
1588 Intr.Variables[std::string(DI->getArgNameStr(Num: ArgIdx))].getType();
1589 } else {
1590 const auto *SI = dyn_cast<StringInit>(Val: DI->getArg(Num: ArgIdx));
1591 assert_with_loc(Assertion: SI, Str: "Expected string type or $Name for cast type");
1592
1593 if (SI->getAsUnquotedString() == "R") {
1594 castToType = Intr.getReturnType();
1595 } else if (SI->getAsUnquotedString() == "U") {
1596 castToType.makeUnsigned();
1597 } else if (SI->getAsUnquotedString() == "S") {
1598 castToType.makeSigned();
1599 } else if (SI->getAsUnquotedString() == "H") {
1600 castToType.halveLanes();
1601 } else if (SI->getAsUnquotedString() == "D") {
1602 castToType.doubleLanes();
1603 } else if (SI->getAsUnquotedString() == "8") {
1604 castToType.makeInteger(ElemWidth: 8, Sign: true);
1605 } else if (SI->getAsUnquotedString() == "32") {
1606 castToType.make32BitElement();
1607 } else {
1608 castToType = Type::fromTypedefName(Name: SI->getAsUnquotedString());
1609 assert_with_loc(Assertion: !castToType.isVoid(), Str: "Unknown typedef");
1610 }
1611 }
1612 }
1613
1614 std::string S;
1615 if (IsBitCast)
1616 S = "__builtin_bit_cast(" + castToType.str() + ", " + R.second + ")";
1617 else
1618 S = "(" + castToType.str() + ")(" + R.second + ")";
1619
1620 return std::make_pair(x&: castToType, y&: S);
1621}
1622
1623std::pair<Type, std::string>
1624Intrinsic::DagEmitter::emitDagShuffle(const DagInit *DI) {
1625 // See the documentation in arm_neon.td for a description of these operators.
1626 class LowHalf : public SetTheory::Operator {
1627 public:
1628 void apply(SetTheory &ST, const DagInit *Expr, SetTheory::RecSet &Elts,
1629 ArrayRef<SMLoc> Loc) override {
1630 SetTheory::RecSet Elts2;
1631 ST.evaluate(begin: Expr->arg_begin(), end: Expr->arg_end(), Elts&: Elts2, Loc);
1632 Elts.insert(Start: Elts2.begin(), End: Elts2.begin() + (Elts2.size() / 2));
1633 }
1634 };
1635
1636 class HighHalf : public SetTheory::Operator {
1637 public:
1638 void apply(SetTheory &ST, const DagInit *Expr, SetTheory::RecSet &Elts,
1639 ArrayRef<SMLoc> Loc) override {
1640 SetTheory::RecSet Elts2;
1641 ST.evaluate(begin: Expr->arg_begin(), end: Expr->arg_end(), Elts&: Elts2, Loc);
1642 Elts.insert(Start: Elts2.begin() + (Elts2.size() / 2), End: Elts2.end());
1643 }
1644 };
1645
1646 class Rev : public SetTheory::Operator {
1647 unsigned ElementSize;
1648
1649 public:
1650 Rev(unsigned ElementSize) : ElementSize(ElementSize) {}
1651
1652 void apply(SetTheory &ST, const DagInit *Expr, SetTheory::RecSet &Elts,
1653 ArrayRef<SMLoc> Loc) override {
1654 SetTheory::RecSet Elts2;
1655 ST.evaluate(begin: Expr->arg_begin() + 1, end: Expr->arg_end(), Elts&: Elts2, Loc);
1656
1657 int64_t VectorSize = cast<IntInit>(Val: Expr->getArg(Num: 0))->getValue();
1658 VectorSize /= ElementSize;
1659
1660 std::vector<const Record *> Revved;
1661 for (unsigned VI = 0; VI < Elts2.size(); VI += VectorSize) {
1662 for (int LI = VectorSize - 1; LI >= 0; --LI) {
1663 Revved.push_back(x: Elts2[VI + LI]);
1664 }
1665 }
1666
1667 Elts.insert_range(R&: Revved);
1668 }
1669 };
1670
1671 class MaskExpander : public SetTheory::Expander {
1672 unsigned N;
1673
1674 public:
1675 MaskExpander(unsigned N) : N(N) {}
1676
1677 void expand(SetTheory &ST, const Record *R,
1678 SetTheory::RecSet &Elts) override {
1679 unsigned Addend = 0;
1680 if (R->getName() == "mask0")
1681 Addend = 0;
1682 else if (R->getName() == "mask1")
1683 Addend = N;
1684 else
1685 return;
1686 for (unsigned I = 0; I < N; ++I)
1687 Elts.insert(X: R->getRecords().getDef(Name: "sv" + utostr(X: I + Addend)));
1688 }
1689 };
1690
1691 // (shuffle arg1, arg2, sequence)
1692 std::pair<Type, std::string> Arg1 =
1693 emitDagArg(Arg: DI->getArg(Num: 0), ArgName: std::string(DI->getArgNameStr(Num: 0)));
1694 std::pair<Type, std::string> Arg2 =
1695 emitDagArg(Arg: DI->getArg(Num: 1), ArgName: std::string(DI->getArgNameStr(Num: 1)));
1696 assert_with_loc(Assertion: Arg1.first == Arg2.first,
1697 Str: "Different types in arguments to shuffle!");
1698
1699 SetTheory ST;
1700 SetTheory::RecSet Elts;
1701 ST.addOperator(Name: "lowhalf", std::make_unique<LowHalf>());
1702 ST.addOperator(Name: "highhalf", std::make_unique<HighHalf>());
1703 ST.addOperator(Name: "rev",
1704 std::make_unique<Rev>(args: Arg1.first.getElementSizeInBits()));
1705 ST.addExpander(ClassName: "MaskExpand",
1706 std::make_unique<MaskExpander>(args: Arg1.first.getNumElements()));
1707 ST.evaluate(Expr: DI->getArg(Num: 2), Elts, Loc: {});
1708
1709 std::string S = "__builtin_shufflevector(" + Arg1.second + ", " + Arg2.second;
1710 for (auto &E : Elts) {
1711 StringRef Name = E->getName();
1712 assert_with_loc(Assertion: Name.starts_with(Prefix: "sv"),
1713 Str: "Incorrect element kind in shuffle mask!");
1714 S += ", " + Name.drop_front(N: 2).str();
1715 }
1716 S += ")";
1717
1718 // Recalculate the return type - the shuffle may have halved or doubled it.
1719 Type T(Arg1.first);
1720 if (Elts.size() > T.getNumElements()) {
1721 assert_with_loc(
1722 Assertion: Elts.size() == T.getNumElements() * 2,
1723 Str: "Can only double or half the number of elements in a shuffle!");
1724 T.doubleLanes();
1725 } else if (Elts.size() < T.getNumElements()) {
1726 assert_with_loc(
1727 Assertion: Elts.size() == T.getNumElements() / 2,
1728 Str: "Can only double or half the number of elements in a shuffle!");
1729 T.halveLanes();
1730 }
1731
1732 return std::make_pair(x&: T, y&: S);
1733}
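
// A worked example of the shuffle machinery above (illustrative): with an
// 8-element first argument, (shuffle $p0, $p0, (highhalf mask0)) selects
// sv4..sv7 and emits __builtin_shufflevector(__p0, __p0, 4, 5, 6, 7), with
// the result type halved to four lanes.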
1734
1735std::pair<Type, std::string>
1736Intrinsic::DagEmitter::emitDagDup(const DagInit *DI) {
1737 assert_with_loc(Assertion: DI->getNumArgs() == 1, Str: "dup() expects one argument");
1738 std::pair<Type, std::string> A =
1739 emitDagArg(Arg: DI->getArg(Num: 0), ArgName: std::string(DI->getArgNameStr(Num: 0)));
1740 assert_with_loc(Assertion: A.first.isScalar(), Str: "dup() expects a scalar argument");
1741
1742 Type T = Intr.getBaseType();
1743 assert_with_loc(Assertion: T.isVector(), Str: "dup() used but default type is scalar!");
1744 std::string S = "(" + T.str() + ") {";
1745 for (unsigned I = 0; I < T.getNumElements(); ++I) {
1746 if (I != 0)
1747 S += ", ";
1748 S += A.second;
1749 }
1750 S += "}";
1751
1752 return std::make_pair(x&: T, y&: S);
1753}
1754
1755std::pair<Type, std::string>
1756Intrinsic::DagEmitter::emitDagDupTyped(const DagInit *DI) {
1757 assert_with_loc(Assertion: DI->getNumArgs() == 2, Str: "dup_typed() expects two arguments");
1758 std::pair<Type, std::string> B =
1759 emitDagArg(Arg: DI->getArg(Num: 1), ArgName: std::string(DI->getArgNameStr(Num: 1)));
1760 assert_with_loc(Assertion: B.first.isScalar(),
1761 Str: "dup_typed() requires a scalar as the second argument");
1762 Type T;
1763 // If the type argument is a constant string, construct the type directly.
1764 if (const auto *SI = dyn_cast<StringInit>(Val: DI->getArg(Num: 0))) {
1765 T = Type::fromTypedefName(Name: SI->getAsUnquotedString());
1766 assert_with_loc(Assertion: !T.isVoid(), Str: "Unknown typedef");
1767 } else
1768 T = emitDagArg(Arg: DI->getArg(Num: 0), ArgName: std::string(DI->getArgNameStr(Num: 0))).first;
1769
1770 assert_with_loc(Assertion: T.isVector(), Str: "dup_typed() used but target type is scalar!");
1771 std::string S = "(" + T.str() + ") {";
1772 for (unsigned I = 0; I < T.getNumElements(); ++I) {
1773 if (I != 0)
1774 S += ", ";
1775 S += B.second;
1776 }
1777 S += "}";
1778
1779 return std::make_pair(x&: T, y&: S);
1780}
1781
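/// emitDagSplat - Replicate one lane of a vector across the intrinsic's base
/// type via __builtin_shufflevector; e.g. (illustrative names) splatting lane
/// __lane of an int32x4_t value __a emits
/// "__builtin_shufflevector(__a, __a, __lane, __lane, __lane, __lane)".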
1782std::pair<Type, std::string>
1783Intrinsic::DagEmitter::emitDagSplat(const DagInit *DI) {
1784 assert_with_loc(Assertion: DI->getNumArgs() == 2, Str: "splat() expects two arguments");
1785 std::pair<Type, std::string> A =
1786 emitDagArg(Arg: DI->getArg(Num: 0), ArgName: std::string(DI->getArgNameStr(Num: 0)));
1787 std::pair<Type, std::string> B =
1788 emitDagArg(Arg: DI->getArg(Num: 1), ArgName: std::string(DI->getArgNameStr(Num: 1)));
1789
1790 assert_with_loc(Assertion: B.first.isScalar(),
1791 Str: "splat() requires a scalar int as the second argument");
1792
1793 std::string S = "__builtin_shufflevector(" + A.second + ", " + A.second;
1794 for (unsigned I = 0; I < Intr.getBaseType().getNumElements(); ++I) {
1795 S += ", " + B.second;
1796 }
1797 S += ")";
1798
1799 return std::make_pair(x: Intr.getBaseType(), y&: S);
1800}
1801
1802std::pair<Type, std::string>
1803Intrinsic::DagEmitter::emitDagSaveTemp(const DagInit *DI) {
1804 assert_with_loc(Assertion: DI->getNumArgs() == 2, Str: "save_temp() expects two arguments");
1805 std::pair<Type, std::string> A =
1806 emitDagArg(Arg: DI->getArg(Num: 1), ArgName: std::string(DI->getArgNameStr(Num: 1)));
1807
1808 assert_with_loc(Assertion: !A.first.isVoid(),
1809 Str: "Argument to save_temp() must have non-void type!");
1810
1811 std::string N = std::string(DI->getArgNameStr(Num: 0));
1812 assert_with_loc(Assertion: !N.empty(),
1813 Str: "save_temp() expects a name as the first argument");
1814
1815 auto [It, Inserted] =
1816 Intr.Variables.try_emplace(k: N, args&: A.first, args: N + Intr.VariablePostfix);
1817 assert_with_loc(Assertion: Inserted, Str: "Variable already defined!");
1818
1819 std::string S = A.first.str() + " " + It->second.getName() + " = " + A.second;
1820
1821 return std::make_pair(x: Type::getVoid(), y&: S);
1822}
1823
1824std::pair<Type, std::string>
1825Intrinsic::DagEmitter::emitDagNameReplace(const DagInit *DI) {
1826 std::string S = Intr.Name;
1827
1828 assert_with_loc(Assertion: DI->getNumArgs() == 2, Str: "name_replace requires 2 arguments!");
1829 std::string ToReplace = cast<StringInit>(Val: DI->getArg(Num: 0))->getAsUnquotedString();
1830 std::string ReplaceWith = cast<StringInit>(Val: DI->getArg(Num: 1))->getAsUnquotedString();
1831
1832 size_t Idx = S.find(str: ToReplace);
1833
1834 assert_with_loc(Assertion: Idx != std::string::npos, Str: "name should contain '" + ToReplace + "'!");
1835 S.replace(pos: Idx, n: ToReplace.size(), str: ReplaceWith);
1836
1837 return std::make_pair(x: Type::getVoid(), y&: S);
1838}
1839
1840std::pair<Type, std::string>
1841Intrinsic::DagEmitter::emitDagLiteral(const DagInit *DI) {
1842 std::string Ty = cast<StringInit>(Val: DI->getArg(Num: 0))->getAsUnquotedString();
1843 std::string Value = cast<StringInit>(Val: DI->getArg(Num: 1))->getAsUnquotedString();
1844 return std::make_pair(x: Type::fromTypedefName(Name: Ty), y&: Value);
1845}
1846
1847std::pair<Type, std::string>
1848Intrinsic::DagEmitter::emitDagArg(const Init *Arg, std::string ArgName) {
1849 if (!ArgName.empty()) {
1850 assert_with_loc(Assertion: !Arg->isComplete(),
1851 Str: "Arguments must either be DAGs or names, not both!");
1852 assert_with_loc(Assertion: Intr.Variables.find(x: ArgName) != Intr.Variables.end(),
1853 Str: "Variable not defined!");
1854 Variable &V = Intr.Variables[ArgName];
1855 return std::make_pair(x: V.getType(), y: V.getName());
1856 }
1857
1858 assert(Arg && "Neither ArgName nor Arg?!");
1859 const auto *DI = dyn_cast<DagInit>(Val: Arg);
1860 assert_with_loc(Assertion: DI, Str: "Arguments must either be DAGs or names!");
1861
1862 return emitDag(DI);
1863}
1864
1865std::string Intrinsic::generate() {
1866 // Avoid duplicated code for big and little endian
1867 if (isBigEndianSafe()) {
1868 generateImpl(ReverseArguments: false, NamePrefix: "", CallPrefix: "");
1869 return OS.str();
1870 }
1871 // Little endian intrinsics are simple and don't require any argument
1872 // swapping.
1873 OS << "#ifdef __LITTLE_ENDIAN__\n";
1874
1875 generateImpl(ReverseArguments: false, NamePrefix: "", CallPrefix: "");
1876
1877 OS << "#else\n";
1878
1879 // Big endian intrinsics are more complex. The user intended these intrinsics
1880 // to operate on a vector "as-if" loaded by LDR (for AArch64), VLDR (for
1881 // 64-bit vectors on AArch32), or VLDM (for 128-bit vectors on AArch32) but
1882 // we load as-if LD1 (for AArch64) or VLD1 (for AArch32). So we should swap
1883 // all arguments and swap the return value too.
1884 //
1885 // If we call sub-intrinsics, we should call a version that does
1886 // not re-swap the arguments!
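//
// Illustrative shape of what ends up in the header for a non-big-endian-safe
// intrinsic (a sketch with placeholder names, not verbatim emitter output):
//
//   #ifdef __LITTLE_ENDIAN__
//   __ai <ret> vfoo(<args>) { <plain body> }
//   #else
//   __ai <ret> vfoo(<args>) {
//     <lane-reverse each argument>
//     <body calling __noswap_-prefixed sub-intrinsics>
//     <lane-reverse the result>
//   }
//   __ai <ret> __noswap_vfoo(<args>) { <plain body> }  // only if NeededEarly
//   #endif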
1887 generateImpl(ReverseArguments: true, NamePrefix: "", CallPrefix: "__noswap_");
1888
1889 // If we're needed early, create a non-swapping variant for
1890 // big-endian.
1891 if (NeededEarly) {
1892 generateImpl(ReverseArguments: false, NamePrefix: "__noswap_", CallPrefix: "__noswap_");
1893 }
1894 OS << "#endif\n\n";
1895
1896 return OS.str();
1897}
1898
1899void Intrinsic::generateImpl(bool ReverseArguments,
1900 StringRef NamePrefix, StringRef CallPrefix) {
1901 CurrentRecord = R;
1902
1903 // If we call a macro, our local variables may be corrupted due to
1904 // lack of proper lexical scoping. So, add a globally unique postfix
1905 // to every variable.
1906 //
1907 // indexBody() should have set up the Dependencies set by now.
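//
// For example (the unique number is illustrative): when a dependency is
// macro-implemented, a temporary named "__ret" is emitted as "__ret_42", so
// nested macro expansions cannot shadow each other's locals.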
1908 for (auto *I : Dependencies)
1909 if (I->UseMacro) {
1910 VariablePostfix = "_" + utostr(X: Emitter.getUniqueNumber());
1911 break;
1912 }
1913
1914 initVariables();
1915
1916 emitPrototype(NamePrefix);
1917
1918 if (IsUnavailable) {
1919 OS << " __attribute__((unavailable));";
1920 } else {
1921 emitOpeningBrace();
1922 // Emit the return variable declaration first so as not to trigger
1923 // -Wdeclaration-after-statement.
1924 emitReturnVarDecl();
1925 emitShadowedArgs();
1926 if (ReverseArguments)
1927 emitArgumentReversal();
1928 emitBody(CallPrefix);
1929 if (ReverseArguments)
1930 emitReturnReversal();
1931 emitReturn();
1932 emitClosingBrace();
1933 }
1934 OS << "\n";
1935
1936 CurrentRecord = nullptr;
1937}
1938
1939void Intrinsic::indexBody() {
1940 CurrentRecord = R;
1941
1942 initVariables();
1943 // Emit the return variable declaration first so as not to trigger
1944 // -Wdeclaration-after-statement.
1945 emitReturnVarDecl();
1946 emitBody(CallPrefix: "");
1947 OS.str(s: "");
1948
1949 CurrentRecord = nullptr;
1950}
1951
1952//===----------------------------------------------------------------------===//
1953// NeonEmitter implementation
1954//===----------------------------------------------------------------------===//
1955
1956Intrinsic &NeonEmitter::getIntrinsic(StringRef Name, ArrayRef<Type> Types,
1957 std::optional<std::string> MangledName) {
1958 // First, look up the name in the intrinsic map.
1959 assert_with_loc(Assertion: IntrinsicMap.find(x: Name) != IntrinsicMap.end(),
1960 Str: ("Intrinsic '" + Name + "' not found!").str());
1961 auto &V = IntrinsicMap.find(x: Name)->second;
1962 std::vector<Intrinsic *> GoodVec;
1963
1964 // Create a string to print if we end up failing.
1965 std::string ErrMsg = "looking up intrinsic '" + Name.str() + "(";
1966 for (unsigned I = 0; I < Types.size(); ++I) {
1967 if (I != 0)
1968 ErrMsg += ", ";
1969 ErrMsg += Types[I].str();
1970 }
1971 ErrMsg += ")'\n";
1972 ErrMsg += "Available overloads:\n";
1973
1974 // Now, look through each intrinsic implementation and see if the types are
1975 // compatible.
1976 for (auto &I : V) {
1977 ErrMsg += " - " + I.getReturnType().str() + " " + I.getMangledName();
1978 ErrMsg += "(";
1979 for (unsigned A = 0; A < I.getNumParams(); ++A) {
1980 if (A != 0)
1981 ErrMsg += ", ";
1982 ErrMsg += I.getParamType(I: A).str();
1983 }
1984 ErrMsg += ")\n";
1985
1986 if (MangledName && MangledName != I.getMangledName(ForceClassS: true))
1987 continue;
1988
1989 if (I.getNumParams() != Types.size())
1990 continue;
1991
1992 unsigned ArgNum = 0;
1993 bool MatchingArgumentTypes = all_of(Range&: Types, P: [&](const auto &Type) {
1994 return Type == I.getParamType(I: ArgNum++);
1995 });
1996
1997 if (MatchingArgumentTypes)
1998 GoodVec.push_back(x: &I);
1999 }
2000
2001 assert_with_loc(Assertion: !GoodVec.empty(),
2002 Str: "No compatible intrinsic found - " + ErrMsg);
2003 assert_with_loc(Assertion: GoodVec.size() == 1, Str: "Multiple overloads found - " + ErrMsg);
2004
2005 return *GoodVec.front();
2006}
2007
2008void NeonEmitter::createIntrinsic(const Record *R,
2009 SmallVectorImpl<Intrinsic *> &Out) {
2010 std::string Name = std::string(R->getValueAsString(FieldName: "Name"));
2011 std::string Proto = std::string(R->getValueAsString(FieldName: "Prototype"));
2012 std::string Types = std::string(R->getValueAsString(FieldName: "Types"));
2013 const Record *OperationRec = R->getValueAsDef(FieldName: "Operation");
2014 bool BigEndianSafe = R->getValueAsBit(FieldName: "BigEndianSafe");
2015 std::string ArchGuard = std::string(R->getValueAsString(FieldName: "ArchGuard"));
2016 std::string TargetGuard = std::string(R->getValueAsString(FieldName: "TargetGuard"));
2017 bool IsUnavailable = OperationRec->getValueAsBit(FieldName: "Unavailable");
2018 std::string CartesianProductWith = std::string(R->getValueAsString(FieldName: "CartesianProductWith"));
2019
2020 // Set the global current record. This allows assert_with_loc to produce
2021 // decent location information even when highly nested.
2022 CurrentRecord = R;
2023
2024 const ListInit *Body = OperationRec->getValueAsListInit(FieldName: "Ops");
2025
2026 std::vector<TypeSpec> TypeSpecs = TypeSpec::fromTypeSpecs(Str: Types);
2027
2028 ClassKind CK = ClassNone;
2029 if (!R->getDirectSuperClasses().empty())
2030 CK = ClassMap[R->getDirectSuperClasses()[0].first];
2031
2032 std::vector<std::pair<TypeSpec, TypeSpec>> NewTypeSpecs;
2033 if (!CartesianProductWith.empty()) {
2034 std::vector<TypeSpec> ProductTypeSpecs = TypeSpec::fromTypeSpecs(Str: CartesianProductWith);
2035 for (auto TS : TypeSpecs) {
2036 Type DefaultT(TS, ".");
2037 for (auto SrcTS : ProductTypeSpecs) {
2038 Type DefaultSrcT(SrcTS, ".");
2039 if (TS == SrcTS ||
2040 DefaultSrcT.getSizeInBits() != DefaultT.getSizeInBits())
2041 continue;
2042 NewTypeSpecs.push_back(x: std::make_pair(x&: TS, y&: SrcTS));
2043 }
2044 }
2045 } else {
2046 for (auto TS : TypeSpecs) {
2047 NewTypeSpecs.push_back(x: std::make_pair(x&: TS, y&: TS));
2048 }
2049 }
2050
2051 sort(C&: NewTypeSpecs);
2052 NewTypeSpecs.erase(first: llvm::unique(R&: NewTypeSpecs), last: NewTypeSpecs.end());
2053 auto &Entry = IntrinsicMap[Name];
2054
2055 for (auto &I : NewTypeSpecs) {
2056
2057 // MFloat8 type is only available on AArch64. If encountered set ArchGuard
2058 // correctly.
2059 std::string NewArchGuard = ArchGuard;
2060 if (Type(I.first, ".").isMFloat8()) {
2061 if (NewArchGuard.empty()) {
2062 NewArchGuard = "defined(__aarch64__)";
2063 } else if (NewArchGuard.find(s: "defined(__aarch64__)") ==
2064 std::string::npos) {
2065 NewArchGuard = "defined(__aarch64__) && (" + NewArchGuard + ")";
2066 }
2067 }
2068 Entry.emplace_back(args&: R, args&: Name, args&: Proto, args&: I.first, args&: I.second, args&: CK, args&: Body, args&: *this,
2069 args&: NewArchGuard, args&: TargetGuard, args&: IsUnavailable, args&: BigEndianSafe);
2070 Out.push_back(Elt: &Entry.back());
2071 }
2072
2073 CurrentRecord = nullptr;
2074}
2075
2076/// genBuiltinsDef: Generate the builtin infos, checking for unique builtin
2077/// declarations.
2078void NeonEmitter::genBuiltinsDef(raw_ostream &OS,
2079 SmallVectorImpl<Intrinsic *> &Defs) {
2080 // We only want to emit a builtin once, and in order of its name.
2081 std::map<std::string, Intrinsic *> Builtins;
2082
2083 llvm::StringToOffsetTable Table;
2084 Table.GetOrAddStringOffset(Str: "");
2085 Table.GetOrAddStringOffset(Str: "n");
2086
2087 for (auto *Def : Defs) {
2088 if (Def->hasBody())
2089 continue;
2090
2091 if (Builtins.insert(x: {Def->getMangledName(), Def}).second) {
2092 Table.GetOrAddStringOffset(Str: Def->getMangledName());
2093 Table.GetOrAddStringOffset(Str: Def->getBuiltinTypeStr());
2094 Table.GetOrAddStringOffset(Str: Def->getTargetGuard());
2095 }
2096 }
2097
2098 OS << "#ifdef GET_NEON_BUILTIN_ENUMERATORS\n";
2099 for (const auto &[Name, Def] : Builtins) {
2100 OS << " BI__builtin_neon_" << Name << ",\n";
2101 }
2102 OS << "#endif // GET_NEON_BUILTIN_ENUMERATORS\n\n";
2103
2104 OS << "#ifdef GET_NEON_BUILTIN_STR_TABLE\n";
2105 Table.EmitStringTableDef(OS, Name: "BuiltinStrings");
2106 OS << "#endif // GET_NEON_BUILTIN_STR_TABLE\n\n";
2107
2108 OS << "#ifdef GET_NEON_BUILTIN_INFOS\n";
2109 for (const auto &[Name, Def] : Builtins) {
2110 OS << " Builtin::Info{Builtin::Info::StrOffsets{"
2111 << Table.GetStringOffset(Str: Def->getMangledName()) << " /* "
2112 << Def->getMangledName() << " */, ";
2113 OS << Table.GetStringOffset(Str: Def->getBuiltinTypeStr()) << " /* "
2114 << Def->getBuiltinTypeStr() << " */, ";
2115 OS << Table.GetStringOffset(Str: "n") << " /* n */, ";
2116 OS << Table.GetStringOffset(Str: Def->getTargetGuard()) << " /* "
2117 << Def->getTargetGuard() << " */}, ";
2118 OS << "HeaderDesc::NO_HEADER, ALL_LANGUAGES},\n";
2119 }
2120 OS << "#endif // GET_NEON_BUILTIN_INFOS\n\n";
2121}
2122
2123void NeonEmitter::genStreamingSVECompatibleList(
2124 raw_ostream &OS, SmallVectorImpl<Intrinsic *> &Defs) {
2125 OS << "#ifdef GET_NEON_STREAMING_COMPAT_FLAG\n";
2126
2127 std::set<std::string> Emitted;
2128 for (auto *Def : Defs) {
2129 // If the def has a body (that is, it has Operation DAGs), it won't call
2130 // __builtin_neon_* so we don't need to generate a definition for it.
2131 if (Def->hasBody())
2132 continue;
2133
2134 std::string Name = Def->getMangledName();
2135 if (Emitted.find(x: Name) != Emitted.end())
2136 continue;
2137
2138 // FIXME: We should make exceptions here for some NEON builtins that are
2139 // permitted in streaming mode.
2140 OS << "case NEON::BI__builtin_neon_" << Name
2141 << ": BuiltinType = ArmNonStreaming; break;\n";
2142 Emitted.insert(x: Name);
2143 }
2144 OS << "#endif\n\n";
2145}
2146
2147/// Generate the ARM and AArch64 overloaded type checking code for
2148/// SemaChecking.cpp, checking for unique builtin declarations.
2149void NeonEmitter::genOverloadTypeCheckCode(raw_ostream &OS,
2150 SmallVectorImpl<Intrinsic *> &Defs) {
2151 OS << "#ifdef GET_NEON_OVERLOAD_CHECK\n";
2152
2153 // We record each overload check line before emitting because subsequent Inst
2154 // definitions may extend the number of permitted types (i.e. augment the
2155 // Mask). Use std::map to avoid sorting the table by hash number.
2156 struct OverloadInfo {
2157 uint64_t Mask = 0ULL;
2158 int PtrArgNum = 0;
2159 bool HasConstPtr = false;
2160 OverloadInfo() = default;
2161 };
2162 std::map<std::string, OverloadInfo> OverloadMap;
2163
2164 for (auto *Def : Defs) {
2165 // If the def has a body (that is, it has Operation DAGs), it won't call
2166 // __builtin_neon_* so we don't need to generate a definition for it.
2167 if (Def->hasBody())
2168 continue;
2169 // Functions which have a scalar argument cannot be overloaded; there is no
2170 // need to check them when emitting the type checking code.
2171 if (Def->protoHasScalar())
2172 continue;
2173
2174 uint64_t Mask = 0ULL;
2175 Mask |= 1ULL << Def->getPolymorphicKeyType().getNeonEnum();
2176
2177 // Check if the function has a pointer or const pointer argument.
2178 int PtrArgNum = -1;
2179 bool HasConstPtr = false;
2180 for (unsigned I = 0; I < Def->getNumParams(); ++I) {
2181 const auto &Type = Def->getParamType(I);
2182 if (Type.isPointer()) {
2183 PtrArgNum = I;
2184 HasConstPtr = Type.isConstPointer();
2185 }
2186 }
2187
2188 // For sret builtins, adjust the pointer argument index.
2189 if (PtrArgNum >= 0 && Def->getReturnType().getNumVectors() > 1)
2190 PtrArgNum += 1;
2191
2192 std::string Name = Def->getName();
2193 // Omit type checking for the pointer arguments of vld1_lane, vld1_dup,
2194 // vst1_lane, vldap1_lane, and vstl1_lane intrinsics. Using a pointer to
2195 // the vector element type with one of those operations causes codegen to
2196 // select an aligned load/store instruction. If you want an unaligned
2197 // operation, the pointer argument needs to have less alignment than the
2198 // element type, so just accept any pointer type.
2199 if (Name == "vld1_lane" || Name == "vld1_dup" || Name == "vst1_lane" ||
2200 Name == "vldap1_lane" || Name == "vstl1_lane") {
2201 PtrArgNum = -1;
2202 HasConstPtr = false;
2203 }
2204
2205 if (Mask) {
2206 OverloadInfo &OI = OverloadMap[Def->getMangledName()];
2207 OI.Mask |= Mask;
2208 OI.PtrArgNum |= PtrArgNum;
2209 OI.HasConstPtr = HasConstPtr;
2210 }
2211 }
2212
2213 for (auto &I : OverloadMap) {
2214 OverloadInfo &OI = I.second;
2215
2216 OS << "case NEON::BI__builtin_neon_" << I.first << ": ";
2217 OS << "mask = 0x" << Twine::utohexstr(Val: OI.Mask) << "ULL";
2218 if (OI.PtrArgNum >= 0)
2219 OS << "; PtrArgNum = " << OI.PtrArgNum;
2220 if (OI.HasConstPtr)
2221 OS << "; HasConstPtr = true";
2222 OS << "; break;\n";
2223 }
2224 OS << "#endif\n\n";
2225}
2226
2227inline bool
2228NeonEmitter::areRangeChecksCompatible(const ArrayRef<ImmCheck> ChecksA,
2229 const ArrayRef<ImmCheck> ChecksB) {
2230 // If multiple intrinsics map to the same builtin, we must ensure that the
2231 // intended range checks performed in SemaArm.cpp do not contradict each
2232 // other, as these are emitted once per builtin.
2233 //
2234 // The arguments to be checked and type of each check to be performed must be
2235 // the same. The element types may differ as they will be resolved
2236 // per-intrinsic as overloaded types by SemaArm.cpp, though the vector sizes
2237 // are not and so must be the same.
2238 bool compat =
2239 std::equal(first1: ChecksA.begin(), last1: ChecksA.end(), first2: ChecksB.begin(), last2: ChecksB.end(),
2240 binary_pred: [](const auto &A, const auto &B) {
2241 return A.getImmArgIdx() == B.getImmArgIdx() &&
2242 A.getKind() == B.getKind() &&
2243 A.getVecSizeInBits() == B.getVecSizeInBits();
2244 });
2245
2246 return compat;
2247}
2248
2249void NeonEmitter::genIntrinsicRangeCheckCode(
2250 raw_ostream &OS, SmallVectorImpl<Intrinsic *> &Defs) {
2251 std::unordered_map<std::string, ArrayRef<ImmCheck>> Emitted;
2252
2253 OS << "#ifdef GET_NEON_IMMEDIATE_CHECK\n";
2254 for (auto &Def : Defs) {
2255 // If the Def has a body (operation DAGs), it does not call __builtin_neon_*, so no immediate range check is needed.
2256 if (Def->hasBody() || !Def->hasImmediate())
2257 continue;
2258
2259 // Sorted by immediate argument index
2260 ArrayRef<ImmCheck> Checks = Def->getImmChecks();
2261
2262 auto [It, Inserted] = Emitted.try_emplace(k: Def->getMangledName(), args&: Checks);
2263 if (!Inserted) {
2264 assert(areRangeChecksCompatible(Checks, It->second) &&
2265 "Neon intrinsics with incompatible immediate range checks cannot "
2266 "share a builtin.");
2267 continue; // Ensure this is emitted only once
2268 }
2269
2270 // Emit builtin's range checks
2271 OS << "case NEON::BI__builtin_neon_" << Def->getMangledName() << ":\n";
2272 for (const auto &Check : Checks) {
2273 OS << "  ImmChecks.emplace_back(" << Check.getImmArgIdx() << ", "
2274 << Check.getKind() << ", " << Check.getElementSizeInBits() << ", "
2275 << Check.getVecSizeInBits() << ");\n";
2276 }
2277 OS << "  break;\n";
2278 }
2279
2280 OS << "#endif\n\n";
2281}
2282
2283/// runHeader - Emit a file with sections defining:
2284/// 1. the NEON section of BuiltinsARM.def and BuiltinsAArch64.def.
2285/// 2. the SemaChecking code for the type overload checking.
2286/// 3. the SemaChecking code for validation of intrinsic immediate arguments.
2287void NeonEmitter::runHeader(raw_ostream &OS) {
2288 SmallVector<Intrinsic *, 128> Defs;
2289 for (const Record *R : Records.getAllDerivedDefinitions(ClassName: "Inst"))
2290 createIntrinsic(R, Out&: Defs);
2291
2292 // Generate shared BuiltinsXXX.def
2293 genBuiltinsDef(OS, Defs);
2294
2295 // Generate ARM overloaded type checking code for SemaChecking.cpp
2296 genOverloadTypeCheckCode(OS, Defs);
2297
2298 genStreamingSVECompatibleList(OS, Defs);
2299
2300 // Generate ARM range checking code for shift/lane immediates.
2301 genIntrinsicRangeCheckCode(OS, Defs);
2302}
2303
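/// emitNeonTypeDefs - Emit a vector typedef for each type spec in "types",
/// followed by the 2-, 3- and 4-element struct wrappers. For example
/// (illustrative of the shape of the output), an 8 x i8 spec produces
///   typedef __attribute__((neon_vector_type(8))) int8_t int8x8_t;
/// and, for NumMembers == 2,
///   typedef struct int8x8x2_t {
///     int8x8_t val[2];
///   } int8x8x2_t;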
2304static void emitNeonTypeDefs(const std::string& types, raw_ostream &OS) {
2305 std::string TypedefTypes(types);
2306 std::vector<TypeSpec> TDTypeVec = TypeSpec::fromTypeSpecs(Str: TypedefTypes);
2307
2308 // Emit vector typedefs.
2309 bool InIfdef = false;
2310 for (auto &TS : TDTypeVec) {
2311 bool IsA64 = false;
2312 Type T(TS, ".");
2313 if (T.isDouble() || T.isMFloat8())
2314 IsA64 = true;
2315
2316 if (InIfdef && !IsA64) {
2317 OS << "#endif\n";
2318 InIfdef = false;
2319 }
2320 if (!InIfdef && IsA64) {
2321 OS << "#if defined(__aarch64__) || defined(__arm64ec__)\n";
2322 InIfdef = true;
2323 }
2324
2325 if (T.isPoly())
2326 OS << "typedef __attribute__((neon_polyvector_type(";
2327 else
2328 OS << "typedef __attribute__((neon_vector_type(";
2329
2330 Type T2 = T;
2331 T2.makeScalar();
2332 OS << T.getNumElements();
2333 OS << "))) " << T2.str();
2334 OS << " " << T.str() << ";\n";
2335 }
2336 if (InIfdef)
2337 OS << "#endif\n";
2338 OS << "\n";
2339
2340 // Emit struct typedefs.
2341 InIfdef = false;
2342 for (unsigned NumMembers = 2; NumMembers <= 4; ++NumMembers) {
2343 for (auto &TS : TDTypeVec) {
2344 bool IsA64 = false;
2345 Type T(TS, ".");
2346 if (T.isDouble() || T.isMFloat8())
2347 IsA64 = true;
2348
2349 if (InIfdef && !IsA64) {
2350 OS << "#endif\n";
2351 InIfdef = false;
2352 }
2353 if (!InIfdef && IsA64) {
2354 OS << "#if defined(__aarch64__) || defined(__arm64ec__)\n";
2355 InIfdef = true;
2356 }
2357
2358 const char Mods[] = { static_cast<char>('2' + (NumMembers - 2)), 0};
2359 Type VT(TS, Mods);
2360 OS << "typedef struct " << VT.str() << " {\n";
2361 OS << " " << T.str() << " val";
2362 OS << "[" << NumMembers << "]";
2363 OS << ";\n} ";
2364 OS << VT.str() << ";\n";
2365 OS << "\n";
2366 }
2367 }
2368 if (InIfdef)
2369 OS << "#endif\n";
2370}
2371
2372/// run - Read the records in arm_neon.td and output arm_neon.h. arm_neon.h
2373 /// is composed of type definitions and function declarations.
2374void NeonEmitter::run(raw_ostream &OS) {
2375 OS << "/*===---- arm_neon.h - ARM Neon intrinsics "
2376 "------------------------------"
2377 "---===\n"
2378 " *\n"
2379 " * Permission is hereby granted, free of charge, to any person "
2380 "obtaining "
2381 "a copy\n"
2382 " * of this software and associated documentation files (the "
2383 "\"Software\"),"
2384 " to deal\n"
2385 " * in the Software without restriction, including without limitation "
2386 "the "
2387 "rights\n"
2388 " * to use, copy, modify, merge, publish, distribute, sublicense, "
2389 "and/or sell\n"
2390 " * copies of the Software, and to permit persons to whom the Software "
2391 "is\n"
2392 " * furnished to do so, subject to the following conditions:\n"
2393 " *\n"
2394 " * The above copyright notice and this permission notice shall be "
2395 "included in\n"
2396 " * all copies or substantial portions of the Software.\n"
2397 " *\n"
2398 " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, "
2399 "EXPRESS OR\n"
2400 " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF "
2401 "MERCHANTABILITY,\n"
2402 " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT "
2403 "SHALL THE\n"
2404 " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR "
2405 "OTHER\n"
2406 " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, "
2407 "ARISING FROM,\n"
2408 " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER "
2409 "DEALINGS IN\n"
2410 " * THE SOFTWARE.\n"
2411 " *\n"
2412 " *===-----------------------------------------------------------------"
2413 "---"
2414 "---===\n"
2415 " */\n\n";
2416
2417 OS << "#ifndef __ARM_NEON_H\n";
2418 OS << "#define __ARM_NEON_H\n\n";
2419
2420 OS << "#ifndef __ARM_FP\n";
2421 OS << "#error \"NEON intrinsics not available with the soft-float ABI. "
2422 "Please use -mfloat-abi=softfp or -mfloat-abi=hard\"\n";
2423 OS << "#else\n\n";
2424
2425 OS << "#include <stdint.h>\n\n";
2426
2427 OS << "#include <arm_bf16.h>\n";
2428
2429 OS << "#include <arm_vector_types.h>\n";
2430
2431 // For now, signedness of polynomial types depends on target
2432 OS << "#if defined(__aarch64__) || defined(__arm64ec__)\n";
2433 OS << "typedef uint8_t poly8_t;\n";
2434 OS << "typedef uint16_t poly16_t;\n";
2435 OS << "typedef uint64_t poly64_t;\n";
2436 OS << "typedef __uint128_t poly128_t;\n";
2437 OS << "#else\n";
2438 OS << "typedef int8_t poly8_t;\n";
2439 OS << "typedef int16_t poly16_t;\n";
2440 OS << "typedef int64_t poly64_t;\n";
2441 OS << "#endif\n";
2442 emitNeonTypeDefs(types: "PcQPcPsQPsPlQPl", OS);
2443
2444 OS << "#define __ai static __inline__ __attribute__((__always_inline__, "
2445 "__nodebug__))\n\n";
2446
2447 // Shufflevector argument lists for endian-swapping vectors on big-endian
2448 // targets. For AArch64, we need to reverse every lane in the vector, but for
2449 // AArch32 we need to reverse the lanes within each 64-bit chunk of the
2450 // vector. The naming convention here is __lane_reverse_<n>_<m>, where <n> is
2451 // the length of the vector in bits, and <m> is the length of each lane in bits.
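// For example, __builtin_shufflevector(v, v, __lane_reverse_128_32) reverses
// all four 32-bit lanes on AArch64 (3,2,1,0) but swaps lanes only within each
// 64-bit half on AArch32 (1,0,3,2).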
2452 OS << "#if !defined(__LITTLE_ENDIAN__)\n";
2453 OS << "#if defined(__aarch64__) || defined(__arm64ec__)\n";
2454 OS << "#define __lane_reverse_64_32 1,0\n";
2455 OS << "#define __lane_reverse_64_16 3,2,1,0\n";
2456 OS << "#define __lane_reverse_64_8 7,6,5,4,3,2,1,0\n";
2457 OS << "#define __lane_reverse_128_64 1,0\n";
2458 OS << "#define __lane_reverse_128_32 3,2,1,0\n";
2459 OS << "#define __lane_reverse_128_16 7,6,5,4,3,2,1,0\n";
2460 OS << "#define __lane_reverse_128_8 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0\n";
2461 OS << "#else\n";
2462 OS << "#define __lane_reverse_64_32 1,0\n";
2463 OS << "#define __lane_reverse_64_16 3,2,1,0\n";
2464 OS << "#define __lane_reverse_64_8 7,6,5,4,3,2,1,0\n";
2465 OS << "#define __lane_reverse_128_64 0,1\n";
2466 OS << "#define __lane_reverse_128_32 1,0,3,2\n";
2467 OS << "#define __lane_reverse_128_16 3,2,1,0,7,6,5,4\n";
2468 OS << "#define __lane_reverse_128_8 7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8\n";
2469 OS << "#endif\n";
2470 OS << "#endif\n";
2471
2472 SmallVector<Intrinsic *, 128> Defs;
2473 for (const Record *R : Records.getAllDerivedDefinitions(ClassName: "Inst"))
2474 createIntrinsic(R, Out&: Defs);
2475
2476 for (auto *I : Defs)
2477 I->indexBody();
2478
2479 stable_sort(Range&: Defs, C: deref<std::less<>>());
2480
2481 // Only emit a def when its requirements have been met.
2482 // FIXME: This loop could be made faster, but it's fast enough for now.
2483 bool MadeProgress = true;
2484 std::string InGuard;
2485 while (!Defs.empty() && MadeProgress) {
2486 MadeProgress = false;
2487
2488 for (SmallVector<Intrinsic *, 128>::iterator I = Defs.begin();
2489 I != Defs.end(); /*No step*/) {
2490 bool DependenciesSatisfied = true;
2491 for (auto *II : (*I)->getDependencies()) {
2492 if (is_contained(Range&: Defs, Element: II))
2493 DependenciesSatisfied = false;
2494 }
2495 if (!DependenciesSatisfied) {
2496 // Try the next one.
2497 ++I;
2498 continue;
2499 }
2500
2501 // Emit #endif/#if pair if needed.
2502 if ((*I)->getArchGuard() != InGuard) {
2503 if (!InGuard.empty())
2504 OS << "#endif\n";
2505 InGuard = (*I)->getArchGuard();
2506 if (!InGuard.empty())
2507 OS << "#if " << InGuard << "\n";
2508 }
2509
2510 // Actually generate the intrinsic code.
2511 OS << (*I)->generate();
2512
2513 MadeProgress = true;
2514 I = Defs.erase(CI: I);
2515 }
2516 }
2517 assert(Defs.empty() && "Some requirements were not satisfied!");
2518 if (!InGuard.empty())
2519 OS << "#endif\n";
2520
2521 OS << "\n";
2522 OS << "#undef __ai\n\n";
2523 OS << "#endif /* ifndef __ARM_FP */\n";
2524 OS << "#endif /* ifndef __ARM_NEON_H */\n";
2525}
2526
2527/// run - Read the records in arm_fp16.td and output arm_fp16.h. arm_fp16.h
2528 /// is composed of type definitions and function declarations.
2529void NeonEmitter::runFP16(raw_ostream &OS) {
2530 OS << "/*===---- arm_fp16.h - ARM FP16 intrinsics "
2531 "------------------------------"
2532 "---===\n"
2533 " *\n"
2534 " * Permission is hereby granted, free of charge, to any person "
2535 "obtaining a copy\n"
2536 " * of this software and associated documentation files (the "
2537 "\"Software\"), to deal\n"
2538 " * in the Software without restriction, including without limitation "
2539 "the rights\n"
2540 " * to use, copy, modify, merge, publish, distribute, sublicense, "
2541 "and/or sell\n"
2542 " * copies of the Software, and to permit persons to whom the Software "
2543 "is\n"
2544 " * furnished to do so, subject to the following conditions:\n"
2545 " *\n"
2546 " * The above copyright notice and this permission notice shall be "
2547 "included in\n"
2548 " * all copies or substantial portions of the Software.\n"
2549 " *\n"
2550 " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, "
2551 "EXPRESS OR\n"
2552 " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF "
2553 "MERCHANTABILITY,\n"
2554 " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT "
2555 "SHALL THE\n"
2556 " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR "
2557 "OTHER\n"
2558 " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, "
2559 "ARISING FROM,\n"
2560 " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER "
2561 "DEALINGS IN\n"
2562 " * THE SOFTWARE.\n"
2563 " *\n"
2564 " *===-----------------------------------------------------------------"
2565 "---"
2566 "---===\n"
2567 " */\n\n";
2568
2569 OS << "#ifndef __ARM_FP16_H\n";
2570 OS << "#define __ARM_FP16_H\n\n";
2571
2572 OS << "#include <stdint.h>\n\n";
2573
2574 OS << "typedef __fp16 float16_t;\n";
2575
2576 OS << "#define __ai static __inline__ __attribute__((__always_inline__, "
2577 "__nodebug__))\n\n";
2578
2579 SmallVector<Intrinsic *, 128> Defs;
2580 for (const Record *R : Records.getAllDerivedDefinitions(ClassName: "Inst"))
2581 createIntrinsic(R, Out&: Defs);
2582
2583 for (auto *I : Defs)
2584 I->indexBody();
2585
2586 stable_sort(Range&: Defs, C: deref<std::less<>>());
2587
2588 // Only emit a def when its requirements have been met.
2589 // FIXME: This loop could be made faster, but it's fast enough for now.
2590 bool MadeProgress = true;
2591 std::string InGuard;
2592 while (!Defs.empty() && MadeProgress) {
2593 MadeProgress = false;
2594
2595 for (SmallVector<Intrinsic *, 128>::iterator I = Defs.begin();
2596 I != Defs.end(); /*No step*/) {
2597 bool DependenciesSatisfied = true;
2598 for (auto *II : (*I)->getDependencies()) {
2599 if (is_contained(Range&: Defs, Element: II))
2600 DependenciesSatisfied = false;
2601 }
2602 if (!DependenciesSatisfied) {
2603 // Try the next one.
2604 ++I;
2605 continue;
2606 }
2607
2608 // Emit #endif/#if pair if needed.
2609 if ((*I)->getArchGuard() != InGuard) {
2610 if (!InGuard.empty())
2611 OS << "#endif\n";
2612 InGuard = (*I)->getArchGuard();
2613 if (!InGuard.empty())
2614 OS << "#if " << InGuard << "\n";
2615 }
2616
2617 // Actually generate the intrinsic code.
2618 OS << (*I)->generate();
2619
2620 MadeProgress = true;
2621 I = Defs.erase(CI: I);
2622 }
2623 }
2624 assert(Defs.empty() && "Some requirements were not satisfied!");
2625 if (!InGuard.empty())
2626 OS << "#endif\n";
2627
2628 OS << "\n";
2629 OS << "#undef __ai\n\n";
2630 OS << "#endif /* __ARM_FP16_H */\n";
2631}
2632
2633void NeonEmitter::runVectorTypes(raw_ostream &OS) {
2634 OS << "/*===---- arm_vector_types - ARM vector types "
2635 "------===\n"
2636 " *\n"
2637 " *\n"
2638 " * Part of the LLVM Project, under the Apache License v2.0 with LLVM "
2639 "Exceptions.\n"
2640 " * See https://llvm.org/LICENSE.txt for license information.\n"
2641 " * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception\n"
2642 " *\n"
2643 " *===-----------------------------------------------------------------"
2644 "------===\n"
2645 " */\n\n";
2646 OS << "#if !defined(__ARM_NEON_H) && !defined(__ARM_SVE_H)\n";
2647 OS << "#error \"This file should not be used standalone. Please include"
2648 " arm_neon.h or arm_sve.h instead\"\n\n";
2649 OS << "#endif\n";
2650 OS << "#ifndef __ARM_NEON_TYPES_H\n";
2651 OS << "#define __ARM_NEON_TYPES_H\n";
2652 OS << "typedef float float32_t;\n";
2653 OS << "typedef __fp16 float16_t;\n";
2654
2655 OS << "#if defined(__aarch64__) || defined(__arm64ec__)\n";
2656 OS << "typedef __mfp8 mfloat8_t;\n";
2657 OS << "typedef double float64_t;\n";
2658 OS << "#endif\n\n";
2659
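// The raw-string block below emits helpers for composing an fpm_t (FP8
// floating-point mode) value: each __arm_set_fpm_* setter masks out its
// bit-field and ORs in the new value, so the calls can be chained, e.g.
// (illustrative usage only):
//   fpm_t __fpm = __arm_set_fpm_dst_format(
//       __arm_set_fpm_src1_format(__arm_fpm_init(), __ARM_FPM_E4M3),
//       __ARM_FPM_E5M2);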
2660 OS << R"(
2661typedef uint64_t fpm_t;
2662
2663enum __ARM_FPM_FORMAT { __ARM_FPM_E5M2, __ARM_FPM_E4M3 };
2664
2665enum __ARM_FPM_OVERFLOW { __ARM_FPM_INFNAN, __ARM_FPM_SATURATE };
2666
2667static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__))
2668__arm_fpm_init(void) {
2669 return 0;
2670}
2671
2672static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__))
2673__arm_set_fpm_src1_format(fpm_t __fpm, enum __ARM_FPM_FORMAT __format) {
2674 return (__fpm & ~7ull) | (fpm_t)__format;
2675}
2676
2677static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__))
2678__arm_set_fpm_src2_format(fpm_t __fpm, enum __ARM_FPM_FORMAT __format) {
2679 return (__fpm & ~0x38ull) | ((fpm_t)__format << 3u);
2680}
2681
2682static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__))
2683__arm_set_fpm_dst_format(fpm_t __fpm, enum __ARM_FPM_FORMAT __format) {
2684 return (__fpm & ~0x1c0ull) | ((fpm_t)__format << 6u);
2685}
2686
2687static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__))
2688__arm_set_fpm_overflow_mul(fpm_t __fpm, enum __ARM_FPM_OVERFLOW __behaviour) {
2689 return (__fpm & ~0x4000ull) | ((fpm_t)__behaviour << 14u);
2690}
2691
2692static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__))
2693__arm_set_fpm_overflow_cvt(fpm_t __fpm, enum __ARM_FPM_OVERFLOW __behaviour) {
2694 return (__fpm & ~0x8000ull) | ((fpm_t)__behaviour << 15u);
2695}
2696
2697static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__))
2698__arm_set_fpm_lscale(fpm_t __fpm, uint64_t __scale) {
2699 return (__fpm & ~0x7f0000ull) | (__scale << 16u);
2700}
2701
2702static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__))
2703__arm_set_fpm_nscale(fpm_t __fpm, int64_t __scale) {
2704 return (__fpm & ~0xff000000ull) | (((fpm_t)__scale & 0xffu) << 24u);
2705}
2706
2707static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__))
2708__arm_set_fpm_lscale2(fpm_t __fpm, uint64_t __scale) {
2709 return (uint32_t)__fpm | (__scale << 32u);
2710}
2711
2712)";
2713
2714 emitNeonTypeDefs(types: "cQcsQsiQilQlUcQUcUsQUsUiQUiUlQUlmQmhQhfQfdQd", OS);
2715
2716 emitNeonTypeDefs(types: "bQb", OS);
2717 OS << "#endif // __ARM_NEON_TYPES_H\n";
2718}
2719
2720void NeonEmitter::runBF16(raw_ostream &OS) {
2721 OS << "/*===---- arm_bf16.h - ARM BF16 intrinsics "
2722 "-----------------------------------===\n"
2723 " *\n"
2724 " *\n"
2725 " * Part of the LLVM Project, under the Apache License v2.0 with LLVM "
2726 "Exceptions.\n"
2727 " * See https://llvm.org/LICENSE.txt for license information.\n"
2728 " * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception\n"
2729 " *\n"
2730 " *===-----------------------------------------------------------------"
2731 "------===\n"
2732 " */\n\n";
2733
2734 OS << "#ifndef __ARM_BF16_H\n";
2735 OS << "#define __ARM_BF16_H\n\n";
2736
2737 OS << "typedef __bf16 bfloat16_t;\n";
2738
2739 OS << "#define __ai static __inline__ __attribute__((__always_inline__, "
2740 "__nodebug__))\n\n";
2741
2742 SmallVector<Intrinsic *, 128> Defs;
2743 for (const Record *R : Records.getAllDerivedDefinitions(ClassName: "Inst"))
2744 createIntrinsic(R, Out&: Defs);
2745
2746 for (auto *I : Defs)
2747 I->indexBody();
2748
2749 stable_sort(Range&: Defs, C: deref<std::less<>>());
2750
2751 // Only emit a def when its requirements have been met.
2752 // FIXME: This loop could be made faster, but it's fast enough for now.
2753 bool MadeProgress = true;
2754 std::string InGuard;
2755 while (!Defs.empty() && MadeProgress) {
2756 MadeProgress = false;
2757
2758 for (SmallVector<Intrinsic *, 128>::iterator I = Defs.begin();
2759 I != Defs.end(); /*No step*/) {
2760 bool DependenciesSatisfied = true;
2761 for (auto *II : (*I)->getDependencies()) {
2762 if (is_contained(Range&: Defs, Element: II))
2763 DependenciesSatisfied = false;
2764 }
2765 if (!DependenciesSatisfied) {
2766 // Try the next one.
2767 ++I;
2768 continue;
2769 }
2770
2771 // Emit #endif/#if pair if needed.
2772 if ((*I)->getArchGuard() != InGuard) {
2773 if (!InGuard.empty())
2774 OS << "#endif\n";
2775 InGuard = (*I)->getArchGuard();
2776 if (!InGuard.empty())
2777 OS << "#if " << InGuard << "\n";
2778 }
2779
2780 // Actually generate the intrinsic code.
2781 OS << (*I)->generate();
2782
2783 MadeProgress = true;
2784 I = Defs.erase(CI: I);
2785 }
2786 }
2787 assert(Defs.empty() && "Some requirements were not satisfied!");
2788 if (!InGuard.empty())
2789 OS << "#endif\n";
2790
2791 OS << "\n";
2792 OS << "#undef __ai\n\n";
2793
2794 OS << "#endif\n";
2795}
2796
2797void clang::EmitNeon(const RecordKeeper &Records, raw_ostream &OS) {
2798 NeonEmitter(Records).run(OS);
2799}
2800
2801void clang::EmitFP16(const RecordKeeper &Records, raw_ostream &OS) {
2802 NeonEmitter(Records).runFP16(OS);
2803}
2804
2805void clang::EmitBF16(const RecordKeeper &Records, raw_ostream &OS) {
2806 NeonEmitter(Records).runBF16(OS);
2807}
2808
2809void clang::EmitNeonSema(const RecordKeeper &Records, raw_ostream &OS) {
2810 NeonEmitter(Records).runHeader(OS);
2811}
2812
2813void clang::EmitVectorTypes(const RecordKeeper &Records, raw_ostream &OS) {
2814 NeonEmitter(Records).runVectorTypes(OS);
2815}
2816
2817void clang::EmitNeonTest(const RecordKeeper &Records, raw_ostream &OS) {
2818 llvm_unreachable("Neon test generation no longer implemented!");
2819}
2820
