AArch64.cpp source code [lld/ELF/Arch/AArch64.cpp]

1	//===- AArch64.cpp --------------------------------------------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8
9	#include "InputFiles.h"
10	#include "OutputSections.h"
11	#include "Symbols.h"
12	#include "SyntheticSections.h"
13	#include "Target.h"
14	#include "lld/Common/ErrorHandler.h"
15	#include "llvm/BinaryFormat/ELF.h"
16	#include "llvm/Support/Endian.h"
17
18	using namespace llvm;
19	using namespace llvm::support::endian;
20	using namespace llvm::ELF;
21	using namespace lld;
22	using namespace lld::elf;
23
24	// Page(Expr) is the page address of the expression Expr, defined
25	// as (Expr & ~0xFFF). (This applies even if the machine page size
26	// supported by the platform has a different value.)
27	uint64_t elf::getAArch64Page(uint64_t expr) {
28	return expr & ~static_cast<uint64_t>(`0xFFF`);
29	}
30
31	namespace {
32	class AArch64 : public TargetInfo {
33	public:
34	AArch64();
35	RelExpr getRelExpr(RelType type, const Symbol &s,
36	const uint8_t loc) const* override;
37	RelType getDynRel(RelType type) const override;
38	int64_t getImplicitAddend(const uint8_t buf, RelType type) const* override;
39	void writeGotPlt(uint8_t buf, const* Symbol &s) const override;
40	void writeIgotPlt(uint8_t buf, const* Symbol &s) const override;
41	void writePltHeader(uint8_t buf) const* override;
42	void writePlt(uint8_t buf, const* Symbol &sym,
43	uint64_t pltEntryAddr) const override;
44	bool needsThunk(RelExpr expr, RelType type, const InputFile *file,
45	uint64_t branchAddr, const Symbol &s,
46	int64_t a) const override;
47	uint32_t getThunkSectionSpacing() const override;
48	bool inBranchRange(RelType type, uint64_t src, uint64_t dst) const override;
49	bool usesOnlyLowPageBits(RelType type) const override;
50	void relocate(uint8_t loc, const* Relocation &rel,
51	uint64_t val) const override;
52	RelExpr adjustTlsExpr(RelType type, RelExpr expr) const override;
53	void relocateAlloc(InputSectionBase &sec, uint8_t buf) const* override;
54
55	private:
56	void relaxTlsGdToLe(uint8_t loc, const* Relocation &rel, uint64_t val) const;
57	void relaxTlsGdToIe(uint8_t loc, const* Relocation &rel, uint64_t val) const;
58	void relaxTlsIeToLe(uint8_t loc, const* Relocation &rel, uint64_t val) const;
59	};
60
61	struct AArch64Relaxer {
62	bool safeToRelaxAdrpLdr = false;
63
64	AArch64Relaxer(ArrayRef<Relocation> relocs);
65	bool tryRelaxAdrpAdd(const Relocation &adrpRel, const Relocation &addRel,
66	uint64_t secAddr, uint8_t buf) const*;
67	bool tryRelaxAdrpLdr(const Relocation &adrpRel, const Relocation &ldrRel,
68	uint64_t secAddr, uint8_t buf) const*;
69	};
70	} // namespace
71
// Return the bits [start, end] (both inclusive) of val, shifted right so that
// bit `start` becomes bit 0.
// For instance, getBits(0xF0, 4, 8) returns 0xF.
//
// A full 64-bit field (start == 0, end == 63) is handled explicitly because
// shifting a 64-bit value by 64 is undefined behaviour.
static uint64_t getBits(uint64_t val, int start, int end) {
  const int width = end + 1 - start;
  const uint64_t mask = width >= 64
                            ? ~static_cast<uint64_t>(0)
                            : (static_cast<uint64_t>(1) << width) - 1;
  return (val >> start) & mask;
}
78
79	AArch64::AArch64() {
80	copyRel = R_AARCH64_COPY;
81	relativeRel = R_AARCH64_RELATIVE;
82	iRelativeRel = R_AARCH64_IRELATIVE;
83	gotRel = R_AARCH64_GLOB_DAT;
84	pltRel = R_AARCH64_JUMP_SLOT;
85	symbolicRel = R_AARCH64_ABS64;
86	tlsDescRel = R_AARCH64_TLSDESC;
87	tlsGotRel = R_AARCH64_TLS_TPREL64;
88	pltHeaderSize = `32`;
89	pltEntrySize = `16`;
90	ipltEntrySize = `16`;
91	defaultMaxPageSize = `65536`;
92
93	// Align to the 2 MiB page size (known as a superpage or huge page).
94	// FreeBSD automatically promotes 2 MiB-aligned allocations.
95	defaultImageBase = `0x200000`;
96
97	needsThunks = true;
98	}
99
100	RelExpr AArch64::getRelExpr(RelType type, const Symbol &s,
101	const uint8_t loc) const* {
102	switch (type) {
103	case R_AARCH64_ABS16:
104	case R_AARCH64_ABS32:
105	case R_AARCH64_ABS64:
106	case R_AARCH64_ADD_ABS_LO12_NC:
107	case R_AARCH64_LDST128_ABS_LO12_NC:
108	case R_AARCH64_LDST16_ABS_LO12_NC:
109	case R_AARCH64_LDST32_ABS_LO12_NC:
110	case R_AARCH64_LDST64_ABS_LO12_NC:
111	case R_AARCH64_LDST8_ABS_LO12_NC:
112	case R_AARCH64_MOVW_SABS_G0:
113	case R_AARCH64_MOVW_SABS_G1:
114	case R_AARCH64_MOVW_SABS_G2:
115	case R_AARCH64_MOVW_UABS_G0:
116	case R_AARCH64_MOVW_UABS_G0_NC:
117	case R_AARCH64_MOVW_UABS_G1:
118	case R_AARCH64_MOVW_UABS_G1_NC:
119	case R_AARCH64_MOVW_UABS_G2:
120	case R_AARCH64_MOVW_UABS_G2_NC:
121	case R_AARCH64_MOVW_UABS_G3:
122	return R_ABS;
123	case R_AARCH64_AUTH_ABS64:
124	return R_AARCH64_AUTH;
125	case R_AARCH64_TLSDESC_ADR_PAGE21:
126	return R_AARCH64_TLSDESC_PAGE;
127	case R_AARCH64_TLSDESC_LD64_LO12:
128	case R_AARCH64_TLSDESC_ADD_LO12:
129	return R_TLSDESC;
130	case R_AARCH64_TLSDESC_CALL:
131	return R_TLSDESC_CALL;
132	case R_AARCH64_TLSLE_ADD_TPREL_HI12:
133	case R_AARCH64_TLSLE_ADD_TPREL_LO12_NC:
134	case R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC:
135	case R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC:
136	case R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC:
137	case R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC:
138	case R_AARCH64_TLSLE_LDST128_TPREL_LO12_NC:
139	case R_AARCH64_TLSLE_MOVW_TPREL_G0:
140	case R_AARCH64_TLSLE_MOVW_TPREL_G0_NC:
141	case R_AARCH64_TLSLE_MOVW_TPREL_G1:
142	case R_AARCH64_TLSLE_MOVW_TPREL_G1_NC:
143	case R_AARCH64_TLSLE_MOVW_TPREL_G2:
144	return R_TPREL;
145	case R_AARCH64_CALL26:
146	case R_AARCH64_CONDBR19:
147	case R_AARCH64_JUMP26:
148	case R_AARCH64_TSTBR14:
149	return R_PLT_PC;
150	case R_AARCH64_PLT32:
151	const_cast<Symbol &>(s).thunkAccessed = true;
152	return R_PLT_PC;
153	case R_AARCH64_PREL16:
154	case R_AARCH64_PREL32:
155	case R_AARCH64_PREL64:
156	case R_AARCH64_ADR_PREL_LO21:
157	case R_AARCH64_LD_PREL_LO19:
158	case R_AARCH64_MOVW_PREL_G0:
159	case R_AARCH64_MOVW_PREL_G0_NC:
160	case R_AARCH64_MOVW_PREL_G1:
161	case R_AARCH64_MOVW_PREL_G1_NC:
162	case R_AARCH64_MOVW_PREL_G2:
163	case R_AARCH64_MOVW_PREL_G2_NC:
164	case R_AARCH64_MOVW_PREL_G3:
165	return R_PC;
166	case R_AARCH64_ADR_PREL_PG_HI21:
167	case R_AARCH64_ADR_PREL_PG_HI21_NC:
168	return R_AARCH64_PAGE_PC;
169	case R_AARCH64_LD64_GOT_LO12_NC:
170	case R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC:
171	return R_GOT;
172	case R_AARCH64_LD64_GOTPAGE_LO15:
173	return R_AARCH64_GOT_PAGE;
174	case R_AARCH64_ADR_GOT_PAGE:
175	case R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21:
176	return R_AARCH64_GOT_PAGE_PC;
177	case R_AARCH64_GOTPCREL32:
178	return R_GOT_PC;
179	case R_AARCH64_NONE:
180	return R_NONE;
181	default:
182	error(msg: getErrorLocation(loc) + "unknown relocation (" + Twine (type) +
183	") against symbol " + toString(s));
184	return R_NONE;
185	}
186	}
187
188	RelExpr AArch64::adjustTlsExpr(RelType type, RelExpr expr) const {
189	if (expr == R_RELAX_TLS_GD_TO_IE) {
190	if (type == R_AARCH64_TLSDESC_ADR_PAGE21)
191	return R_AARCH64_RELAX_TLS_GD_TO_IE_PAGE_PC;
192	return R_RELAX_TLS_GD_TO_IE_ABS;
193	}
194	return expr;
195	}
196
197	bool AArch64::usesOnlyLowPageBits(RelType type) const {
198	switch (type) {
199	default:
200	return false;
201	case R_AARCH64_ADD_ABS_LO12_NC:
202	case R_AARCH64_LD64_GOT_LO12_NC:
203	case R_AARCH64_LDST128_ABS_LO12_NC:
204	case R_AARCH64_LDST16_ABS_LO12_NC:
205	case R_AARCH64_LDST32_ABS_LO12_NC:
206	case R_AARCH64_LDST64_ABS_LO12_NC:
207	case R_AARCH64_LDST8_ABS_LO12_NC:
208	case R_AARCH64_TLSDESC_ADD_LO12:
209	case R_AARCH64_TLSDESC_LD64_LO12:
210	case R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC:
211	return true;
212	}
213	}
214
215	RelType AArch64::getDynRel(RelType type) const {
216	if (type == R_AARCH64_ABS64 \|\| type == R_AARCH64_AUTH_ABS64)
217	return type;
218	return R_AARCH64_NONE;
219	}
220
221	int64_t AArch64::getImplicitAddend(const uint8_t buf, RelType type) const* {
222	switch (type) {
223	case R_AARCH64_TLSDESC:
224	return read64(p: buf + `8`);
225	case R_AARCH64_NONE:
226	case R_AARCH64_GLOB_DAT:
227	case R_AARCH64_JUMP_SLOT:
228	return `0`;
229	case R_AARCH64_ABS16:
230	case R_AARCH64_PREL16:
231	return SignExtend64<`16`>(x: read16(p: buf));
232	case R_AARCH64_ABS32:
233	case R_AARCH64_PREL32:
234	return SignExtend64<`32`>(x: read32(p: buf));
235	case R_AARCH64_ABS64:
236	case R_AARCH64_PREL64:
237	case R_AARCH64_RELATIVE:
238	case R_AARCH64_IRELATIVE:
239	case R_AARCH64_TLS_TPREL64:
240	return read64(p: buf);
241	case R_AARCH64_MOVW_UABS_G0:
242	case R_AARCH64_MOVW_UABS_G0_NC:
243	return getBits(val: SignExtend64<`16`>(x: read16(p: buf)), start: `0`, end: `15`);
244	case R_AARCH64_MOVW_UABS_G1:
245	case R_AARCH64_MOVW_UABS_G1_NC:
246	return getBits(val: SignExtend64<`32`>(x: read32(p: buf)), start: `16`, end: `31`);
247	case R_AARCH64_MOVW_UABS_G2:
248	case R_AARCH64_MOVW_UABS_G2_NC:
249	return getBits(val: read64(p: buf), start: `32`, end: `47`);
250	case R_AARCH64_MOVW_UABS_G3:
251	return getBits(val: read64(p: buf), start: `48`, end: `63`);
252	case R_AARCH64_TSTBR14:
253	return getBits(val: SignExtend64<`32`>(x: read32(p: buf)), start: `2`, end: `15`);
254	case R_AARCH64_CONDBR19:
255	case R_AARCH64_LD_PREL_LO19:
256	return getBits(val: SignExtend64<`32`>(x: read32(p: buf)), start: `2`, end: `20`);
257	case R_AARCH64_ADD_ABS_LO12_NC:
258	return getBits(val: SignExtend64<`16`>(x: read16(p: buf)), start: `0`, end: `11`);
259	case R_AARCH64_ADR_PREL_PG_HI21:
260	case R_AARCH64_ADR_PREL_PG_HI21_NC:
261	return getBits(val: SignExtend64<`32`>(x: read32(p: buf)), start: `12`, end: `32`);
262	case R_AARCH64_JUMP26:
263	case R_AARCH64_CALL26:
264	return getBits(val: SignExtend64<`32`>(x: read32(p: buf)), start: `2`, end: `27`);
265	default:
266	internalLinkerError(loc: getErrorLocation(loc: buf),
267	msg: "cannot read addend for relocation " + toString(type));
268	return `0`;
269	}
270	}
271
272	void AArch64::writeGotPlt(uint8_t buf, const* Symbol &) const {
273	write64(p: buf, v: in.plt ->getVA());
274	}
275
276	void AArch64::writeIgotPlt(uint8_t buf, const* Symbol &s) const {
277	if (config ->writeAddends)
278	write64(p: buf, v: s.getVA());
279	}
280
281	void AArch64::writePltHeader(uint8_t buf) const* {
282	const uint8_t pltData[] = {
283	`0xf0`, `0x7b`, `0xbf`, `0xa9`, // stp x16, x30, [sp,#-16]!
284	`0x10`, `0x00`, `0x00`, `0x90`, // adrp x16, Page(&(.got.plt[2]))
285	`0x11`, `0x02`, `0x40`, `0xf9`, // ldr x17, [x16, Offset(&(.got.plt[2]))]
286	`0x10`, `0x02`, `0x00`, `0x91`, // add x16, x16, Offset(&(.got.plt[2]))
287	`0x20`, `0x02`, `0x1f`, `0xd6`, // br x17
288	`0x1f`, `0x20`, `0x03`, `0xd5`, // nop
289	`0x1f`, `0x20`, `0x03`, `0xd5`, // nop
290	`0x1f`, `0x20`, `0x03`, `0xd5` // nop
291	};
292	memcpy(dest: buf, src: pltData, n: sizeof(pltData));
293
294	uint64_t got = in.gotPlt ->getVA();
295	uint64_t plt = in.plt ->getVA();
296	relocateNoSym(loc: buf + `4`, type: R_AARCH64_ADR_PREL_PG_HI21,
297	val: getAArch64Page(expr: got + `16`) - getAArch64Page(expr: plt + `4`));
298	relocateNoSym(loc: buf + `8`, type: R_AARCH64_LDST64_ABS_LO12_NC, val: got + `16`);
299	relocateNoSym(loc: buf + `12`, type: R_AARCH64_ADD_ABS_LO12_NC, val: got + `16`);
300	}
301
302	void AArch64::writePlt(uint8_t buf, const* Symbol &sym,
303	uint64_t pltEntryAddr) const {
304	const uint8_t inst[] = {
305	`0x10`, `0x00`, `0x00`, `0x90`, // adrp x16, Page(&(.got.plt[n]))
306	`0x11`, `0x02`, `0x40`, `0xf9`, // ldr x17, [x16, Offset(&(.got.plt[n]))]
307	`0x10`, `0x02`, `0x00`, `0x91`, // add x16, x16, Offset(&(.got.plt[n]))
308	`0x20`, `0x02`, `0x1f`, `0xd6` // br x17
309	};
310	memcpy(dest: buf, src: inst, n: sizeof(inst));
311
312	uint64_t gotPltEntryAddr = sym.getGotPltVA();
313	relocateNoSym(loc: buf, type: R_AARCH64_ADR_PREL_PG_HI21,
314	val: getAArch64Page(expr: gotPltEntryAddr) - getAArch64Page(expr: pltEntryAddr));
315	relocateNoSym(loc: buf + `4`, type: R_AARCH64_LDST64_ABS_LO12_NC, val: gotPltEntryAddr);
316	relocateNoSym(loc: buf + `8`, type: R_AARCH64_ADD_ABS_LO12_NC, val: gotPltEntryAddr);
317	}
318
319	bool AArch64::needsThunk(RelExpr expr, RelType type, const InputFile *file,
320	uint64_t branchAddr, const Symbol &s,
321	int64_t a) const {
322	// If s is an undefined weak symbol and does not have a PLT entry then it will
323	// be resolved as a branch to the next instruction. If it is hidden, its
324	// binding has been converted to local, so we just check isUndefined() here. A
325	// undefined non-weak symbol will have been errored.
326	if (s.isUndefined() && !s.isInPlt())
327	return false;
328	// ELF for the ARM 64-bit architecture, section Call and Jump relocations
329	// only permits range extension thunks for R_AARCH64_CALL26 and
330	// R_AARCH64_JUMP26 relocation types.
331	if (type != R_AARCH64_CALL26 && type != R_AARCH64_JUMP26 &&
332	type != R_AARCH64_PLT32)
333	return false;
334	uint64_t dst = expr == R_PLT_PC ? s.getPltVA() : s.getVA(addend: a);
335	return !inBranchRange(type, src: branchAddr, dst);
336	}
337
338	uint32_t AArch64::getThunkSectionSpacing() const {
339	// See comment in Arch/ARM.cpp for a more detailed explanation of
340	// getThunkSectionSpacing(). For AArch64 the only branches we are permitted to
341	// Thunk have a range of +/- 128 MiB
342	return (`128` * `1024` * `1024`) - `0x30000`;
343	}
344
345	bool AArch64::inBranchRange(RelType type, uint64_t src, uint64_t dst) const {
346	if (type != R_AARCH64_CALL26 && type != R_AARCH64_JUMP26 &&
347	type != R_AARCH64_PLT32)
348	return true;
349	// The AArch64 call and unconditional branch instructions have a range of
350	// +/- 128 MiB. The PLT32 relocation supports a range up to +/- 2 GiB.
351	uint64_t range =
352	type == R_AARCH64_PLT32 ? (UINT64_C(`1`) << `31`) : (`128` * `1024` * `1024`);
353	if (dst > src) {
354	// Immediate of branch is signed.
355	range -= `4`;
356	return dst - src <= range;
357	}
358	return src - dst <= range;
359	}
360
361	static void write32AArch64Addr(uint8_t *l, uint64_t imm) {
362	uint32_t immLo = (imm & `0x3`) << `29`;
363	uint32_t immHi = (imm & `0x1FFFFC`) << `3`;
364	uint64_t mask = (`0x3` << `29`) \| (`0x1FFFFC` << `3`);
365	write32le(P: l, V: (read32le(P: l) & ~mask) \| immLo \| immHi);
366	}
367
368	static void or32le(uint8_t *p, int32_t v) { write32le(P: p, V: read32le(P: p) \| v); }
369
370	// Update the immediate field in a AARCH64 ldr, str, and add instruction.
371	static void or32AArch64Imm(uint8_t *l, uint64_t imm) {
372	or32le(p: l, v: (imm & `0xFFF`) << `10`);
373	}
374
375	// Update the immediate field in an AArch64 movk, movn or movz instruction
376	// for a signed relocation, and update the opcode of a movn or movz instruction
377	// to match the sign of the operand.
378	static void writeSMovWImm(uint8_t *loc, uint32_t imm) {
379	uint32_t inst = read32le(P: loc);
380	// Opcode field is bits 30, 29, with 10 = movz, 00 = movn and 11 = movk.
381	if (!(inst & (`1` << `29`))) {
382	// movn or movz.
383	if (imm & `0x10000`) {
384	// Change opcode to movn, which takes an inverted operand.
385	imm ^= `0xFFFF`;
386	inst &= ~(`1` << `30`);
387	} else {
388	// Change opcode to movz.
389	inst \|= `1` << `30`;
390	}
391	}
392	write32le(P: loc, V: inst \| ((imm & `0xFFFF`) << `5`));
393	}
394
395	void AArch64::relocate(uint8_t loc, const* Relocation &rel,
396	uint64_t val) const {
397	switch (rel.type) {
398	case R_AARCH64_ABS16:
399	case R_AARCH64_PREL16:
400	checkIntUInt(loc, v: val, n: `16`, rel);
401	write16(p: loc, v: val);
402	break;
403	case R_AARCH64_ABS32:
404	case R_AARCH64_PREL32:
405	checkIntUInt(loc, v: val, n: `32`, rel);
406	write32(p: loc, v: val);
407	break;
408	case R_AARCH64_PLT32:
409	case R_AARCH64_GOTPCREL32:
410	checkInt(loc, v: val, n: `32`, rel);
411	write32(p: loc, v: val);
412	break;
413	case R_AARCH64_ABS64:
414	// AArch64 relocations to tagged symbols have extended semantics, as
415	// described here:
416	// https://github.com/ARM-software/abi-aa/blob/main/memtagabielf64/memtagabielf64.rst#841extended-semantics-of-r_aarch64_relative.
417	// tl;dr: encode the symbol's special addend in the place, which is an
418	// offset to the point where the logical tag is derived from. Quick hack, if
419	// the addend is within the symbol's bounds, no need to encode the tag
420	// derivation offset.
421	if (rel.sym && rel.sym->isTagged() &&
422	(rel.addend < `0` \|\|
423	rel.addend >= static_cast<int64_t>(rel.sym->getSize())))
424	write64(p: loc, v: -rel.addend);
425	else
426	write64(p: loc, v: val);
427	break;
428	case R_AARCH64_PREL64:
429	write64(p: loc, v: val);
430	break;
431	case R_AARCH64_ADD_ABS_LO12_NC:
432	or32AArch64Imm(l: loc, imm: val);
433	break;
434	case R_AARCH64_ADR_GOT_PAGE:
435	case R_AARCH64_ADR_PREL_PG_HI21:
436	case R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21:
437	case R_AARCH64_TLSDESC_ADR_PAGE21:
438	checkInt(loc, v: val, n: `33`, rel);
439	[[fallthrough]];
440	case R_AARCH64_ADR_PREL_PG_HI21_NC:
441	write32AArch64Addr(l: loc, imm: val >> `12`);
442	break;
443	case R_AARCH64_ADR_PREL_LO21:
444	checkInt(loc, v: val, n: `21`, rel);
445	write32AArch64Addr(l: loc, imm: val);
446	break;
447	case R_AARCH64_JUMP26:
448	// Normally we would just write the bits of the immediate field, however
449	// when patching instructions for the cpu errata fix -fix-cortex-a53-843419
450	// we want to replace a non-branch instruction with a branch immediate
451	// instruction. By writing all the bits of the instruction including the
452	// opcode and the immediate (0 001 \| 01 imm26) we can do this
453	// transformation by placing a R_AARCH64_JUMP26 relocation at the offset of
454	// the instruction we want to patch.
455	write32le(P: loc, V: `0x14000000`);
456	[[fallthrough]];
457	case R_AARCH64_CALL26:
458	checkInt(loc, v: val, n: `28`, rel);
459	or32le(p: loc, v: (val & `0x0FFFFFFC`) >> `2`);
460	break;
461	case R_AARCH64_CONDBR19:
462	case R_AARCH64_LD_PREL_LO19:
463	checkAlignment(loc, v: val, n: `4`, rel);
464	checkInt(loc, v: val, n: `21`, rel);
465	or32le(p: loc, v: (val & `0x1FFFFC`) << `3`);
466	break;
467	case R_AARCH64_LDST8_ABS_LO12_NC:
468	case R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC:
469	or32AArch64Imm(l: loc, imm: getBits(val, start: `0`, end: `11`));
470	break;
471	case R_AARCH64_LDST16_ABS_LO12_NC:
472	case R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC:
473	checkAlignment(loc, v: val, n: `2`, rel);
474	or32AArch64Imm(l: loc, imm: getBits(val, start: `1`, end: `11`));
475	break;
476	case R_AARCH64_LDST32_ABS_LO12_NC:
477	case R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC:
478	checkAlignment(loc, v: val, n: `4`, rel);
479	or32AArch64Imm(l: loc, imm: getBits(val, start: `2`, end: `11`));
480	break;
481	case R_AARCH64_LDST64_ABS_LO12_NC:
482	case R_AARCH64_LD64_GOT_LO12_NC:
483	case R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC:
484	case R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC:
485	case R_AARCH64_TLSDESC_LD64_LO12:
486	checkAlignment(loc, v: val, n: `8`, rel);
487	or32AArch64Imm(l: loc, imm: getBits(val, start: `3`, end: `11`));
488	break;
489	case R_AARCH64_LDST128_ABS_LO12_NC:
490	case R_AARCH64_TLSLE_LDST128_TPREL_LO12_NC:
491	checkAlignment(loc, v: val, n: `16`, rel);
492	or32AArch64Imm(l: loc, imm: getBits(val, start: `4`, end: `11`));
493	break;
494	case R_AARCH64_LD64_GOTPAGE_LO15:
495	checkAlignment(loc, v: val, n: `8`, rel);
496	or32AArch64Imm(l: loc, imm: getBits(val, start: `3`, end: `14`));
497	break;
498	case R_AARCH64_MOVW_UABS_G0:
499	checkUInt(loc, v: val, n: `16`, rel);
500	[[fallthrough]];
501	case R_AARCH64_MOVW_UABS_G0_NC:
502	or32le(p: loc, v: (val & `0xFFFF`) << `5`);
503	break;
504	case R_AARCH64_MOVW_UABS_G1:
505	checkUInt(loc, v: val, n: `32`, rel);
506	[[fallthrough]];
507	case R_AARCH64_MOVW_UABS_G1_NC:
508	or32le(p: loc, v: (val & `0xFFFF0000`) >> `11`);
509	break;
510	case R_AARCH64_MOVW_UABS_G2:
511	checkUInt(loc, v: val, n: `48`, rel);
512	[[fallthrough]];
513	case R_AARCH64_MOVW_UABS_G2_NC:
514	or32le(p: loc, v: (val & `0xFFFF00000000`) >> `27`);
515	break;
516	case R_AARCH64_MOVW_UABS_G3:
517	or32le(p: loc, v: (val & `0xFFFF000000000000`) >> `43`);
518	break;
519	case R_AARCH64_MOVW_PREL_G0:
520	case R_AARCH64_MOVW_SABS_G0:
521	case R_AARCH64_TLSLE_MOVW_TPREL_G0:
522	checkInt(loc, v: val, n: `17`, rel);
523	[[fallthrough]];
524	case R_AARCH64_MOVW_PREL_G0_NC:
525	case R_AARCH64_TLSLE_MOVW_TPREL_G0_NC:
526	writeSMovWImm(loc, imm: val);
527	break;
528	case R_AARCH64_MOVW_PREL_G1:
529	case R_AARCH64_MOVW_SABS_G1:
530	case R_AARCH64_TLSLE_MOVW_TPREL_G1:
531	checkInt(loc, v: val, n: `33`, rel);
532	[[fallthrough]];
533	case R_AARCH64_MOVW_PREL_G1_NC:
534	case R_AARCH64_TLSLE_MOVW_TPREL_G1_NC:
535	writeSMovWImm(loc, imm: val >> `16`);
536	break;
537	case R_AARCH64_MOVW_PREL_G2:
538	case R_AARCH64_MOVW_SABS_G2:
539	case R_AARCH64_TLSLE_MOVW_TPREL_G2:
540	checkInt(loc, v: val, n: `49`, rel);
541	[[fallthrough]];
542	case R_AARCH64_MOVW_PREL_G2_NC:
543	writeSMovWImm(loc, imm: val >> `32`);
544	break;
545	case R_AARCH64_MOVW_PREL_G3:
546	writeSMovWImm(loc, imm: val >> `48`);
547	break;
548	case R_AARCH64_TSTBR14:
549	checkInt(loc, v: val, n: `16`, rel);
550	or32le(p: loc, v: (val & `0xFFFC`) << `3`);
551	break;
552	case R_AARCH64_TLSLE_ADD_TPREL_HI12:
553	checkUInt(loc, v: val, n: `24`, rel);
554	or32AArch64Imm(l: loc, imm: val >> `12`);
555	break;
556	case R_AARCH64_TLSLE_ADD_TPREL_LO12_NC:
557	case R_AARCH64_TLSDESC_ADD_LO12:
558	or32AArch64Imm(l: loc, imm: val);
559	break;
560	case R_AARCH64_TLSDESC:
561	// For R_AARCH64_TLSDESC the addend is stored in the second 64-bit word.
562	write64(p: loc + `8`, v: val);
563	break;
564	default:
565	llvm_unreachable("unknown relocation");
566	}
567	}
568
569	void AArch64::relaxTlsGdToLe(uint8_t loc, const* Relocation &rel,
570	uint64_t val) const {
571	// TLSDESC Global-Dynamic relocation are in the form:
572	// adrp x0, :tlsdesc:v [R_AARCH64_TLSDESC_ADR_PAGE21]
573	// ldr x1, [x0, #:tlsdesc_lo12:v [R_AARCH64_TLSDESC_LD64_LO12]
574	// add x0, x0, :tlsdesc_los:v [R_AARCH64_TLSDESC_ADD_LO12]
575	// .tlsdesccall [R_AARCH64_TLSDESC_CALL]
576	// blr x1
577	// And it can optimized to:
578	// movz x0, #0x0, lsl #16
579	// movk x0, #0x10
580	// nop
581	// nop
582	checkUInt(loc, v: val, n: `32`, rel);
583
584	switch (rel.type) {
585	case R_AARCH64_TLSDESC_ADD_LO12:
586	case R_AARCH64_TLSDESC_CALL:
587	write32le(P: loc, V: `0xd503201f`); // nop
588	return;
589	case R_AARCH64_TLSDESC_ADR_PAGE21:
590	write32le(P: loc, V: `0xd2a00000` \| (((val >> `16`) & `0xffff`) << `5`)); // movz
591	return;
592	case R_AARCH64_TLSDESC_LD64_LO12:
593	write32le(P: loc, V: `0xf2800000` \| ((val & `0xffff`) << `5`)); // movk
594	return;
595	default:
596	llvm_unreachable("unsupported relocation for TLS GD to LE relaxation");
597	}
598	}
599
600	void AArch64::relaxTlsGdToIe(uint8_t loc, const* Relocation &rel,
601	uint64_t val) const {
602	// TLSDESC Global-Dynamic relocation are in the form:
603	// adrp x0, :tlsdesc:v [R_AARCH64_TLSDESC_ADR_PAGE21]
604	// ldr x1, [x0, #:tlsdesc_lo12:v [R_AARCH64_TLSDESC_LD64_LO12]
605	// add x0, x0, :tlsdesc_los:v [R_AARCH64_TLSDESC_ADD_LO12]
606	// .tlsdesccall [R_AARCH64_TLSDESC_CALL]
607	// blr x1
608	// And it can optimized to:
609	// adrp x0, :gottprel:v
610	// ldr x0, [x0, :gottprel_lo12:v]
611	// nop
612	// nop
613
614	switch (rel.type) {
615	case R_AARCH64_TLSDESC_ADD_LO12:
616	case R_AARCH64_TLSDESC_CALL:
617	write32le(P: loc, V: `0xd503201f`); // nop
618	break;
619	case R_AARCH64_TLSDESC_ADR_PAGE21:
620	write32le(P: loc, V: `0x90000000`); // adrp
621	relocateNoSym(loc, type: R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21, val);
622	break;
623	case R_AARCH64_TLSDESC_LD64_LO12:
624	write32le(P: loc, V: `0xf9400000`); // ldr
625	relocateNoSym(loc, type: R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC, val);
626	break;
627	default:
628	llvm_unreachable("unsupported relocation for TLS GD to LE relaxation");
629	}
630	}
631
632	void AArch64::relaxTlsIeToLe(uint8_t loc, const* Relocation &rel,
633	uint64_t val) const {
634	checkUInt(loc, v: val, n: `32`, rel);
635
636	if (rel.type == R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21) {
637	// Generate MOVZ.
638	uint32_t regNo = read32le(P: loc) & `0x1f`;
639	write32le(P: loc, V: (`0xd2a00000` \| regNo) \| (((val >> `16`) & `0xffff`) << `5`));
640	return;
641	}
642	if (rel.type == R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC) {
643	// Generate MOVK.
644	uint32_t regNo = read32le(P: loc) & `0x1f`;
645	write32le(P: loc, V: (`0xf2800000` \| regNo) \| ((val & `0xffff`) << `5`));
646	return;
647	}
648	llvm_unreachable("invalid relocation for TLS IE to LE relaxation");
649	}
650
651	AArch64Relaxer::AArch64Relaxer(ArrayRef<Relocation> relocs) {
652	if (!config ->relax)
653	return;
654	// Check if R_AARCH64_ADR_GOT_PAGE and R_AARCH64_LD64_GOT_LO12_NC
655	// always appear in pairs.
656	size_t i = `0`;
657	const size_t size = relocs.size();
658	for (; i != size; ++i) {
659	if (relocs [i].type == R_AARCH64_ADR_GOT_PAGE) {
660	if (i + `1` < size && relocs [i + `1`].type == R_AARCH64_LD64_GOT_LO12_NC) {
661	++i;
662	continue;
663	}
664	break;
665	} else if (relocs [i].type == R_AARCH64_LD64_GOT_LO12_NC) {
666	break;
667	}
668	}
669	safeToRelaxAdrpLdr = i == size;
670	}
671
672	bool AArch64Relaxer::tryRelaxAdrpAdd(const Relocation &adrpRel,
673	const Relocation &addRel, uint64_t secAddr,
674	uint8_t buf) const* {
675	// When the address of sym is within the range of ADR then
676	// we may relax
677	// ADRP xn, sym
678	// ADD xn, xn, :lo12: sym
679	// to
680	// NOP
681	// ADR xn, sym
682	if (!config ->relax \|\| adrpRel.type != R_AARCH64_ADR_PREL_PG_HI21 \|\|
683	addRel.type != R_AARCH64_ADD_ABS_LO12_NC)
684	return false;
685	// Check if the relocations apply to consecutive instructions.
686	if (adrpRel.offset + `4` != addRel.offset)
687	return false;
688	if (adrpRel.sym != addRel.sym)
689	return false;
690	if (adrpRel.addend != `0` \|\| addRel.addend != `0`)
691	return false;
692
693	uint32_t adrpInstr = read32le(P: buf + adrpRel.offset);
694	uint32_t addInstr = read32le(P: buf + addRel.offset);
695	// Check if the first instruction is ADRP and the second instruction is ADD.
696	if ((adrpInstr & `0x9f000000`) != `0x90000000` \|\|
697	(addInstr & `0xffc00000`) != `0x91000000`)
698	return false;
699	uint32_t adrpDestReg = adrpInstr & `0x1f`;
700	uint32_t addDestReg = addInstr & `0x1f`;
701	uint32_t addSrcReg = (addInstr >> `5`) & `0x1f`;
702	if (adrpDestReg != addDestReg \|\| adrpDestReg != addSrcReg)
703	return false;
704
705	Symbol &sym = *adrpRel.sym;
706	// Check if the address difference is within 1MiB range.
707	int64_t val = sym.getVA() - (secAddr + addRel.offset);
708	if (val < -`1024` * `1024` \|\| val >= `1024` * `1024`)
709	return false;
710
711	Relocation adrRel = {.expr: R_ABS, .type: R_AARCH64_ADR_PREL_LO21, .offset: addRel.offset,
712	/addend=/`0`, .sym: &sym};
713	// nop
714	write32le(P: buf + adrpRel.offset, V: `0xd503201f`);
715	// adr x_<dest_reg>
716	write32le(P: buf + adrRel.offset, V: `0x10000000` \| adrpDestReg);
717	target->relocate(loc: buf + adrRel.offset, rel: adrRel, val);
718	return true;
719	}
720
721	bool AArch64Relaxer::tryRelaxAdrpLdr(const Relocation &adrpRel,
722	const Relocation &ldrRel, uint64_t secAddr,
723	uint8_t buf) const* {
724	if (!safeToRelaxAdrpLdr)
725	return false;
726
727	// When the definition of sym is not preemptible then we may
728	// be able to relax
729	// ADRP xn, :got: sym
730	// LDR xn, [ xn :got_lo12: sym]
731	// to
732	// ADRP xn, sym
733	// ADD xn, xn, :lo_12: sym
734
735	if (adrpRel.type != R_AARCH64_ADR_GOT_PAGE \|\|
736	ldrRel.type != R_AARCH64_LD64_GOT_LO12_NC)
737	return false;
738	// Check if the relocations apply to consecutive instructions.
739	if (adrpRel.offset + `4` != ldrRel.offset)
740	return false;
741	// Check if the relocations reference the same symbol and
742	// skip undefined, preemptible and STT_GNU_IFUNC symbols.
743	if (!adrpRel.sym \|\| adrpRel.sym != ldrRel.sym \|\| !adrpRel.sym->isDefined() \|\|
744	adrpRel.sym->isPreemptible \|\| adrpRel.sym->isGnuIFunc())
745	return false;
746	// Check if the addends of the both relocations are zero.
747	if (adrpRel.addend != `0` \|\| ldrRel.addend != `0`)
748	return false;
749	uint32_t adrpInstr = read32le(P: buf + adrpRel.offset);
750	uint32_t ldrInstr = read32le(P: buf + ldrRel.offset);
751	// Check if the first instruction is ADRP and the second instruction is LDR.
752	if ((adrpInstr & `0x9f000000`) != `0x90000000` \|\|
753	(ldrInstr & `0x3b000000`) != `0x39000000`)
754	return false;
755	// Check the value of the sf bit.
756	if (!(ldrInstr >> `31`))
757	return false;
758	uint32_t adrpDestReg = adrpInstr & `0x1f`;
759	uint32_t ldrDestReg = ldrInstr & `0x1f`;
760	uint32_t ldrSrcReg = (ldrInstr >> `5`) & `0x1f`;
761	// Check if ADPR and LDR use the same register.
762	if (adrpDestReg != ldrDestReg \|\| adrpDestReg != ldrSrcReg)
763	return false;
764
765	Symbol &sym = *adrpRel.sym;
766	// GOT references to absolute symbols can't be relaxed to use ADRP/ADD in
767	// position-independent code because these instructions produce a relative
768	// address.
769	if (config ->isPic && !cast<Defined>(Val&: sym).section)
770	return false;
771	// Check if the address difference is within 4GB range.
772	int64_t val =
773	getAArch64Page(expr: sym.getVA()) - getAArch64Page(expr: secAddr + adrpRel.offset);
774	if (val != llvm::SignExtend64(X: val, B: `33`))
775	return false;
776
777	Relocation adrpSymRel = {.expr: R_AARCH64_PAGE_PC, .type: R_AARCH64_ADR_PREL_PG_HI21,
778	.offset: adrpRel.offset, /addend=/`0`, .sym: &sym};
779	Relocation addRel = {.expr: R_ABS, .type: R_AARCH64_ADD_ABS_LO12_NC, .offset: ldrRel.offset,
780	/addend=/`0`, .sym: &sym};
781
782	// adrp x_<dest_reg>
783	write32le(P: buf + adrpSymRel.offset, V: `0x90000000` \| adrpDestReg);
784	// add x_<dest reg>, x_<dest reg>
785	write32le(P: buf + addRel.offset, V: `0x91000000` \| adrpDestReg \| (adrpDestReg << `5`));
786
787	target->relocate(loc: buf + adrpSymRel.offset, rel: adrpSymRel,
788	val: SignExtend64(X: getAArch64Page(expr: sym.getVA()) -
789	getAArch64Page(expr: secAddr + adrpSymRel.offset),
790	B: `64`));
791	target->relocate(loc: buf + addRel.offset, rel: addRel, val: SignExtend64(X: sym.getVA(), B: `64`));
792	tryRelaxAdrpAdd(adrpRel: adrpSymRel, addRel, secAddr, buf);
793	return true;
794	}
795
796	// Tagged symbols have upper address bits that are added by the dynamic loader,
797	// and thus need the full 64-bit GOT entry. Do not relax such symbols.
798	static bool needsGotForMemtag(const Relocation &rel) {
799	return rel.sym->isTagged() && needsGot(expr: rel.expr);
800	}
801
802	void AArch64::relocateAlloc(InputSectionBase &sec, uint8_t buf) const* {
803	uint64_t secAddr = sec.getOutputSection()->addr;
804	if (auto *s = dyn_cast<InputSection>(Val: &sec))
805	secAddr += s->outSecOff;
806	else if (auto *ehIn = dyn_cast<EhInputSection>(Val: &sec))
807	secAddr += ehIn->getParent()->outSecOff;
808	AArch64Relaxer relaxer(sec.relocs());
809	for (size_t i = `0`, size = sec.relocs().size(); i != size; ++i) {
810	const Relocation &rel = sec.relocs()[i];
811	uint8_t *loc = buf + rel.offset;
812	const uint64_t val =
813	sec.getRelocTargetVA(File: sec.file, Type: rel.type, A: rel.addend,
814	P: secAddr + rel.offset, Sym: *rel.sym, Expr: rel.expr);
815
816	if (needsGotForMemtag(rel)) {
817	relocate(loc, rel, val);
818	continue;
819	}
820
821	switch (rel.expr) {
822	case R_AARCH64_GOT_PAGE_PC:
823	if (i + `1` < size &&
824	relaxer.tryRelaxAdrpLdr(adrpRel: rel, ldrRel: sec.relocs()[i + `1`], secAddr, buf)) {
825	++i;
826	continue;
827	}
828	break;
829	case R_AARCH64_PAGE_PC:
830	if (i + `1` < size &&
831	relaxer.tryRelaxAdrpAdd(adrpRel: rel, addRel: sec.relocs()[i + `1`], secAddr, buf)) {
832	++i;
833	continue;
834	}
835	break;
836	case R_AARCH64_RELAX_TLS_GD_TO_IE_PAGE_PC:
837	case R_RELAX_TLS_GD_TO_IE_ABS:
838	relaxTlsGdToIe(loc, rel, val);
839	continue;
840	case R_RELAX_TLS_GD_TO_LE:
841	relaxTlsGdToLe(loc, rel, val);
842	continue;
843	case R_RELAX_TLS_IE_TO_LE:
844	relaxTlsIeToLe(loc, rel, val);
845	continue;
846	default:
847	break;
848	}
849	relocate(loc, rel, val);
850	}
851	}
852
853	// AArch64 may use security features in variant PLT sequences. These are:
854	// Pointer Authentication (PAC), introduced in armv8.3-a and Branch Target
855	// Indicator (BTI) introduced in armv8.5-a. The additional instructions used
856	// in the variant Plt sequences are encoded in the Hint space so they can be
857	// deployed on older architectures, which treat the instructions as a nop.
858	// PAC and BTI can be combined leading to the following combinations:
859	// writePltHeader
860	// writePltHeaderBti (no PAC Header needed)
861	// writePlt
862	// writePltBti (BTI only)
863	// writePltPac (PAC only)
864	// writePltBtiPac (BTI and PAC)
865	//
866	// When PAC is enabled the dynamic loader encrypts the address that it places
867	// in the .got.plt using the pacia1716 instruction which encrypts the value in
868	// x17 using the modifier in x16. The static linker places autia1716 before the
869	// indirect branch to x17 to authenticate the address in x17 with the modifier
870	// in x16. This makes it more difficult for an attacker to modify the value in
871	// the .got.plt.
872	//
873	// When BTI is enabled all indirect branches must land on a bti instruction.
874	// The static linker must place a bti instruction at the start of any PLT entry
875	// that may be the target of an indirect branch. As the PLT entries call the
876	// lazy resolver indirectly this must have a bti instruction at start. In
877	// general a bti instruction is not needed for a PLT entry as indirect calls
878	// are resolved to the function address and not the PLT entry for the function.
879	// There are a small number of cases where the PLT address can escape, such as
880	// taking the address of a function or ifunc via a non got-generating
881	// relocation, and a shared library refers to that symbol.
882	//
883	// We use the bti c variant of the instruction which permits indirect branches
884	// (br) via x16/x17 and indirect function calls (blr) via any register. The ABI
885	// guarantees that all indirect branches from code requiring BTI protection
886	// will go via x16/x17
887
888	namespace {
889	class AArch64BtiPac final : public AArch64 {
890	public:
891	AArch64BtiPac();
892	void writePltHeader(uint8_t buf) const* override;
893	void writePlt(uint8_t buf, const* Symbol &sym,
894	uint64_t pltEntryAddr) const override;
895
896	private:
897	bool btiHeader; // bti instruction needed in PLT Header and Entry
898	bool pacEntry; // autia1716 instruction needed in PLT Entry
899	};
900	} // namespace
901
902	AArch64BtiPac::AArch64BtiPac() {
903	btiHeader = (config ->andFeatures & GNU_PROPERTY_AARCH64_FEATURE_1_BTI);
904	// A BTI (Branch Target Indicator) Plt Entry is only required if the
905	// address of the PLT entry can be taken by the program, which permits an
906	// indirect jump to the PLT entry. This can happen when the address
907	// of the PLT entry for a function is canonicalised due to the address of
908	// the function in an executable being taken by a shared library, or
909	// non-preemptible ifunc referenced by non-GOT-generating, non-PLT-generating
910	// relocations.
911	// The PAC PLT entries require dynamic loader support and this isn't known
912	// from properties in the objects, so we use the command line flag.
913	pacEntry = config ->zPacPlt;
914
915	if (btiHeader \|\| pacEntry) {
916	pltEntrySize = `24`;
917	ipltEntrySize = `24`;
918	}
919	}
920
921	void AArch64BtiPac::writePltHeader(uint8_t buf) const* {
922	const uint8_t btiData[] = { `0x5f`, `0x24`, `0x03`, `0xd5` }; // bti c
923	const uint8_t pltData[] = {
924	`0xf0`, `0x7b`, `0xbf`, `0xa9`, // stp x16, x30, [sp,#-16]!
925	`0x10`, `0x00`, `0x00`, `0x90`, // adrp x16, Page(&(.got.plt[2]))
926	`0x11`, `0x02`, `0x40`, `0xf9`, // ldr x17, [x16, Offset(&(.got.plt[2]))]
927	`0x10`, `0x02`, `0x00`, `0x91`, // add x16, x16, Offset(&(.got.plt[2]))
928	`0x20`, `0x02`, `0x1f`, `0xd6`, // br x17
929	`0x1f`, `0x20`, `0x03`, `0xd5`, // nop
930	`0x1f`, `0x20`, `0x03`, `0xd5` // nop
931	};
932	const uint8_t nopData[] = { `0x1f`, `0x20`, `0x03`, `0xd5` }; // nop
933
934	uint64_t got = in.gotPlt ->getVA();
935	uint64_t plt = in.plt ->getVA();
936
937	if (btiHeader) {
938	// PltHeader is called indirectly by plt[N]. Prefix pltData with a BTI C
939	// instruction.
940	memcpy(dest: buf, src: btiData, n: sizeof(btiData));
941	buf += sizeof(btiData);
942	plt += sizeof(btiData);
943	}
944	memcpy(dest: buf, src: pltData, n: sizeof(pltData));
945
946	relocateNoSym(loc: buf + `4`, type: R_AARCH64_ADR_PREL_PG_HI21,
947	val: getAArch64Page(expr: got + `16`) - getAArch64Page(expr: plt + `8`));
948	relocateNoSym(loc: buf + `8`, type: R_AARCH64_LDST64_ABS_LO12_NC, val: got + `16`);
949	relocateNoSym(loc: buf + `12`, type: R_AARCH64_ADD_ABS_LO12_NC, val: got + `16`);
950	if (!btiHeader)
951	// We didn't add the BTI c instruction so round out size with NOP.
952	memcpy(dest: buf + sizeof(pltData), src: nopData, n: sizeof(nopData));
953	}
954
955	void AArch64BtiPac::writePlt(uint8_t buf, const* Symbol &sym,
956	uint64_t pltEntryAddr) const {
957	// The PLT entry is of the form:
958	// [btiData] addrInst (pacBr \| stdBr) [nopData]
959	const uint8_t btiData[] = { `0x5f`, `0x24`, `0x03`, `0xd5` }; // bti c
960	const uint8_t addrInst[] = {
961	`0x10`, `0x00`, `0x00`, `0x90`, // adrp x16, Page(&(.got.plt[n]))
962	`0x11`, `0x02`, `0x40`, `0xf9`, // ldr x17, [x16, Offset(&(.got.plt[n]))]
963	`0x10`, `0x02`, `0x00`, `0x91` // add x16, x16, Offset(&(.got.plt[n]))
964	};
965	const uint8_t pacBr[] = {
966	`0x9f`, `0x21`, `0x03`, `0xd5`, // autia1716
967	`0x20`, `0x02`, `0x1f`, `0xd6` // br x17
968	};
969	const uint8_t stdBr[] = {
970	`0x20`, `0x02`, `0x1f`, `0xd6`, // br x17
971	`0x1f`, `0x20`, `0x03`, `0xd5` // nop
972	};
973	const uint8_t nopData[] = { `0x1f`, `0x20`, `0x03`, `0xd5` }; // nop
974
975	// NEEDS_COPY indicates a non-ifunc canonical PLT entry whose address may
976	// escape to shared objects. isInIplt indicates a non-preemptible ifunc. Its
977	// address may escape if referenced by a direct relocation. If relative
978	// vtables are used then if the vtable is in a shared object the offsets will
979	// be to the PLT entry. The condition is conservative.
980	bool hasBti = btiHeader &&
981	(sym.hasFlag(bit: NEEDS_COPY) \|\| sym.isInIplt \|\| sym.thunkAccessed);
982	if (hasBti) {
983	memcpy(dest: buf, src: btiData, n: sizeof(btiData));
984	buf += sizeof(btiData);
985	pltEntryAddr += sizeof(btiData);
986	}
987
988	uint64_t gotPltEntryAddr = sym.getGotPltVA();
989	memcpy(dest: buf, src: addrInst, n: sizeof(addrInst));
990	relocateNoSym(loc: buf, type: R_AARCH64_ADR_PREL_PG_HI21,
991	val: getAArch64Page(expr: gotPltEntryAddr) - getAArch64Page(expr: pltEntryAddr));
992	relocateNoSym(loc: buf + `4`, type: R_AARCH64_LDST64_ABS_LO12_NC, val: gotPltEntryAddr);
993	relocateNoSym(loc: buf + `8`, type: R_AARCH64_ADD_ABS_LO12_NC, val: gotPltEntryAddr);
994
995	if (pacEntry)
996	memcpy(dest: buf + sizeof(addrInst), src: pacBr, n: sizeof(pacBr));
997	else
998	memcpy(dest: buf + sizeof(addrInst), src: stdBr, n: sizeof(stdBr));
999	if (!hasBti)
1000	// We didn't add the BTI c instruction so round out size with NOP.
1001	memcpy(dest: buf + sizeof(addrInst) + sizeof(stdBr), src: nopData, n: sizeof(nopData));
1002	}
1003
1004	static TargetInfo *getTargetInfo() {
1005	if ((config ->andFeatures & GNU_PROPERTY_AARCH64_FEATURE_1_BTI) \|\|
1006	config ->zPacPlt) {
1007	static AArch64BtiPac t;
1008	return &t;
1009	}
1010	static AArch64 t;
1011	return &t;
1012	}
1013
1014	TargetInfo elf::getAArch64TargetInfo() { return* getTargetInfo(); }
1015
1016	template <class ELFT>
1017	static void
1018	addTaggedSymbolReferences(InputSectionBase &sec,
1019	DenseMap<Symbol , unsigned*> &referenceCount) {
1020	assert(sec.type == SHT_AARCH64_MEMTAG_GLOBALS_STATIC);
1021
1022	const RelsOrRelas<ELFT> rels = sec.relsOrRelas<ELFT>();
1023	if (rels.areRelocsRel())
1024	error(msg: "non-RELA relocations are not allowed with memtag globals");
1025
1026	for (const typename ELFT::Rela &rel : rels.relas) {
1027	Symbol &sym = sec.file->getRelocTargetSym(rel);
1028	// Linker-synthesized symbols such as __executable_start may be referenced
1029	// as tagged in input objfiles, and we don't want them to be tagged. A
1030	// cheap way to exclude them is the type check, but their type is
1031	// STT_NOTYPE. In addition, this save us from checking untaggable symbols,
1032	// like functions or TLS symbols.
1033	if (sym.type != STT_OBJECT)
1034	continue;
1035	// STB_LOCAL symbols can't be referenced from outside the object file, and
1036	// thus don't need to be checked for references from other object files.
1037	if (sym.binding == STB_LOCAL) {
1038	sym.setIsTagged(true);
1039	continue;
1040	}
1041	++referenceCount [&sym];
1042	}
1043	sec.markDead();
1044	}
1045
1046	// A tagged symbol must be denoted as being tagged by all references and the
1047	// chosen definition. For simplicity, here, it must also be denoted as tagged
1048	// for all definitions. Otherwise:
1049	//
1050	// 1. A tagged definition can be used by an untagged declaration, in which case
1051	// the untagged access may be PC-relative, causing a tag mismatch at
1052	// runtime.
1053	// 2. An untagged definition can be used by a tagged declaration, where the
1054	// compiler has taken advantage of the increased alignment of the tagged
1055	// declaration, but the alignment at runtime is wrong, causing a fault.
1056	//
1057	// Ideally, this isn't a problem, as any TU that imports or exports tagged
1058	// symbols should also be built with tagging. But, to handle these cases, we
1059	// demote the symbol to be untagged.
1060	void lld::elf::createTaggedSymbols(const SmallVector<ELFFileBase *, `0`> &files) {
1061	assert(hasMemtag());
1062
1063	// First, collect all symbols that are marked as tagged, and count how many
1064	// times they're marked as tagged.
1065	DenseMap<Symbol , unsigned*> taggedSymbolReferenceCount;
1066	for (InputFile* file : files) {
1067	if (file->kind() != InputFile::ObjKind)
1068	continue;
1069	for (InputSectionBase *section : file->getSections()) {
1070	if (!section \|\| section->type != SHT_AARCH64_MEMTAG_GLOBALS_STATIC \|\|
1071	section == &InputSection::discarded)
1072	continue;
1073	invokeELFT(addTaggedSymbolReferences, *section,
1074	taggedSymbolReferenceCount);
1075	}
1076	}
1077
1078	// Now, go through all the symbols. If the number of declarations +
1079	// definitions to a symbol exceeds the amount of times they're marked as
1080	// tagged, it means we have an objfile that uses the untagged variant of the
1081	// symbol.
1082	for (InputFile *file : files) {
1083	if (file->kind() != InputFile::BinaryKind &&
1084	file->kind() != InputFile::ObjKind)
1085	continue;
1086
1087	for (Symbol *symbol : file->getSymbols()) {
1088	// See `addTaggedSymbolReferences` for more details.
1089	if (symbol->type != STT_OBJECT \|\|
1090	symbol->binding == STB_LOCAL)
1091	continue;
1092	auto it = taggedSymbolReferenceCount.find(Val: symbol);
1093	if (it == taggedSymbolReferenceCount.end()) continue;
1094	unsigned &remainingAllowedTaggedRefs = it ->second;
1095	if (remainingAllowedTaggedRefs == `0`) {
1096	taggedSymbolReferenceCount.erase(I: it);
1097	continue;
1098	}
1099	--remainingAllowedTaggedRefs;
1100	}
1101	}
1102
1103	// `addTaggedSymbolReferences` has already checked that we have RELA
1104	// relocations, the only other way to get written addends is with
1105	// --apply-dynamic-relocs.
1106	if (!taggedSymbolReferenceCount.empty() && config ->writeAddends)
1107	error(msg: "--apply-dynamic-relocs cannot be used with MTE globals");
1108
1109	// Now, `taggedSymbolReferenceCount` should only contain symbols that are
1110	// defined as tagged exactly the same amount as it's referenced, meaning all
1111	// uses are tagged.
1112	for (auto &[symbol, remainingTaggedRefs] : taggedSymbolReferenceCount) {
1113	assert(remainingTaggedRefs == `0` &&
1114	"Symbol is defined as tagged more times than it's used");
1115	symbol->setIsTagged(true);
1116	}
1117	}
1118

source code of lld/ELF/Arch/AArch64.cpp