1 | //===- ARM64.cpp ----------------------------------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "Arch/ARM64Common.h" |
10 | #include "InputFiles.h" |
11 | #include "Symbols.h" |
12 | #include "SyntheticSections.h" |
13 | #include "Target.h" |
14 | |
15 | #include "lld/Common/ErrorHandler.h" |
16 | #include "mach-o/compact_unwind_encoding.h" |
17 | #include "llvm/ADT/SmallVector.h" |
18 | #include "llvm/ADT/StringRef.h" |
19 | #include "llvm/BinaryFormat/MachO.h" |
20 | #include "llvm/Support/Endian.h" |
21 | #include "llvm/Support/LEB128.h" |
22 | #include "llvm/Support/MathExtras.h" |
23 | |
24 | using namespace llvm; |
25 | using namespace llvm::MachO; |
26 | using namespace llvm::support::endian; |
27 | using namespace lld; |
28 | using namespace lld::macho; |
29 | |
30 | namespace { |
31 | |
// ARM64 (AArch64) target for 64-bit Mach-O. Generic relocation handling lives
// in ARM64Common; this subclass supplies the concrete instruction sequences
// for lazy-binding stubs, branch-range-extension thunks, and Objective-C
// message-send stubs, and implements linker optimization hints (LOHs).
struct ARM64 : ARM64Common {
  ARM64();
  // Writes the __stubs entry that jumps via the symbol's lazy pointer.
  void writeStub(uint8_t *buf, const Symbol &, uint64_t) const override;
  // Writes the shared stub-helper prologue that tail-calls dyld_stub_binder.
  void writeStubHelperHeader(uint8_t *buf) const override;
  // Writes one per-symbol stub-helper entry that branches to the header.
  void writeStubHelperEntry(uint8_t *buf, const Symbol &,
                            uint64_t entryAddr) const override;

  // Writes an _objc_msgSend stub (fast or small variant, per config).
  void writeObjCMsgSendStub(uint8_t *buf, Symbol *sym, uint64_t stubsAddr,
                            uint64_t &stubOffset, uint64_t selrefVA,
                            Symbol *objcMsgSend) const override;
  // Fills a range-extension thunk with code + relocations targeting funcSym.
  void populateThunk(InputSection *thunk, Symbol *funcSym) override;
  // Applies LC_LINKER_OPTIMIZATION_HINTS transformations to the output buffer.
  void applyOptimizationHints(uint8_t *, const ObjFile &) const override;
};
45 | |
46 | } // namespace |
47 | |
48 | // Random notes on reloc types: |
49 | // ADDEND always pairs with BRANCH26, PAGE21, or PAGEOFF12 |
50 | // POINTER_TO_GOT: ld64 supports a 4-byte pc-relative form as well as an 8-byte |
51 | // absolute version of this relocation. The semantics of the absolute relocation |
52 | // are weird -- it results in the value of the GOT slot being written, instead |
53 | // of the address. Let's not support it unless we find a real-world use case. |
54 | static constexpr std::array<RelocAttrs, 11> relocAttrsArray{._M_elems: { |
55 | #define B(x) RelocAttrBits::x |
56 | {.name: "UNSIGNED" , |
57 | B(UNSIGNED) | B(ABSOLUTE) | B(EXTERN) | B(LOCAL) | B(BYTE4) | B(BYTE8)}, |
58 | {.name: "SUBTRACTOR" , B(SUBTRAHEND) | B(EXTERN) | B(BYTE4) | B(BYTE8)}, |
59 | {.name: "BRANCH26" , B(PCREL) | B(EXTERN) | B(BRANCH) | B(BYTE4)}, |
60 | {.name: "PAGE21" , B(PCREL) | B(EXTERN) | B(BYTE4)}, |
61 | {.name: "PAGEOFF12" , B(ABSOLUTE) | B(EXTERN) | B(BYTE4)}, |
62 | {.name: "GOT_LOAD_PAGE21" , B(PCREL) | B(EXTERN) | B(GOT) | B(BYTE4)}, |
63 | {.name: "GOT_LOAD_PAGEOFF12" , |
64 | B(ABSOLUTE) | B(EXTERN) | B(GOT) | B(LOAD) | B(BYTE4)}, |
65 | {.name: "POINTER_TO_GOT" , B(PCREL) | B(EXTERN) | B(GOT) | B(POINTER) | B(BYTE4)}, |
66 | {.name: "TLVP_LOAD_PAGE21" , B(PCREL) | B(EXTERN) | B(TLV) | B(BYTE4)}, |
67 | {.name: "TLVP_LOAD_PAGEOFF12" , |
68 | B(ABSOLUTE) | B(EXTERN) | B(TLV) | B(LOAD) | B(BYTE4)}, |
69 | {.name: "ADDEND" , B(ADDEND)}, |
70 | #undef B |
71 | }}; |
72 | |
// Code for a __stubs entry: load the symbol's lazy pointer and jump through it.
static constexpr uint32_t stubCode[] = {
    0x90000010, // 00: adrp  x16, __la_symbol_ptr@page
    0xf9400210, // 04: ldr   x16, [x16, __la_symbol_ptr@pageoff]
    0xd61f0200, // 08: br    x16
};
78 | |
// Emits stubCode at buf8, fixing up the adrp/ldr to address the symbol's
// lazy pointer at pointerVA (delegates to the shared ARM64Common helper).
void ARM64::writeStub(uint8_t *buf8, const Symbol &sym,
                      uint64_t pointerVA) const {
  ::writeStub(buf8, stubCode, sym, pointerVA);
}
83 | |
// Code for the __stub_helper prologue: push the binding context and tail-call
// dyld_stub_binder. Note: the identifier was dropped in the garbled source;
// restored from its uses in writeStubHelperHeader() and the constructor.
static constexpr uint32_t stubHelperHeaderCode[] = {
    0x90000011, // 00: adrp  x17, _dyld_private@page
    0x91000231, // 04: add   x17, x17, _dyld_private@pageoff
    0xa9bf47f0, // 08: stp   x16/x17, [sp, #-16]!
    0x90000010, // 0c: adrp  x16, dyld_stub_binder@page
    0xf9400210, // 10: ldr   x16, [x16, dyld_stub_binder@pageoff]
    0xd61f0200, // 14: br    x16
};
92 | |
93 | void ARM64::(uint8_t *buf8) const { |
94 | ::writeStubHelperHeader<LP64>(buf8, stubHelperHeaderCode); |
95 | } |
96 | |
// Code for one per-symbol stub-helper entry: load the symbol's lazy-binding
// info offset (patched into the trailing .long) and branch to the header.
static constexpr uint32_t stubHelperEntryCode[] = {
    0x18000050, // 00: ldr  w16, l0
    0x14000000, // 04: b    stubHelperHeader
    0x00000000, // 08: l0: .long 0
};
102 | |
// Emits stubHelperEntryCode for sym at buf8; entryVA is the entry's own
// address, needed to fix up the PC-relative branch to the header.
void ARM64::writeStubHelperEntry(uint8_t *buf8, const Symbol &sym,
                                 uint64_t entryVA) const {
  ::writeStubHelperEntry(buf8, stubHelperEntryCode, sym, entryVA);
}
107 | |
// "Fast" _objc_msgSend stub: load the selector into x1, then jump to
// _objc_msgSend through its GOT entry. Padded with brk instructions.
static constexpr uint32_t objcStubsFastCode[] = {
    0x90000001, // adrp  x1, __objc_selrefs@page
    0xf9400021, // ldr   x1, [x1, @selector("foo")@pageoff]
    0x90000010, // adrp  x16, _got@page
    0xf9400210, // ldr   x16, [x16, _objc_msgSend@pageoff]
    0xd61f0200, // br    x16
    0xd4200020, // brk   #0x1
    0xd4200020, // brk   #0x1
    0xd4200020, // brk   #0x1
};
118 | |
// "Small" _objc_msgSend stub: load the selector into x1 and branch directly
// to _objc_msgSend (or its stub).
static constexpr uint32_t objcStubsSmallCode[] = {
    0x90000001, // adrp  x1, __objc_selrefs@page
    0xf9400021, // ldr   x1, [x1, @selector("foo")@pageoff]
    0x14000000, // b     _objc_msgSend
};
124 | |
125 | void ARM64::writeObjCMsgSendStub(uint8_t *buf, Symbol *sym, uint64_t stubsAddr, |
126 | uint64_t &stubOffset, uint64_t selrefVA, |
127 | Symbol *objcMsgSend) const { |
128 | uint64_t objcMsgSendAddr; |
129 | uint64_t objcStubSize; |
130 | uint64_t objcMsgSendIndex; |
131 | |
132 | if (config->objcStubsMode == ObjCStubsMode::fast) { |
133 | objcStubSize = target->objcStubsFastSize; |
134 | objcMsgSendAddr = in.got->addr; |
135 | objcMsgSendIndex = objcMsgSend->gotIndex; |
136 | ::writeObjCMsgSendFastStub<LP64>(buf, objcStubsFastCode, sym, stubsAddr, |
137 | stubOffset, selrefVA, gotAddr: objcMsgSendAddr, |
138 | msgSendIndex: objcMsgSendIndex); |
139 | } else { |
140 | assert(config->objcStubsMode == ObjCStubsMode::small); |
141 | objcStubSize = target->objcStubsSmallSize; |
142 | if (auto *d = dyn_cast<Defined>(Val: objcMsgSend)) { |
143 | objcMsgSendAddr = d->getVA(); |
144 | objcMsgSendIndex = 0; |
145 | } else { |
146 | objcMsgSendAddr = in.stubs->addr; |
147 | objcMsgSendIndex = objcMsgSend->stubsIndex; |
148 | } |
149 | ::writeObjCMsgSendSmallStub<LP64>(buf, objcStubsSmallCode, sym, stubsAddr, |
150 | stubOffset, selrefVA, msgSendAddr: objcMsgSendAddr, |
151 | msgSendIndex: objcMsgSendIndex); |
152 | } |
153 | stubOffset += objcStubSize; |
154 | } |
155 | |
// A thunk is the relaxed variation of stubCode. We don't need the
// extra indirection through a lazy pointer because the target address
// is known at link time.
static constexpr uint32_t thunkCode[] = {
    0x90000010, // 00: adrp  x16, <thunk.ptr>@page
    0x91000210, // 04: add   x16, [x16,<thunk.ptr>@pageoff]
    0xd61f0200, // 08: br    x16
};
164 | |
165 | void ARM64::populateThunk(InputSection *thunk, Symbol *funcSym) { |
166 | thunk->align = 4; |
167 | thunk->data = {reinterpret_cast<const uint8_t *>(thunkCode), |
168 | sizeof(thunkCode)}; |
169 | thunk->relocs.emplace_back(/*type=*/args: ARM64_RELOC_PAGEOFF12, |
170 | /*pcrel=*/args: false, /*length=*/args: 2, |
171 | /*offset=*/args: 4, /*addend=*/args: 0, |
172 | /*referent=*/args&: funcSym); |
173 | thunk->relocs.emplace_back(/*type=*/args: ARM64_RELOC_PAGE21, |
174 | /*pcrel=*/args: true, /*length=*/args: 2, |
175 | /*offset=*/args: 0, /*addend=*/args: 0, |
176 | /*referent=*/args&: funcSym); |
177 | } |
178 | |
// Sets up the ARM64-specific TargetInfo parameters: CPU identifiers, synthetic
// code-sequence sizes, branch ranges, and the relocation attribute table.
ARM64::ARM64() : ARM64Common(LP64()) {
  cpuType = CPU_TYPE_ARM64;
  cpuSubtype = CPU_SUBTYPE_ARM64_ALL;

  stubSize = sizeof(stubCode);
  thunkSize = sizeof(thunkCode);

  objcStubsFastSize = sizeof(objcStubsFastCode);
  objcStubsFastAlignment = 32;
  objcStubsSmallSize = sizeof(objcStubsSmallCode);
  objcStubsSmallAlignment = 4;

  // Branch immediate is two's complement 26 bits, which is implicitly
  // multiplied by 4 (since all functions are 4-aligned): the branch range
  // is -4*(2**(26-1))..4*(2**(26-1) - 1).
  backwardBranchRange = 128 * 1024 * 1024;
  forwardBranchRange = backwardBranchRange - 4;

  modeDwarfEncoding = UNWIND_ARM64_MODE_DWARF;
  subtractorRelocType = ARM64_RELOC_SUBTRACTOR;
  unsignedRelocType = ARM64_RELOC_UNSIGNED;

  stubHelperHeaderSize = sizeof(stubHelperHeaderCode);
  stubHelperEntrySize = sizeof(stubHelperEntryCode);

  relocAttrs = {relocAttrsArray.data(), relocAttrsArray.size()};
}
206 | |
namespace {
// Decoded form of an adrp instruction.
struct Adrp {
  uint32_t destRegister;
  int64_t addend; // imm21, sign-extended and scaled by the 4096-byte page size
};

// Decoded form of an add (immediate) instruction.
struct Add {
  uint8_t destRegister;
  uint8_t srcRegister;
  uint32_t addend; // unsigned 12-bit immediate
};

// How an ldr extends the loaded value into the destination register.
enum ExtendType { ZeroExtend = 1, Sign64 = 2, Sign32 = 3 };

// Decoded form of an ldr (immediate) instruction.
struct Ldr {
  uint8_t destRegister;
  uint8_t baseRegister;
  uint8_t p2Size; // log2 of the load size in bytes
  bool isFloat;   // destination is a SIMD&FP register
  ExtendType extendType;
  int64_t offset; // byte offset from the base register
};
} // namespace
230 | |
231 | static bool parseAdrp(uint32_t insn, Adrp &adrp) { |
232 | if ((insn & 0x9f000000) != 0x90000000) |
233 | return false; |
234 | adrp.destRegister = insn & 0x1f; |
235 | uint64_t immHi = (insn >> 5) & 0x7ffff; |
236 | uint64_t immLo = (insn >> 29) & 0x3; |
237 | adrp.addend = SignExtend64<21>(x: immLo | (immHi << 2)) * 4096; |
238 | return true; |
239 | } |
240 | |
241 | static bool parseAdd(uint32_t insn, Add &add) { |
242 | if ((insn & 0xffc00000) != 0x91000000) |
243 | return false; |
244 | add.destRegister = insn & 0x1f; |
245 | add.srcRegister = (insn >> 5) & 0x1f; |
246 | add.addend = (insn >> 10) & 0xfff; |
247 | return true; |
248 | } |
249 | |
// Decodes an ldr (unsigned immediate offset) instruction -- integer,
// sign-extending, or SIMD&FP form -- into `ldr`. Returns false (leaving `ldr`
// partially written) for any other instruction.
static bool parseLdr(uint32_t insn, Ldr &ldr) {
  ldr.destRegister = insn & 0x1f;        // Rt
  ldr.baseRegister = (insn >> 5) & 0x1f; // Rn
  uint8_t size = insn >> 30;             // size field (log2 of access size)
  uint8_t opc = (insn >> 22) & 3;

  if ((insn & 0x3fc00000) == 0x39400000) {
    // LDR (immediate), LDRB (immediate), LDRH (immediate)
    ldr.p2Size = size;
    ldr.extendType = ZeroExtend;
    ldr.isFloat = false;
  } else if ((insn & 0x3f800000) == 0x39800000) {
    // LDRSB (immediate), LDRSH (immediate), LDRSW (immediate)
    ldr.p2Size = size;
    // opc distinguishes the 64-bit (Sign64) and 32-bit (Sign32) destinations.
    ldr.extendType = static_cast<ExtendType>(opc);
    ldr.isFloat = false;
  } else if ((insn & 0x3f400000) == 0x3d400000) {
    // LDR (immediate, SIMD&FP)
    ldr.extendType = ZeroExtend;
    ldr.isFloat = true;
    if (opc == 1)
      ldr.p2Size = size;
    else if (size == 0 && opc == 3)
      ldr.p2Size = 4; // 128-bit (Q register) load
    else
      return false;
  } else {
    return false;
  }
  // The encoded imm12 is scaled by the access size.
  ldr.offset = ((insn >> 10) & 0xfff) << ldr.p2Size;
  return true;
}
282 | |
283 | static bool isValidAdrOffset(int32_t delta) { return isInt<21>(x: delta); } |
284 | |
285 | static void writeAdr(void *loc, uint32_t dest, int32_t delta) { |
286 | assert(isValidAdrOffset(delta)); |
287 | uint32_t opcode = 0x10000000; |
288 | uint32_t immHi = (delta & 0x001ffffc) << 3; |
289 | uint32_t immLo = (delta & 0x00000003) << 29; |
290 | write32le(P: loc, V: opcode | immHi | immLo | dest); |
291 | } |
292 | |
293 | static void writeNop(void *loc) { write32le(P: loc, V: 0xd503201f); } |
294 | |
295 | static bool isLiteralLdrEligible(const Ldr &ldr) { |
296 | return ldr.p2Size > 1 && isShiftedInt<19, 2>(x: ldr.offset); |
297 | } |
298 | |
299 | static void writeLiteralLdr(void *loc, const Ldr &ldr) { |
300 | assert(isLiteralLdrEligible(ldr)); |
301 | uint32_t imm19 = (ldr.offset / 4 & maskTrailingOnes<uint32_t>(N: 19)) << 5; |
302 | uint32_t opcode; |
303 | switch (ldr.p2Size) { |
304 | case 2: |
305 | if (ldr.isFloat) |
306 | opcode = 0x1c000000; |
307 | else |
308 | opcode = ldr.extendType == Sign64 ? 0x98000000 : 0x18000000; |
309 | break; |
310 | case 3: |
311 | opcode = ldr.isFloat ? 0x5c000000 : 0x58000000; |
312 | break; |
313 | case 4: |
314 | opcode = 0x9c000000; |
315 | break; |
316 | default: |
317 | llvm_unreachable("Invalid literal ldr size" ); |
318 | } |
319 | write32le(P: loc, V: opcode | imm19 | ldr.destRegister); |
320 | } |
321 | |
322 | static bool isImmediateLdrEligible(const Ldr &ldr) { |
323 | // Note: We deviate from ld64's behavior, which converts to immediate loads |
324 | // only if ldr.offset < 4096, even though the offset is divided by the load's |
325 | // size in the 12-bit immediate operand. Only the unsigned offset variant is |
326 | // supported. |
327 | |
328 | uint32_t size = 1 << ldr.p2Size; |
329 | return ldr.offset >= 0 && (ldr.offset % size) == 0 && |
330 | isUInt<12>(x: ldr.offset >> ldr.p2Size); |
331 | } |
332 | |
333 | static void writeImmediateLdr(void *loc, const Ldr &ldr) { |
334 | assert(isImmediateLdrEligible(ldr)); |
335 | uint32_t opcode = 0x39000000; |
336 | if (ldr.isFloat) { |
337 | opcode |= 0x04000000; |
338 | assert(ldr.extendType == ZeroExtend); |
339 | } |
340 | opcode |= ldr.destRegister; |
341 | opcode |= ldr.baseRegister << 5; |
342 | uint8_t size, opc; |
343 | if (ldr.p2Size == 4) { |
344 | size = 0; |
345 | opc = 3; |
346 | } else { |
347 | opc = ldr.extendType; |
348 | size = ldr.p2Size; |
349 | } |
350 | uint32_t immBits = ldr.offset >> ldr.p2Size; |
351 | write32le(P: loc, V: opcode | (immBits << 10) | (opc << 22) | (size << 30)); |
352 | } |
353 | |
354 | // Transforms a pair of adrp+add instructions into an adr instruction if the |
355 | // target is within the +/- 1 MiB range allowed by the adr's 21 bit signed |
356 | // immediate offset. |
357 | // |
358 | // adrp xN, _foo@PAGE |
359 | // add xM, xN, _foo@PAGEOFF |
360 | // -> |
361 | // adr xM, _foo |
362 | // nop |
363 | static void applyAdrpAdd(uint8_t *buf, const ConcatInputSection *isec, |
364 | uint64_t offset1, uint64_t offset2) { |
365 | uint32_t ins1 = read32le(P: buf + offset1); |
366 | uint32_t ins2 = read32le(P: buf + offset2); |
367 | Adrp adrp; |
368 | Add add; |
369 | if (!parseAdrp(insn: ins1, adrp) || !parseAdd(insn: ins2, add)) |
370 | return; |
371 | if (adrp.destRegister != add.srcRegister) |
372 | return; |
373 | |
374 | uint64_t addr1 = isec->getVA() + offset1; |
375 | uint64_t referent = pageBits(address: addr1) + adrp.addend + add.addend; |
376 | int64_t delta = referent - addr1; |
377 | if (!isValidAdrOffset(delta)) |
378 | return; |
379 | |
380 | writeAdr(loc: buf + offset1, dest: add.destRegister, delta); |
381 | writeNop(loc: buf + offset2); |
382 | } |
383 | |
384 | // Transforms two adrp instructions into a single adrp if their referent |
385 | // addresses are located on the same 4096 byte page. |
386 | // |
387 | // adrp xN, _foo@PAGE |
388 | // adrp xN, _bar@PAGE |
389 | // -> |
390 | // adrp xN, _foo@PAGE |
391 | // nop |
392 | static void applyAdrpAdrp(uint8_t *buf, const ConcatInputSection *isec, |
393 | uint64_t offset1, uint64_t offset2) { |
394 | uint32_t ins1 = read32le(P: buf + offset1); |
395 | uint32_t ins2 = read32le(P: buf + offset2); |
396 | Adrp adrp1, adrp2; |
397 | if (!parseAdrp(insn: ins1, adrp&: adrp1) || !parseAdrp(insn: ins2, adrp&: adrp2)) |
398 | return; |
399 | if (adrp1.destRegister != adrp2.destRegister) |
400 | return; |
401 | |
402 | uint64_t page1 = pageBits(address: offset1 + isec->getVA()) + adrp1.addend; |
403 | uint64_t page2 = pageBits(address: offset2 + isec->getVA()) + adrp2.addend; |
404 | if (page1 != page2) |
405 | return; |
406 | |
407 | writeNop(loc: buf + offset2); |
408 | } |
409 | |
410 | // Transforms a pair of adrp+ldr (immediate) instructions into an ldr (literal) |
411 | // load from a PC-relative address if it is 4-byte aligned and within +/- 1 MiB, |
412 | // as ldr can encode a signed 19-bit offset that gets multiplied by 4. |
413 | // |
414 | // adrp xN, _foo@PAGE |
415 | // ldr xM, [xN, _foo@PAGEOFF] |
416 | // -> |
417 | // nop |
418 | // ldr xM, _foo |
419 | static void applyAdrpLdr(uint8_t *buf, const ConcatInputSection *isec, |
420 | uint64_t offset1, uint64_t offset2) { |
421 | uint32_t ins1 = read32le(P: buf + offset1); |
422 | uint32_t ins2 = read32le(P: buf + offset2); |
423 | Adrp adrp; |
424 | Ldr ldr; |
425 | if (!parseAdrp(insn: ins1, adrp) || !parseLdr(insn: ins2, ldr)) |
426 | return; |
427 | if (adrp.destRegister != ldr.baseRegister) |
428 | return; |
429 | |
430 | uint64_t addr1 = isec->getVA() + offset1; |
431 | uint64_t addr2 = isec->getVA() + offset2; |
432 | uint64_t referent = pageBits(address: addr1) + adrp.addend + ldr.offset; |
433 | ldr.offset = referent - addr2; |
434 | if (!isLiteralLdrEligible(ldr)) |
435 | return; |
436 | |
437 | writeNop(loc: buf + offset1); |
438 | writeLiteralLdr(loc: buf + offset2, ldr); |
439 | } |
440 | |
441 | // GOT loads are emitted by the compiler as a pair of adrp and ldr instructions, |
442 | // but they may be changed to adrp+add by relaxGotLoad(). This hint performs |
443 | // the AdrpLdr or AdrpAdd transformation depending on whether it was relaxed. |
444 | static void applyAdrpLdrGot(uint8_t *buf, const ConcatInputSection *isec, |
445 | uint64_t offset1, uint64_t offset2) { |
446 | uint32_t ins2 = read32le(P: buf + offset2); |
447 | Add add; |
448 | Ldr ldr; |
449 | if (parseAdd(insn: ins2, add)) |
450 | applyAdrpAdd(buf, isec, offset1, offset2); |
451 | else if (parseLdr(insn: ins2, ldr)) |
452 | applyAdrpLdr(buf, isec, offset1, offset2); |
453 | } |
454 | |
455 | // Optimizes an adrp+add+ldr sequence used for loading from a local symbol's |
456 | // address by loading directly if it's close enough, or to an adrp(p)+ldr |
457 | // sequence if it's not. |
458 | // |
459 | // adrp x0, _foo@PAGE |
460 | // add x1, x0, _foo@PAGEOFF |
461 | // ldr x2, [x1, #off] |
462 | static void applyAdrpAddLdr(uint8_t *buf, const ConcatInputSection *isec, |
463 | uint64_t offset1, uint64_t offset2, |
464 | uint64_t offset3) { |
465 | uint32_t ins1 = read32le(P: buf + offset1); |
466 | Adrp adrp; |
467 | if (!parseAdrp(insn: ins1, adrp)) |
468 | return; |
469 | uint32_t ins2 = read32le(P: buf + offset2); |
470 | Add add; |
471 | if (!parseAdd(insn: ins2, add)) |
472 | return; |
473 | uint32_t ins3 = read32le(P: buf + offset3); |
474 | Ldr ldr; |
475 | if (!parseLdr(insn: ins3, ldr)) |
476 | return; |
477 | if (adrp.destRegister != add.srcRegister) |
478 | return; |
479 | if (add.destRegister != ldr.baseRegister) |
480 | return; |
481 | |
482 | // Load from the target address directly. |
483 | // nop |
484 | // nop |
485 | // ldr x2, [_foo + #off] |
486 | uint64_t addr1 = isec->getVA() + offset1; |
487 | uint64_t addr3 = isec->getVA() + offset3; |
488 | uint64_t referent = pageBits(address: addr1) + adrp.addend + add.addend; |
489 | Ldr literalLdr = ldr; |
490 | literalLdr.offset += referent - addr3; |
491 | if (isLiteralLdrEligible(ldr: literalLdr)) { |
492 | writeNop(loc: buf + offset1); |
493 | writeNop(loc: buf + offset2); |
494 | writeLiteralLdr(loc: buf + offset3, ldr: literalLdr); |
495 | return; |
496 | } |
497 | |
498 | // Load the target address into a register and load from there indirectly. |
499 | // adr x1, _foo |
500 | // nop |
501 | // ldr x2, [x1, #off] |
502 | int64_t adrOffset = referent - addr1; |
503 | if (isValidAdrOffset(delta: adrOffset)) { |
504 | writeAdr(loc: buf + offset1, dest: ldr.baseRegister, delta: adrOffset); |
505 | // Note: ld64 moves the offset into the adr instruction for AdrpAddLdr, but |
506 | // not for AdrpLdrGotLdr. Its effect is the same either way. |
507 | writeNop(loc: buf + offset2); |
508 | return; |
509 | } |
510 | |
511 | // Move the target's page offset into the ldr's immediate offset. |
512 | // adrp x0, _foo@PAGE |
513 | // nop |
514 | // ldr x2, [x0, _foo@PAGEOFF + #off] |
515 | Ldr immediateLdr = ldr; |
516 | immediateLdr.baseRegister = adrp.destRegister; |
517 | immediateLdr.offset += add.addend; |
518 | if (isImmediateLdrEligible(ldr: immediateLdr)) { |
519 | writeNop(loc: buf + offset2); |
520 | writeImmediateLdr(loc: buf + offset3, ldr: immediateLdr); |
521 | return; |
522 | } |
523 | } |
524 | |
525 | // Relaxes a GOT-indirect load. |
526 | // If the referenced symbol is external and its GOT entry is within +/- 1 MiB, |
527 | // the GOT entry can be loaded with a single literal ldr instruction. |
528 | // If the referenced symbol is local and thus has been relaxed to adrp+add+ldr, |
529 | // we perform the AdrpAddLdr transformation. |
530 | static void applyAdrpLdrGotLdr(uint8_t *buf, const ConcatInputSection *isec, |
531 | uint64_t offset1, uint64_t offset2, |
532 | uint64_t offset3) { |
533 | uint32_t ins2 = read32le(P: buf + offset2); |
534 | Add add; |
535 | Ldr ldr2; |
536 | |
537 | if (parseAdd(insn: ins2, add)) { |
538 | applyAdrpAddLdr(buf, isec, offset1, offset2, offset3); |
539 | } else if (parseLdr(insn: ins2, ldr&: ldr2)) { |
540 | // adrp x1, _foo@GOTPAGE |
541 | // ldr x2, [x1, _foo@GOTPAGEOFF] |
542 | // ldr x3, [x2, #off] |
543 | |
544 | uint32_t ins1 = read32le(P: buf + offset1); |
545 | Adrp adrp; |
546 | if (!parseAdrp(insn: ins1, adrp)) |
547 | return; |
548 | uint32_t ins3 = read32le(P: buf + offset3); |
549 | Ldr ldr3; |
550 | if (!parseLdr(insn: ins3, ldr&: ldr3)) |
551 | return; |
552 | |
553 | if (ldr2.baseRegister != adrp.destRegister) |
554 | return; |
555 | if (ldr3.baseRegister != ldr2.destRegister) |
556 | return; |
557 | // Loads from the GOT must be pointer sized. |
558 | if (ldr2.p2Size != 3 || ldr2.isFloat) |
559 | return; |
560 | |
561 | uint64_t addr1 = isec->getVA() + offset1; |
562 | uint64_t addr2 = isec->getVA() + offset2; |
563 | uint64_t referent = pageBits(address: addr1) + adrp.addend + ldr2.offset; |
564 | // Load the GOT entry's address directly. |
565 | // nop |
566 | // ldr x2, _foo@GOTPAGE + _foo@GOTPAGEOFF |
567 | // ldr x3, [x2, #off] |
568 | Ldr literalLdr = ldr2; |
569 | literalLdr.offset = referent - addr2; |
570 | if (isLiteralLdrEligible(ldr: literalLdr)) { |
571 | writeNop(loc: buf + offset1); |
572 | writeLiteralLdr(loc: buf + offset2, ldr: literalLdr); |
573 | } |
574 | } |
575 | } |
576 | |
577 | static uint64_t readValue(const uint8_t *&ptr, const uint8_t *end) { |
578 | unsigned int n = 0; |
579 | uint64_t value = decodeULEB128(p: ptr, n: &n, end); |
580 | ptr += n; |
581 | return value; |
582 | } |
583 | |
584 | template <typename Callback> |
585 | static void forEachHint(ArrayRef<uint8_t> data, Callback callback) { |
586 | std::array<uint64_t, 3> args; |
587 | |
588 | for (const uint8_t *p = data.begin(), *end = data.end(); p < end;) { |
589 | uint64_t type = readValue(ptr&: p, end); |
590 | if (type == 0) |
591 | break; |
592 | |
593 | uint64_t argCount = readValue(ptr&: p, end); |
594 | // All known LOH types as of 2022-09 have 3 or fewer arguments; skip others. |
595 | if (argCount > 3) { |
596 | for (unsigned i = 0; i < argCount; ++i) |
597 | readValue(ptr&: p, end); |
598 | continue; |
599 | } |
600 | |
601 | for (unsigned i = 0; i < argCount; ++i) |
602 | args[i] = readValue(ptr&: p, end); |
603 | callback(type, ArrayRef<uint64_t>(args.data(), argCount)); |
604 | } |
605 | } |
606 | |
607 | // On RISC architectures like arm64, materializing a memory address generally |
608 | // takes multiple instructions. If the referenced symbol is located close enough |
609 | // in memory, fewer instructions are needed. |
610 | // |
611 | // Linker optimization hints record where addresses are computed. After |
612 | // addresses have been assigned, if possible, we change them to a shorter |
613 | // sequence of instructions. The size of the binary is not modified; the |
614 | // eliminated instructions are replaced with NOPs. This still leads to faster |
615 | // code as the CPU can skip over NOPs quickly. |
616 | // |
617 | // LOHs are specified by the LC_LINKER_OPTIMIZATION_HINTS load command, which |
618 | // points to a sequence of ULEB128-encoded numbers. Each entry specifies a |
619 | // transformation kind, and 2 or 3 addresses where the instructions are located. |
620 | void ARM64::applyOptimizationHints(uint8_t *outBuf, const ObjFile &obj) const { |
621 | ArrayRef<uint8_t> data = obj.getOptimizationHints(); |
622 | if (data.empty()) |
623 | return; |
624 | |
625 | const ConcatInputSection *section = nullptr; |
626 | uint64_t sectionAddr = 0; |
627 | uint8_t *buf = nullptr; |
628 | |
629 | auto findSection = [&](uint64_t addr) { |
630 | if (section && addr >= sectionAddr && |
631 | addr < sectionAddr + section->getSize()) |
632 | return true; |
633 | |
634 | if (obj.sections.empty()) |
635 | return false; |
636 | auto secIt = std::prev(x: llvm::upper_bound( |
637 | Range: obj.sections, Value&: addr, |
638 | C: [](uint64_t off, const Section *sec) { return off < sec->addr; })); |
639 | const Section *sec = *secIt; |
640 | |
641 | if (sec->subsections.empty()) |
642 | return false; |
643 | auto subsecIt = std::prev(x: llvm::upper_bound( |
644 | Range: sec->subsections, Value: addr - sec->addr, |
645 | C: [](uint64_t off, Subsection subsec) { return off < subsec.offset; })); |
646 | const Subsection &subsec = *subsecIt; |
647 | const ConcatInputSection *isec = |
648 | dyn_cast_or_null<ConcatInputSection>(Val: subsec.isec); |
649 | if (!isec || isec->shouldOmitFromOutput()) |
650 | return false; |
651 | |
652 | section = isec; |
653 | sectionAddr = subsec.offset + sec->addr; |
654 | buf = outBuf + section->outSecOff + section->parent->fileOff; |
655 | return true; |
656 | }; |
657 | |
658 | auto isValidOffset = [&](uint64_t offset) { |
659 | if (offset < sectionAddr || offset >= sectionAddr + section->getSize()) { |
660 | error(msg: toString(file: &obj) + |
661 | ": linker optimization hint spans multiple sections" ); |
662 | return false; |
663 | } |
664 | return true; |
665 | }; |
666 | |
667 | bool hasAdrpAdrp = false; |
668 | forEachHint(data, callback: [&](uint64_t kind, ArrayRef<uint64_t> args) { |
669 | if (kind == LOH_ARM64_ADRP_ADRP) { |
670 | hasAdrpAdrp = true; |
671 | return; |
672 | } |
673 | |
674 | if (!findSection(args[0])) |
675 | return; |
676 | switch (kind) { |
677 | case LOH_ARM64_ADRP_ADD: |
678 | if (isValidOffset(args[1])) |
679 | applyAdrpAdd(buf, isec: section, offset1: args[0] - sectionAddr, |
680 | offset2: args[1] - sectionAddr); |
681 | break; |
682 | case LOH_ARM64_ADRP_LDR: |
683 | if (isValidOffset(args[1])) |
684 | applyAdrpLdr(buf, isec: section, offset1: args[0] - sectionAddr, |
685 | offset2: args[1] - sectionAddr); |
686 | break; |
687 | case LOH_ARM64_ADRP_LDR_GOT: |
688 | if (isValidOffset(args[1])) |
689 | applyAdrpLdrGot(buf, isec: section, offset1: args[0] - sectionAddr, |
690 | offset2: args[1] - sectionAddr); |
691 | break; |
692 | case LOH_ARM64_ADRP_ADD_LDR: |
693 | if (isValidOffset(args[1]) && isValidOffset(args[2])) |
694 | applyAdrpAddLdr(buf, isec: section, offset1: args[0] - sectionAddr, |
695 | offset2: args[1] - sectionAddr, offset3: args[2] - sectionAddr); |
696 | break; |
697 | case LOH_ARM64_ADRP_LDR_GOT_LDR: |
698 | if (isValidOffset(args[1]) && isValidOffset(args[2])) |
699 | applyAdrpLdrGotLdr(buf, isec: section, offset1: args[0] - sectionAddr, |
700 | offset2: args[1] - sectionAddr, offset3: args[2] - sectionAddr); |
701 | break; |
702 | case LOH_ARM64_ADRP_ADD_STR: |
703 | case LOH_ARM64_ADRP_LDR_GOT_STR: |
704 | // TODO: Implement these |
705 | break; |
706 | } |
707 | }); |
708 | |
709 | if (!hasAdrpAdrp) |
710 | return; |
711 | |
712 | // AdrpAdrp optimization hints are performed in a second pass because they |
713 | // might interfere with other transformations. For instance, consider the |
714 | // following input: |
715 | // |
716 | // adrp x0, _foo@PAGE |
717 | // add x1, x0, _foo@PAGEOFF |
718 | // adrp x0, _bar@PAGE |
719 | // add x2, x0, _bar@PAGEOFF |
720 | // |
721 | // If we perform the AdrpAdrp relaxation first, we get: |
722 | // |
723 | // adrp x0, _foo@PAGE |
724 | // add x1, x0, _foo@PAGEOFF |
725 | // nop |
726 | // add x2, x0, _bar@PAGEOFF |
727 | // |
728 | // If we then apply AdrpAdd to the first two instructions, the add will have a |
729 | // garbage value in x0: |
730 | // |
731 | // adr x1, _foo |
732 | // nop |
733 | // nop |
734 | // add x2, x0, _bar@PAGEOFF |
735 | forEachHint(data, callback: [&](uint64_t kind, ArrayRef<uint64_t> args) { |
736 | if (kind != LOH_ARM64_ADRP_ADRP) |
737 | return; |
738 | if (!findSection(args[0])) |
739 | return; |
740 | if (isValidOffset(args[1])) |
741 | applyAdrpAdrp(buf, isec: section, offset1: args[0] - sectionAddr, offset2: args[1] - sectionAddr); |
742 | }); |
743 | } |
744 | |
// Returns the singleton TargetInfo for arm64 Mach-O.
TargetInfo *macho::createARM64TargetInfo() {
  static ARM64 t;
  return &t;
}
749 | |