1 | //===- ARM.cpp ------------------------------------------------------------===// |
---|---|
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "InputFiles.h" |
10 | #include "OutputSections.h" |
11 | #include "SymbolTable.h" |
12 | #include "Symbols.h" |
13 | #include "SyntheticSections.h" |
14 | #include "Target.h" |
15 | #include "lld/Common/Filesystem.h" |
16 | #include "llvm/BinaryFormat/ELF.h" |
17 | #include "llvm/Support/Endian.h" |
18 | |
19 | using namespace llvm; |
20 | using namespace llvm::support::endian; |
21 | using namespace llvm::support; |
22 | using namespace llvm::ELF; |
23 | using namespace lld; |
24 | using namespace lld::elf; |
25 | using namespace llvm::object; |
26 | |
27 | namespace { |
28 | class ARM final : public TargetInfo { |
29 | public: |
30 | ARM(Ctx &); |
31 | uint32_t calcEFlags() const override; |
32 | RelExpr getRelExpr(RelType type, const Symbol &s, |
33 | const uint8_t *loc) const override; |
34 | RelType getDynRel(RelType type) const override; |
35 | int64_t getImplicitAddend(const uint8_t *buf, RelType type) const override; |
36 | void writeGotPlt(uint8_t *buf, const Symbol &s) const override; |
37 | void writeIgotPlt(uint8_t *buf, const Symbol &s) const override; |
38 | void writePltHeader(uint8_t *buf) const override; |
39 | void writePlt(uint8_t *buf, const Symbol &sym, |
40 | uint64_t pltEntryAddr) const override; |
41 | void addPltSymbols(InputSection &isec, uint64_t off) const override; |
42 | void addPltHeaderSymbols(InputSection &isd) const override; |
43 | bool needsThunk(RelExpr expr, RelType type, const InputFile *file, |
44 | uint64_t branchAddr, const Symbol &s, |
45 | int64_t a) const override; |
46 | uint32_t getThunkSectionSpacing() const override; |
47 | bool inBranchRange(RelType type, uint64_t src, uint64_t dst) const override; |
48 | void relocate(uint8_t *loc, const Relocation &rel, |
49 | uint64_t val) const override; |
50 | |
51 | DenseMap<InputSection *, SmallVector<const Defined *, 0>> sectionMap; |
52 | |
53 | private: |
54 | void encodeAluGroup(uint8_t *loc, const Relocation &rel, uint64_t val, |
55 | int group, bool check) const; |
56 | }; |
57 | enum class CodeState { Data = 0, Thumb = 2, Arm = 4 }; |
58 | } // namespace |
59 | |
60 | ARM::ARM(Ctx &ctx) : TargetInfo(ctx) { |
61 | copyRel = R_ARM_COPY; |
62 | relativeRel = R_ARM_RELATIVE; |
63 | iRelativeRel = R_ARM_IRELATIVE; |
64 | gotRel = R_ARM_GLOB_DAT; |
65 | pltRel = R_ARM_JUMP_SLOT; |
66 | symbolicRel = R_ARM_ABS32; |
67 | tlsGotRel = R_ARM_TLS_TPOFF32; |
68 | tlsModuleIndexRel = R_ARM_TLS_DTPMOD32; |
69 | tlsOffsetRel = R_ARM_TLS_DTPOFF32; |
70 | pltHeaderSize = 32; |
71 | pltEntrySize = 16; |
72 | ipltEntrySize = 16; |
73 | trapInstr = {0xd4, 0xd4, 0xd4, 0xd4}; |
74 | needsThunks = true; |
75 | defaultMaxPageSize = 65536; |
76 | } |
77 | |
78 | uint32_t ARM::calcEFlags() const { |
79 | // The ABIFloatType is used by loaders to detect the floating point calling |
80 | // convention. |
81 | uint32_t abiFloatType = 0; |
82 | |
83 | // Set the EF_ARM_BE8 flag in the ELF header, if ELF file is big-endian |
84 | // with BE-8 code. |
85 | uint32_t armBE8 = 0; |
86 | |
87 | if (ctx.arg.armVFPArgs == ARMVFPArgKind::Base || |
88 | ctx.arg.armVFPArgs == ARMVFPArgKind::Default) |
89 | abiFloatType = EF_ARM_ABI_FLOAT_SOFT; |
90 | else if (ctx.arg.armVFPArgs == ARMVFPArgKind::VFP) |
91 | abiFloatType = EF_ARM_ABI_FLOAT_HARD; |
92 | |
93 | if (!ctx.arg.isLE && ctx.arg.armBe8) |
94 | armBE8 = EF_ARM_BE8; |
95 | |
96 | // We don't currently use any features incompatible with EF_ARM_EABI_VER5, |
97 | // but we don't have any firm guarantees of conformance. Linux AArch64 |
98 | // kernels (as of 2016) require an EABI version to be set. |
99 | return EF_ARM_EABI_VER5 | abiFloatType | armBE8; |
100 | } |
101 | |
102 | RelExpr ARM::getRelExpr(RelType type, const Symbol &s, |
103 | const uint8_t *loc) const { |
104 | switch (type) { |
105 | case R_ARM_ABS32: |
106 | case R_ARM_MOVW_ABS_NC: |
107 | case R_ARM_MOVT_ABS: |
108 | case R_ARM_THM_MOVW_ABS_NC: |
109 | case R_ARM_THM_MOVT_ABS: |
110 | case R_ARM_THM_ALU_ABS_G0_NC: |
111 | case R_ARM_THM_ALU_ABS_G1_NC: |
112 | case R_ARM_THM_ALU_ABS_G2_NC: |
113 | case R_ARM_THM_ALU_ABS_G3: |
114 | return R_ABS; |
115 | case R_ARM_THM_JUMP8: |
116 | case R_ARM_THM_JUMP11: |
117 | return R_PC; |
118 | case R_ARM_CALL: |
119 | case R_ARM_JUMP24: |
120 | case R_ARM_PC24: |
121 | case R_ARM_PLT32: |
122 | case R_ARM_PREL31: |
123 | case R_ARM_THM_JUMP19: |
124 | case R_ARM_THM_JUMP24: |
125 | case R_ARM_THM_CALL: |
126 | return R_PLT_PC; |
127 | case R_ARM_GOTOFF32: |
128 | // (S + A) - GOT_ORG |
129 | return R_GOTREL; |
130 | case R_ARM_GOT_BREL: |
131 | // GOT(S) + A - GOT_ORG |
132 | return R_GOT_OFF; |
133 | case R_ARM_GOT_PREL: |
134 | case R_ARM_TLS_IE32: |
135 | // GOT(S) + A - P |
136 | return R_GOT_PC; |
137 | case R_ARM_SBREL32: |
138 | return RE_ARM_SBREL; |
139 | case R_ARM_TARGET1: |
140 | return ctx.arg.target1Rel ? R_PC : R_ABS; |
141 | case R_ARM_TARGET2: |
142 | if (ctx.arg.target2 == Target2Policy::Rel) |
143 | return R_PC; |
144 | if (ctx.arg.target2 == Target2Policy::Abs) |
145 | return R_ABS; |
146 | return R_GOT_PC; |
147 | case R_ARM_TLS_GD32: |
148 | return R_TLSGD_PC; |
149 | case R_ARM_TLS_LDM32: |
150 | return R_TLSLD_PC; |
151 | case R_ARM_TLS_LDO32: |
152 | return R_DTPREL; |
153 | case R_ARM_BASE_PREL: |
154 | // B(S) + A - P |
155 | // FIXME: currently B(S) assumed to be .got, this may not hold for all |
156 | // platforms. |
157 | return R_GOTONLY_PC; |
158 | case R_ARM_MOVW_PREL_NC: |
159 | case R_ARM_MOVT_PREL: |
160 | case R_ARM_REL32: |
161 | case R_ARM_THM_MOVW_PREL_NC: |
162 | case R_ARM_THM_MOVT_PREL: |
163 | return R_PC; |
164 | case R_ARM_ALU_PC_G0: |
165 | case R_ARM_ALU_PC_G0_NC: |
166 | case R_ARM_ALU_PC_G1: |
167 | case R_ARM_ALU_PC_G1_NC: |
168 | case R_ARM_ALU_PC_G2: |
169 | case R_ARM_LDR_PC_G0: |
170 | case R_ARM_LDR_PC_G1: |
171 | case R_ARM_LDR_PC_G2: |
172 | case R_ARM_LDRS_PC_G0: |
173 | case R_ARM_LDRS_PC_G1: |
174 | case R_ARM_LDRS_PC_G2: |
175 | case R_ARM_THM_ALU_PREL_11_0: |
176 | case R_ARM_THM_PC8: |
177 | case R_ARM_THM_PC12: |
178 | return RE_ARM_PCA; |
179 | case R_ARM_MOVW_BREL_NC: |
180 | case R_ARM_MOVW_BREL: |
181 | case R_ARM_MOVT_BREL: |
182 | case R_ARM_THM_MOVW_BREL_NC: |
183 | case R_ARM_THM_MOVW_BREL: |
184 | case R_ARM_THM_MOVT_BREL: |
185 | return RE_ARM_SBREL; |
186 | case R_ARM_NONE: |
187 | return R_NONE; |
188 | case R_ARM_TLS_LE32: |
189 | return R_TPREL; |
190 | case R_ARM_V4BX: |
191 | // V4BX is just a marker to indicate there's a "bx rN" instruction at the |
192 | // given address. It can be used to implement a special linker mode which |
193 | // rewrites ARMv4T inputs to ARMv4. Since we support only ARMv4 input and |
194 | // not ARMv4 output, we can just ignore it. |
195 | return R_NONE; |
196 | default: |
197 | Err(ctx) << getErrorLoc(ctx, loc) << "unknown relocation ("<< type.v |
198 | << ") against symbol "<< &s; |
199 | return R_NONE; |
200 | } |
201 | } |
202 | |
203 | RelType ARM::getDynRel(RelType type) const { |
204 | if ((type == R_ARM_ABS32) || (type == R_ARM_TARGET1 && !ctx.arg.target1Rel)) |
205 | return R_ARM_ABS32; |
206 | return R_ARM_NONE; |
207 | } |
208 | |
209 | void ARM::writeGotPlt(uint8_t *buf, const Symbol &) const { |
210 | write32(ctx, p: buf, v: ctx.in.plt->getVA()); |
211 | } |
212 | |
213 | void ARM::writeIgotPlt(uint8_t *buf, const Symbol &s) const { |
214 | // An ARM entry is the address of the ifunc resolver function. |
215 | write32(ctx, p: buf, v: s.getVA(ctx)); |
216 | } |
217 | |
218 | // Long form PLT Header that does not have any restrictions on the displacement |
219 | // of the .plt from the .got.plt. |
220 | static void writePltHeaderLong(Ctx &ctx, uint8_t *buf) { |
221 | write32(ctx, p: buf + 0, v: 0xe52de004); // str lr, [sp,#-4]! |
222 | write32(ctx, p: buf + 4, v: 0xe59fe004); // ldr lr, L2 |
223 | write32(ctx, p: buf + 8, v: 0xe08fe00e); // L1: add lr, pc, lr |
224 | write32(ctx, p: buf + 12, v: 0xe5bef008); // ldr pc, [lr, #8] |
225 | write32(ctx, p: buf + 16, v: 0x00000000); // L2: .word &(.got.plt) - L1 - 8 |
226 | write32(ctx, p: buf + 20, v: 0xd4d4d4d4); // Pad to 32-byte boundary |
227 | write32(ctx, p: buf + 24, v: 0xd4d4d4d4); // Pad to 32-byte boundary |
228 | write32(ctx, p: buf + 28, v: 0xd4d4d4d4); |
229 | uint64_t gotPlt = ctx.in.gotPlt->getVA(); |
230 | uint64_t l1 = ctx.in.plt->getVA() + 8; |
231 | write32(ctx, p: buf + 16, v: gotPlt - l1 - 8); |
232 | } |
233 | |
234 | // True if we should use Thumb PLTs, which currently require Thumb2, and are |
235 | // only used if the target does not have the ARM ISA. |
236 | static bool useThumbPLTs(Ctx &ctx) { |
237 | return ctx.arg.armHasThumb2ISA && !ctx.arg.armHasArmISA; |
238 | } |
239 | |
240 | // The default PLT header requires the .got.plt to be within 128 Mb of the |
241 | // .plt in the positive direction. |
242 | void ARM::writePltHeader(uint8_t *buf) const { |
243 | if (useThumbPLTs(ctx)) { |
244 | // The instruction sequence for thumb: |
245 | // |
246 | // 0: b500 push {lr} |
247 | // 2: f8df e008 ldr.w lr, [pc, #0x8] @ 0xe <func+0xe> |
248 | // 6: 44fe add lr, pc |
249 | // 8: f85e ff08 ldr pc, [lr, #8]! |
250 | // e: .word .got.plt - .plt - 16 |
251 | // |
252 | // At 0x8, we want to jump to .got.plt, the -16 accounts for 8 bytes from |
253 | // `pc` in the add instruction and 8 bytes for the `lr` adjustment. |
254 | // |
255 | uint64_t offset = ctx.in.gotPlt->getVA() - ctx.in.plt->getVA() - 16; |
256 | assert(llvm::isUInt<32>(offset) && "This should always fit into a 32-bit offset"); |
257 | write16(ctx, p: buf + 0, v: 0xb500); |
258 | // Split into two halves to support endianness correctly. |
259 | write16(ctx, p: buf + 2, v: 0xf8df); |
260 | write16(ctx, p: buf + 4, v: 0xe008); |
261 | write16(ctx, p: buf + 6, v: 0x44fe); |
262 | // Split into two halves to support endianness correctly. |
263 | write16(ctx, p: buf + 8, v: 0xf85e); |
264 | write16(ctx, p: buf + 10, v: 0xff08); |
265 | write32(ctx, p: buf + 12, v: offset); |
266 | |
267 | memcpy(dest: buf + 16, src: trapInstr.data(), n: 4); // Pad to 32-byte boundary |
268 | memcpy(dest: buf + 20, src: trapInstr.data(), n: 4); |
269 | memcpy(dest: buf + 24, src: trapInstr.data(), n: 4); |
270 | memcpy(dest: buf + 28, src: trapInstr.data(), n: 4); |
271 | } else { |
272 | // Use a similar sequence to that in writePlt(), the difference is the |
273 | // calling conventions mean we use lr instead of ip. The PLT entry is |
274 | // responsible for saving lr on the stack, the dynamic loader is responsible |
275 | // for reloading it. |
276 | const uint32_t pltData[] = { |
277 | 0xe52de004, // L1: str lr, [sp,#-4]! |
278 | 0xe28fe600, // add lr, pc, #0x0NN00000 &(.got.plt - L1 - 4) |
279 | 0xe28eea00, // add lr, lr, #0x000NN000 &(.got.plt - L1 - 4) |
280 | 0xe5bef000, // ldr pc, [lr, #0x00000NNN] &(.got.plt -L1 - 4) |
281 | }; |
282 | |
283 | uint64_t offset = ctx.in.gotPlt->getVA() - ctx.in.plt->getVA() - 4; |
284 | if (!llvm::isUInt<27>(x: offset)) { |
285 | // We cannot encode the Offset, use the long form. |
286 | writePltHeaderLong(ctx, buf); |
287 | return; |
288 | } |
289 | write32(ctx, p: buf + 0, v: pltData[0]); |
290 | write32(ctx, p: buf + 4, v: pltData[1] | ((offset >> 20) & 0xff)); |
291 | write32(ctx, p: buf + 8, v: pltData[2] | ((offset >> 12) & 0xff)); |
292 | write32(ctx, p: buf + 12, v: pltData[3] | (offset & 0xfff)); |
293 | memcpy(dest: buf + 16, src: trapInstr.data(), n: 4); // Pad to 32-byte boundary |
294 | memcpy(dest: buf + 20, src: trapInstr.data(), n: 4); |
295 | memcpy(dest: buf + 24, src: trapInstr.data(), n: 4); |
296 | memcpy(dest: buf + 28, src: trapInstr.data(), n: 4); |
297 | } |
298 | } |
299 | |
300 | void ARM::addPltHeaderSymbols(InputSection &isec) const { |
301 | if (useThumbPLTs(ctx)) { |
302 | addSyntheticLocal(ctx, name: "$t", type: STT_NOTYPE, value: 0, size: 0, section&: isec); |
303 | addSyntheticLocal(ctx, name: "$d", type: STT_NOTYPE, value: 12, size: 0, section&: isec); |
304 | } else { |
305 | addSyntheticLocal(ctx, name: "$a", type: STT_NOTYPE, value: 0, size: 0, section&: isec); |
306 | addSyntheticLocal(ctx, name: "$d", type: STT_NOTYPE, value: 16, size: 0, section&: isec); |
307 | } |
308 | } |
309 | |
310 | // Long form PLT entries that do not have any restrictions on the displacement |
311 | // of the .plt from the .got.plt. |
312 | static void writePltLong(Ctx &ctx, uint8_t *buf, uint64_t gotPltEntryAddr, |
313 | uint64_t pltEntryAddr) { |
314 | write32(ctx, p: buf + 0, v: 0xe59fc004); // ldr ip, L2 |
315 | write32(ctx, p: buf + 4, v: 0xe08cc00f); // L1: add ip, ip, pc |
316 | write32(ctx, p: buf + 8, v: 0xe59cf000); // ldr pc, [ip] |
317 | write32(ctx, p: buf + 12, v: 0x00000000); // L2: .word Offset(&(.got.plt) - L1 - 8 |
318 | uint64_t l1 = pltEntryAddr + 4; |
319 | write32(ctx, p: buf + 12, v: gotPltEntryAddr - l1 - 8); |
320 | } |
321 | |
322 | // The default PLT entries require the .got.plt to be within 128 Mb of the |
323 | // .plt in the positive direction. |
324 | void ARM::writePlt(uint8_t *buf, const Symbol &sym, |
325 | uint64_t pltEntryAddr) const { |
326 | if (!useThumbPLTs(ctx)) { |
327 | uint64_t offset = sym.getGotPltVA(ctx) - pltEntryAddr - 8; |
328 | |
329 | // The PLT entry is similar to the example given in Appendix A of ELF for |
330 | // the Arm Architecture. Instead of using the Group Relocations to find the |
331 | // optimal rotation for the 8-bit immediate used in the add instructions we |
332 | // hard code the most compact rotations for simplicity. This saves a load |
333 | // instruction over the long plt sequences. |
334 | const uint32_t pltData[] = { |
335 | 0xe28fc600, // L1: add ip, pc, #0x0NN00000 Offset(&(.got.plt) - L1 - 8 |
336 | 0xe28cca00, // add ip, ip, #0x000NN000 Offset(&(.got.plt) - L1 - 8 |
337 | 0xe5bcf000, // ldr pc, [ip, #0x00000NNN] Offset(&(.got.plt) - L1 - 8 |
338 | }; |
339 | if (!llvm::isUInt<27>(x: offset)) { |
340 | // We cannot encode the Offset, use the long form. |
341 | writePltLong(ctx, buf, gotPltEntryAddr: sym.getGotPltVA(ctx), pltEntryAddr); |
342 | return; |
343 | } |
344 | write32(ctx, p: buf + 0, v: pltData[0] | ((offset >> 20) & 0xff)); |
345 | write32(ctx, p: buf + 4, v: pltData[1] | ((offset >> 12) & 0xff)); |
346 | write32(ctx, p: buf + 8, v: pltData[2] | (offset & 0xfff)); |
347 | memcpy(dest: buf + 12, src: trapInstr.data(), n: 4); // Pad to 16-byte boundary |
348 | } else { |
349 | uint64_t offset = sym.getGotPltVA(ctx) - pltEntryAddr - 12; |
350 | assert(llvm::isUInt<32>(offset) && "This should always fit into a 32-bit offset"); |
351 | |
352 | // A PLT entry will be: |
353 | // |
354 | // movw ip, #<lower 16 bits> |
355 | // movt ip, #<upper 16 bits> |
356 | // add ip, pc |
357 | // L1: ldr.w pc, [ip] |
358 | // b L1 |
359 | // |
360 | // where ip = r12 = 0xc |
361 | |
362 | // movw ip, #<lower 16 bits> |
363 | write16(ctx, p: buf + 2, v: 0x0c00); // use `ip` |
364 | relocateNoSym(loc: buf, type: R_ARM_THM_MOVW_ABS_NC, val: offset); |
365 | |
366 | // movt ip, #<upper 16 bits> |
367 | write16(ctx, p: buf + 6, v: 0x0c00); // use `ip` |
368 | relocateNoSym(loc: buf + 4, type: R_ARM_THM_MOVT_ABS, val: offset); |
369 | |
370 | write16(ctx, p: buf + 8, v: 0x44fc); // add ip, pc |
371 | write16(ctx, p: buf + 10, v: 0xf8dc); // ldr.w pc, [ip] (bottom half) |
372 | write16(ctx, p: buf + 12, v: 0xf000); // ldr.w pc, [ip] (upper half) |
373 | write16(ctx, p: buf + 14, v: 0xe7fc); // Branch to previous instruction |
374 | } |
375 | } |
376 | |
377 | void ARM::addPltSymbols(InputSection &isec, uint64_t off) const { |
378 | if (useThumbPLTs(ctx)) { |
379 | addSyntheticLocal(ctx, name: "$t", type: STT_NOTYPE, value: off, size: 0, section&: isec); |
380 | } else { |
381 | addSyntheticLocal(ctx, name: "$a", type: STT_NOTYPE, value: off, size: 0, section&: isec); |
382 | addSyntheticLocal(ctx, name: "$d", type: STT_NOTYPE, value: off + 12, size: 0, section&: isec); |
383 | } |
384 | } |
385 | |
386 | bool ARM::needsThunk(RelExpr expr, RelType type, const InputFile *file, |
387 | uint64_t branchAddr, const Symbol &s, |
388 | int64_t a) const { |
389 | // If s is an undefined weak symbol and does not have a PLT entry then it will |
390 | // be resolved as a branch to the next instruction. If it is hidden, its |
391 | // binding has been converted to local, so we just check isUndefined() here. A |
392 | // undefined non-weak symbol will have been errored. |
393 | if (s.isUndefined() && !s.isInPlt(ctx)) |
394 | return false; |
395 | // A state change from ARM to Thumb and vice versa must go through an |
396 | // interworking thunk if the relocation type is not R_ARM_CALL or |
397 | // R_ARM_THM_CALL. |
398 | switch (type) { |
399 | case R_ARM_PC24: |
400 | case R_ARM_PLT32: |
401 | case R_ARM_JUMP24: |
402 | // Source is ARM, all PLT entries are ARM so no interworking required. |
403 | // Otherwise we need to interwork if STT_FUNC Symbol has bit 0 set (Thumb). |
404 | assert(!useThumbPLTs(ctx) && |
405 | "If the source is ARM, we should not need Thumb PLTs"); |
406 | if (s.isFunc() && expr == R_PC && (s.getVA(ctx) & 1)) |
407 | return true; |
408 | [[fallthrough]]; |
409 | case R_ARM_CALL: { |
410 | uint64_t dst = (expr == R_PLT_PC) ? s.getPltVA(ctx) : s.getVA(ctx); |
411 | return !inBranchRange(type, src: branchAddr, dst: dst + a) || |
412 | (!ctx.arg.armHasBlx && (s.getVA(ctx) & 1)); |
413 | } |
414 | case R_ARM_THM_JUMP19: |
415 | case R_ARM_THM_JUMP24: |
416 | // Source is Thumb, when all PLT entries are ARM interworking is required. |
417 | // Otherwise we need to interwork if STT_FUNC Symbol has bit 0 clear (ARM). |
418 | if ((expr == R_PLT_PC && !useThumbPLTs(ctx)) || |
419 | (s.isFunc() && (s.getVA(ctx) & 1) == 0)) |
420 | return true; |
421 | [[fallthrough]]; |
422 | case R_ARM_THM_CALL: { |
423 | uint64_t dst = (expr == R_PLT_PC) ? s.getPltVA(ctx) : s.getVA(ctx); |
424 | return !inBranchRange(type, src: branchAddr, dst: dst + a) || |
425 | (!ctx.arg.armHasBlx && (s.getVA(ctx) & 1) == 0); |
426 | } |
427 | } |
428 | return false; |
429 | } |
430 | |
431 | uint32_t ARM::getThunkSectionSpacing() const { |
432 | // The placing of pre-created ThunkSections is controlled by the value |
433 | // thunkSectionSpacing returned by getThunkSectionSpacing(). The aim is to |
434 | // place the ThunkSection such that all branches from the InputSections |
435 | // prior to the ThunkSection can reach a Thunk placed at the end of the |
436 | // ThunkSection. Graphically: |
437 | // | up to thunkSectionSpacing .text input sections | |
438 | // | ThunkSection | |
439 | // | up to thunkSectionSpacing .text input sections | |
440 | // | ThunkSection | |
441 | |
442 | // Pre-created ThunkSections are spaced roughly 16MiB apart on ARMv7. This |
443 | // is to match the most common expected case of a Thumb 2 encoded BL, BLX or |
444 | // B.W: |
445 | // ARM B, BL, BLX range +/- 32MiB |
446 | // Thumb B.W, BL, BLX range +/- 16MiB |
447 | // Thumb B<cc>.W range +/- 1MiB |
448 | // If a branch cannot reach a pre-created ThunkSection a new one will be |
449 | // created so we can handle the rare cases of a Thumb 2 conditional branch. |
450 | // We intentionally use a lower size for thunkSectionSpacing than the maximum |
451 | // branch range so the end of the ThunkSection is more likely to be within |
452 | // range of the branch instruction that is furthest away. The value we shorten |
453 | // thunkSectionSpacing by is set conservatively to allow us to create 16,384 |
454 | // 12 byte Thunks at any offset in a ThunkSection without risk of a branch to |
455 | // one of the Thunks going out of range. |
456 | |
457 | // On Arm the thunkSectionSpacing depends on the range of the Thumb Branch |
458 | // range. On earlier Architectures such as ARMv4, ARMv5 and ARMv6 (except |
459 | // ARMv6T2) the range is +/- 4MiB. |
460 | |
461 | return (ctx.arg.armJ1J2BranchEncoding) ? 0x1000000 - 0x30000 |
462 | : 0x400000 - 0x7500; |
463 | } |
464 | |
465 | bool ARM::inBranchRange(RelType type, uint64_t src, uint64_t dst) const { |
466 | if ((dst & 0x1) == 0) |
467 | // Destination is ARM, if ARM caller then Src is already 4-byte aligned. |
468 | // If Thumb Caller (BLX) the Src address has bottom 2 bits cleared to ensure |
469 | // destination will be 4 byte aligned. |
470 | src &= ~0x3; |
471 | else |
472 | // Bit 0 == 1 denotes Thumb state, it is not part of the range. |
473 | dst &= ~0x1; |
474 | |
475 | int64_t offset = dst - src; |
476 | switch (type) { |
477 | case R_ARM_PC24: |
478 | case R_ARM_PLT32: |
479 | case R_ARM_JUMP24: |
480 | case R_ARM_CALL: |
481 | return llvm::isInt<26>(x: offset); |
482 | case R_ARM_THM_JUMP19: |
483 | return llvm::isInt<21>(x: offset); |
484 | case R_ARM_THM_JUMP24: |
485 | case R_ARM_THM_CALL: |
486 | return ctx.arg.armJ1J2BranchEncoding ? llvm::isInt<25>(x: offset) |
487 | : llvm::isInt<23>(x: offset); |
488 | default: |
489 | return true; |
490 | } |
491 | } |
492 | |
493 | // Helper to produce message text when LLD detects that a CALL relocation to |
494 | // a non STT_FUNC symbol that may result in incorrect interworking between ARM |
495 | // or Thumb. |
496 | static void stateChangeWarning(Ctx &ctx, uint8_t *loc, RelType relt, |
497 | const Symbol &s) { |
498 | assert(!s.isFunc()); |
499 | const ErrorPlace place = getErrorPlace(ctx, loc); |
500 | std::string hint; |
501 | if (!place.srcLoc.empty()) |
502 | hint = "; "+ place.srcLoc; |
503 | if (s.isSection()) { |
504 | // Section symbols must be defined and in a section. Users cannot change |
505 | // the type. Use the section name as getName() returns an empty string. |
506 | Warn(ctx) << place.loc << "branch and link relocation: "<< relt |
507 | << " to STT_SECTION symbol "<< cast<Defined>(Val: s).section->name |
508 | << " ; interworking not performed"<< hint; |
509 | } else { |
510 | // Warn with hint on how to alter the symbol type. |
511 | Warn(ctx) |
512 | << getErrorLoc(ctx, loc) << "branch and link relocation: "<< relt |
513 | << " to non STT_FUNC symbol: "<< s.getName() |
514 | << " interworking not performed; consider using directive '.type " |
515 | << s.getName() |
516 | << ", %function' to give symbol type STT_FUNC if interworking between " |
517 | "ARM and Thumb is required" |
518 | << hint; |
519 | } |
520 | } |
521 | |
// Rotate the 32-bit unsigned value val right by amt bits. amt must be less
// than 32; a rotation by 0 returns val unchanged.
static uint32_t rotr32(uint32_t val, uint32_t amt) {
  assert(amt < 32 && "Invalid rotate amount");
  if (amt == 0)
    return val;
  const uint32_t low = val >> amt;         // bits that stay in the word
  const uint32_t high = val << (32 - amt); // bits that wrap around to the top
  return high | low;
}
527 | |
528 | static std::pair<uint32_t, uint32_t> getRemAndLZForGroup(unsigned group, |
529 | uint32_t val) { |
530 | uint32_t rem, lz; |
531 | do { |
532 | lz = llvm::countl_zero(Val: val) & ~1; |
533 | rem = val; |
534 | if (lz == 32) // implies rem == 0 |
535 | break; |
536 | val &= 0xffffff >> lz; |
537 | } while (group--); |
538 | return {rem, lz}; |
539 | } |
540 | |
541 | void ARM::encodeAluGroup(uint8_t *loc, const Relocation &rel, uint64_t val, |
542 | int group, bool check) const { |
543 | // ADD/SUB (immediate) add = bit23, sub = bit22 |
544 | // immediate field carries is a 12-bit modified immediate, made up of a 4-bit |
545 | // even rotate right and an 8-bit immediate. |
546 | uint32_t opcode = 0x00800000; |
547 | if (val >> 63) { |
548 | opcode = 0x00400000; |
549 | val = -val; |
550 | } |
551 | uint32_t imm, lz; |
552 | std::tie(args&: imm, args&: lz) = getRemAndLZForGroup(group, val); |
553 | uint32_t rot = 0; |
554 | if (lz < 24) { |
555 | imm = rotr32(val: imm, amt: 24 - lz); |
556 | rot = (lz + 8) << 7; |
557 | } |
558 | if (check && imm > 0xff) |
559 | Err(ctx) << getErrorLoc(ctx, loc) << "unencodeable immediate "<< val |
560 | << " for relocation "<< rel.type; |
561 | write32(ctx, p: loc, |
562 | v: (read32(ctx, p: loc) & 0xff3ff000) | opcode | rot | (imm & 0xff)); |
563 | } |
564 | |
565 | static void encodeLdrGroup(Ctx &ctx, uint8_t *loc, const Relocation &rel, |
566 | uint64_t val, int group) { |
567 | // R_ARM_LDR_PC_Gn is S + A - P, we have ((S + A) | T) - P, if S is a |
568 | // function then addr is 0 (modulo 2) and Pa is 0 (modulo 4) so we can clear |
569 | // bottom bit to recover S + A - P. |
570 | if (rel.sym->isFunc()) |
571 | val &= ~0x1; |
572 | // LDR (literal) u = bit23 |
573 | uint32_t opcode = 0x00800000; |
574 | if (val >> 63) { |
575 | opcode = 0x0; |
576 | val = -val; |
577 | } |
578 | uint32_t imm = getRemAndLZForGroup(group, val).first; |
579 | checkUInt(ctx, loc, v: imm, n: 12, rel); |
580 | write32(ctx, p: loc, v: (read32(ctx, p: loc) & 0xff7ff000) | opcode | imm); |
581 | } |
582 | |
583 | static void encodeLdrsGroup(Ctx &ctx, uint8_t *loc, const Relocation &rel, |
584 | uint64_t val, int group) { |
585 | // R_ARM_LDRS_PC_Gn is S + A - P, we have ((S + A) | T) - P, if S is a |
586 | // function then addr is 0 (modulo 2) and Pa is 0 (modulo 4) so we can clear |
587 | // bottom bit to recover S + A - P. |
588 | if (rel.sym->isFunc()) |
589 | val &= ~0x1; |
590 | // LDRD/LDRH/LDRSB/LDRSH (literal) u = bit23 |
591 | uint32_t opcode = 0x00800000; |
592 | if (val >> 63) { |
593 | opcode = 0x0; |
594 | val = -val; |
595 | } |
596 | uint32_t imm = getRemAndLZForGroup(group, val).first; |
597 | checkUInt(ctx, loc, v: imm, n: 8, rel); |
598 | write32(ctx, p: loc, |
599 | v: (read32(ctx, p: loc) & 0xff7ff0f0) | opcode | ((imm & 0xf0) << 4) | |
600 | (imm & 0xf)); |
601 | } |
602 | |
603 | void ARM::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const { |
604 | switch (rel.type) { |
605 | case R_ARM_ABS32: |
606 | case R_ARM_BASE_PREL: |
607 | case R_ARM_GOTOFF32: |
608 | case R_ARM_GOT_BREL: |
609 | case R_ARM_GOT_PREL: |
610 | case R_ARM_REL32: |
611 | case R_ARM_RELATIVE: |
612 | case R_ARM_SBREL32: |
613 | case R_ARM_TARGET1: |
614 | case R_ARM_TARGET2: |
615 | case R_ARM_TLS_GD32: |
616 | case R_ARM_TLS_IE32: |
617 | case R_ARM_TLS_LDM32: |
618 | case R_ARM_TLS_LDO32: |
619 | case R_ARM_TLS_LE32: |
620 | case R_ARM_TLS_TPOFF32: |
621 | case R_ARM_TLS_DTPOFF32: |
622 | write32(ctx, p: loc, v: val); |
623 | break; |
624 | case R_ARM_PREL31: |
625 | checkInt(ctx, loc, v: val, n: 31, rel); |
626 | write32(ctx, p: loc, v: (read32(ctx, p: loc) & 0x80000000) | (val & ~0x80000000)); |
627 | break; |
628 | case R_ARM_CALL: { |
629 | // R_ARM_CALL is used for BL and BLX instructions, for symbols of type |
630 | // STT_FUNC we choose whether to write a BL or BLX depending on the |
631 | // value of bit 0 of Val. With bit 0 == 1 denoting Thumb. If the symbol is |
632 | // not of type STT_FUNC then we must preserve the original instruction. |
633 | assert(rel.sym); // R_ARM_CALL is always reached via relocate(). |
634 | bool bit0Thumb = val & 1; |
635 | bool isBlx = (read32(ctx, p: loc) & 0xfe000000) == 0xfa000000; |
636 | // lld 10.0 and before always used bit0Thumb when deciding to write a BLX |
637 | // even when type not STT_FUNC. |
638 | if (!rel.sym->isFunc() && isBlx != bit0Thumb) |
639 | stateChangeWarning(ctx, loc, relt: rel.type, s: *rel.sym); |
640 | if (rel.sym->isFunc() ? bit0Thumb : isBlx) { |
641 | // The BLX encoding is 0xfa:H:imm24 where Val = imm24:H:'1' |
642 | checkInt(ctx, loc, v: val, n: 26, rel); |
643 | write32(ctx, p: loc, |
644 | v: 0xfa000000 | // opcode |
645 | ((val & 2) << 23) | // H |
646 | ((val >> 2) & 0x00ffffff)); // imm24 |
647 | break; |
648 | } |
649 | // BLX (always unconditional) instruction to an ARM Target, select an |
650 | // unconditional BL. |
651 | write32(ctx, p: loc, v: 0xeb000000 | (read32(ctx, p: loc) & 0x00ffffff)); |
652 | // fall through as BL encoding is shared with B |
653 | } |
654 | [[fallthrough]]; |
655 | case R_ARM_JUMP24: |
656 | case R_ARM_PC24: |
657 | case R_ARM_PLT32: |
658 | checkInt(ctx, loc, v: val, n: 26, rel); |
659 | write32(ctx, p: loc, |
660 | v: (read32(ctx, p: loc) & ~0x00ffffff) | ((val >> 2) & 0x00ffffff)); |
661 | break; |
662 | case R_ARM_THM_JUMP8: |
663 | // We do a 9 bit check because val is right-shifted by 1 bit. |
664 | checkInt(ctx, loc, v: val, n: 9, rel); |
665 | write16(ctx, p: loc, v: (read16(ctx, p: loc) & 0xff00) | ((val >> 1) & 0x00ff)); |
666 | break; |
667 | case R_ARM_THM_JUMP11: |
668 | // We do a 12 bit check because val is right-shifted by 1 bit. |
669 | checkInt(ctx, loc, v: val, n: 12, rel); |
670 | write16(ctx, p: loc, v: (read16(ctx, p: loc) & 0xf800) | ((val >> 1) & 0x07ff)); |
671 | break; |
672 | case R_ARM_THM_JUMP19: |
673 | // Encoding T3: Val = S:J2:J1:imm6:imm11:0 |
674 | checkInt(ctx, loc, v: val, n: 21, rel); |
675 | write16(ctx, p: loc, |
676 | v: (read16(ctx, p: loc) & 0xfbc0) | // opcode cond |
677 | ((val >> 10) & 0x0400) | // S |
678 | ((val >> 12) & 0x003f)); // imm6 |
679 | write16(ctx, p: loc + 2, |
680 | v: 0x8000 | // opcode |
681 | ((val >> 8) & 0x0800) | // J2 |
682 | ((val >> 5) & 0x2000) | // J1 |
683 | ((val >> 1) & 0x07ff)); // imm11 |
684 | break; |
685 | case R_ARM_THM_CALL: { |
686 | // R_ARM_THM_CALL is used for BL and BLX instructions, for symbols of type |
687 | // STT_FUNC we choose whether to write a BL or BLX depending on the |
688 | // value of bit 0 of Val. With bit 0 == 0 denoting ARM, if the symbol is |
689 | // not of type STT_FUNC then we must preserve the original instruction. |
690 | // PLT entries are always ARM state so we know we need to interwork. |
691 | assert(rel.sym); // R_ARM_THM_CALL is always reached via relocate(). |
692 | bool bit0Thumb = val & 1; |
693 | bool useThumb = bit0Thumb || useThumbPLTs(ctx); |
694 | bool isBlx = (read16(ctx, p: loc + 2) & 0x1000) == 0; |
695 | // lld 10.0 and before always used bit0Thumb when deciding to write a BLX |
696 | // even when type not STT_FUNC. |
697 | if (!rel.sym->isFunc() && !rel.sym->isInPlt(ctx) && isBlx == useThumb) |
698 | stateChangeWarning(ctx, loc, relt: rel.type, s: *rel.sym); |
699 | if ((rel.sym->isFunc() || rel.sym->isInPlt(ctx)) ? !useThumb : isBlx) { |
700 | // We are writing a BLX. Ensure BLX destination is 4-byte aligned. As |
701 | // the BLX instruction may only be two byte aligned. This must be done |
702 | // before overflow check. |
703 | val = alignTo(Value: val, Align: 4); |
704 | write16(ctx, p: loc + 2, v: read16(ctx, p: loc + 2) & ~0x1000); |
705 | } else { |
706 | write16(ctx, p: loc + 2, v: (read16(ctx, p: loc + 2) & ~0x1000) | 1 << 12); |
707 | } |
708 | if (!ctx.arg.armJ1J2BranchEncoding) { |
709 | // Older Arm architectures do not support R_ARM_THM_JUMP24 and have |
710 | // different encoding rules and range due to J1 and J2 always being 1. |
711 | checkInt(ctx, loc, v: val, n: 23, rel); |
712 | write16(ctx, p: loc, |
713 | v: 0xf000 | // opcode |
714 | ((val >> 12) & 0x07ff)); // imm11 |
715 | write16(ctx, p: loc + 2, |
716 | v: (read16(ctx, p: loc + 2) & 0xd000) | // opcode |
717 | 0x2800 | // J1 == J2 == 1 |
718 | ((val >> 1) & 0x07ff)); // imm11 |
719 | break; |
720 | } |
721 | } |
722 | // Fall through as rest of encoding is the same as B.W |
723 | [[fallthrough]]; |
724 | case R_ARM_THM_JUMP24: |
725 | // Encoding B T4, BL T1, BLX T2: Val = S:I1:I2:imm10:imm11:0 |
726 | checkInt(ctx, loc, v: val, n: 25, rel); |
727 | write16(ctx, p: loc, |
728 | v: 0xf000 | // opcode |
729 | ((val >> 14) & 0x0400) | // S |
730 | ((val >> 12) & 0x03ff)); // imm10 |
731 | write16(ctx, p: loc + 2, |
732 | v: (read16(ctx, p: loc + 2) & 0xd000) | // opcode |
733 | (((~(val >> 10)) ^ (val >> 11)) & 0x2000) | // J1 |
734 | (((~(val >> 11)) ^ (val >> 13)) & 0x0800) | // J2 |
735 | ((val >> 1) & 0x07ff)); // imm11 |
736 | break; |
737 | case R_ARM_MOVW_ABS_NC: |
738 | case R_ARM_MOVW_PREL_NC: |
739 | case R_ARM_MOVW_BREL_NC: |
740 | write32(ctx, p: loc, |
741 | v: (read32(ctx, p: loc) & ~0x000f0fff) | ((val & 0xf000) << 4) | |
742 | (val & 0x0fff)); |
743 | break; |
744 | case R_ARM_MOVT_ABS: |
745 | case R_ARM_MOVT_PREL: |
746 | case R_ARM_MOVT_BREL: |
747 | write32(ctx, p: loc, |
748 | v: (read32(ctx, p: loc) & ~0x000f0fff) | (((val >> 16) & 0xf000) << 4) | |
749 | ((val >> 16) & 0xfff)); |
750 | break; |
751 | case R_ARM_THM_MOVT_ABS: |
752 | case R_ARM_THM_MOVT_PREL: |
753 | case R_ARM_THM_MOVT_BREL: |
754 | // Encoding T1: A = imm4:i:imm3:imm8 |
755 | |
756 | write16(ctx, p: loc, |
757 | v: 0xf2c0 | // opcode |
758 | ((val >> 17) & 0x0400) | // i |
759 | ((val >> 28) & 0x000f)); // imm4 |
760 | |
761 | write16(ctx, p: loc + 2, |
762 | v: (read16(ctx, p: loc + 2) & 0x8f00) | // opcode |
763 | ((val >> 12) & 0x7000) | // imm3 |
764 | ((val >> 16) & 0x00ff)); // imm8 |
765 | break; |
766 | case R_ARM_THM_MOVW_ABS_NC: |
767 | case R_ARM_THM_MOVW_PREL_NC: |
768 | case R_ARM_THM_MOVW_BREL_NC: |
769 | // Encoding T3: A = imm4:i:imm3:imm8 |
770 | write16(ctx, p: loc, |
771 | v: 0xf240 | // opcode |
772 | ((val >> 1) & 0x0400) | // i |
773 | ((val >> 12) & 0x000f)); // imm4 |
774 | write16(ctx, p: loc + 2, |
775 | v: (read16(ctx, p: loc + 2) & 0x8f00) | // opcode |
776 | ((val << 4) & 0x7000) | // imm3 |
777 | (val & 0x00ff)); // imm8 |
778 | break; |
779 | case R_ARM_THM_ALU_ABS_G3: |
780 | write16(ctx, p: loc, v: (read16(ctx, p: loc) & ~0x00ff) | ((val >> 24) & 0x00ff)); |
781 | break; |
782 | case R_ARM_THM_ALU_ABS_G2_NC: |
783 | write16(ctx, p: loc, v: (read16(ctx, p: loc) & ~0x00ff) | ((val >> 16) & 0x00ff)); |
784 | break; |
785 | case R_ARM_THM_ALU_ABS_G1_NC: |
786 | write16(ctx, p: loc, v: (read16(ctx, p: loc) & ~0x00ff) | ((val >> 8) & 0x00ff)); |
787 | break; |
788 | case R_ARM_THM_ALU_ABS_G0_NC: |
789 | write16(ctx, p: loc, v: (read16(ctx, p: loc) & ~0x00ff) | (val & 0x00ff)); |
790 | break; |
791 | case R_ARM_ALU_PC_G0: |
792 | encodeAluGroup(loc, rel, val, group: 0, check: true); |
793 | break; |
794 | case R_ARM_ALU_PC_G0_NC: |
795 | encodeAluGroup(loc, rel, val, group: 0, check: false); |
796 | break; |
797 | case R_ARM_ALU_PC_G1: |
798 | encodeAluGroup(loc, rel, val, group: 1, check: true); |
799 | break; |
800 | case R_ARM_ALU_PC_G1_NC: |
801 | encodeAluGroup(loc, rel, val, group: 1, check: false); |
802 | break; |
803 | case R_ARM_ALU_PC_G2: |
804 | encodeAluGroup(loc, rel, val, group: 2, check: true); |
805 | break; |
806 | case R_ARM_LDR_PC_G0: |
807 | encodeLdrGroup(ctx, loc, rel, val, group: 0); |
808 | break; |
809 | case R_ARM_LDR_PC_G1: |
810 | encodeLdrGroup(ctx, loc, rel, val, group: 1); |
811 | break; |
812 | case R_ARM_LDR_PC_G2: |
813 | encodeLdrGroup(ctx, loc, rel, val, group: 2); |
814 | break; |
815 | case R_ARM_LDRS_PC_G0: |
816 | encodeLdrsGroup(ctx, loc, rel, val, group: 0); |
817 | break; |
818 | case R_ARM_LDRS_PC_G1: |
819 | encodeLdrsGroup(ctx, loc, rel, val, group: 1); |
820 | break; |
821 | case R_ARM_LDRS_PC_G2: |
822 | encodeLdrsGroup(ctx, loc, rel, val, group: 2); |
823 | break; |
824 | case R_ARM_THM_ALU_PREL_11_0: { |
825 | // ADR encoding T2 (sub), T3 (add) i:imm3:imm8 |
826 | int64_t imm = val; |
827 | uint16_t sub = 0; |
828 | if (imm < 0) { |
829 | imm = -imm; |
830 | sub = 0x00a0; |
831 | } |
832 | checkUInt(ctx, loc, v: imm, n: 12, rel); |
833 | write16(ctx, p: loc, v: (read16(ctx, p: loc) & 0xfb0f) | sub | (imm & 0x800) >> 1); |
834 | write16(ctx, p: loc + 2, |
835 | v: (read16(ctx, p: loc + 2) & 0x8f00) | (imm & 0x700) << 4 | |
836 | (imm & 0xff)); |
837 | break; |
838 | } |
839 | case R_ARM_THM_PC8: |
840 | // ADR and LDR literal encoding T1 positive offset only imm8:00 |
841 | // R_ARM_THM_PC8 is S + A - Pa, we have ((S + A) | T) - Pa, if S is a |
842 | // function then addr is 0 (modulo 2) and Pa is 0 (modulo 4) so we can clear |
843 | // bottom bit to recover S + A - Pa. |
844 | if (rel.sym->isFunc()) |
845 | val &= ~0x1; |
846 | checkUInt(ctx, loc, v: val, n: 10, rel); |
847 | checkAlignment(ctx, loc, v: val, n: 4, rel); |
848 | write16(ctx, p: loc, v: (read16(ctx, p: loc) & 0xff00) | (val & 0x3fc) >> 2); |
849 | break; |
850 | case R_ARM_THM_PC12: { |
851 | // LDR (literal) encoding T2, add = (U == '1') imm12 |
852 | // imm12 is unsigned |
853 | // R_ARM_THM_PC12 is S + A - Pa, we have ((S + A) | T) - Pa, if S is a |
854 | // function then addr is 0 (modulo 2) and Pa is 0 (modulo 4) so we can clear |
855 | // bottom bit to recover S + A - Pa. |
856 | if (rel.sym->isFunc()) |
857 | val &= ~0x1; |
858 | int64_t imm12 = val; |
859 | uint16_t u = 0x0080; |
860 | if (imm12 < 0) { |
861 | imm12 = -imm12; |
862 | u = 0; |
863 | } |
864 | checkUInt(ctx, loc, v: imm12, n: 12, rel); |
865 | write16(ctx, p: loc, v: read16(ctx, p: loc) | u); |
866 | write16(ctx, p: loc + 2, v: (read16(ctx, p: loc + 2) & 0xf000) | imm12); |
867 | break; |
868 | } |
869 | default: |
870 | llvm_unreachable("unknown relocation"); |
871 | } |
872 | } |
873 | |
874 | int64_t ARM::getImplicitAddend(const uint8_t *buf, RelType type) const { |
875 | switch (type) { |
876 | default: |
877 | InternalErr(ctx, buf) << "cannot read addend for relocation "<< type; |
878 | return 0; |
879 | case R_ARM_ABS32: |
880 | case R_ARM_BASE_PREL: |
881 | case R_ARM_GLOB_DAT: |
882 | case R_ARM_GOTOFF32: |
883 | case R_ARM_GOT_BREL: |
884 | case R_ARM_GOT_PREL: |
885 | case R_ARM_IRELATIVE: |
886 | case R_ARM_REL32: |
887 | case R_ARM_RELATIVE: |
888 | case R_ARM_SBREL32: |
889 | case R_ARM_TARGET1: |
890 | case R_ARM_TARGET2: |
891 | case R_ARM_TLS_DTPMOD32: |
892 | case R_ARM_TLS_DTPOFF32: |
893 | case R_ARM_TLS_GD32: |
894 | case R_ARM_TLS_IE32: |
895 | case R_ARM_TLS_LDM32: |
896 | case R_ARM_TLS_LE32: |
897 | case R_ARM_TLS_LDO32: |
898 | case R_ARM_TLS_TPOFF32: |
899 | return SignExtend64<32>(x: read32(ctx, p: buf)); |
900 | case R_ARM_PREL31: |
901 | return SignExtend64<31>(x: read32(ctx, p: buf)); |
902 | case R_ARM_CALL: |
903 | case R_ARM_JUMP24: |
904 | case R_ARM_PC24: |
905 | case R_ARM_PLT32: |
906 | return SignExtend64<26>(x: read32(ctx, p: buf) << 2); |
907 | case R_ARM_THM_JUMP8: |
908 | return SignExtend64<9>(x: read16(ctx, p: buf) << 1); |
909 | case R_ARM_THM_JUMP11: |
910 | return SignExtend64<12>(x: read16(ctx, p: buf) << 1); |
911 | case R_ARM_THM_JUMP19: { |
912 | // Encoding T3: A = S:J2:J1:imm10:imm6:0 |
913 | uint16_t hi = read16(ctx, p: buf); |
914 | uint16_t lo = read16(ctx, p: buf + 2); |
915 | return SignExtend64<20>(x: ((hi & 0x0400) << 10) | // S |
916 | ((lo & 0x0800) << 8) | // J2 |
917 | ((lo & 0x2000) << 5) | // J1 |
918 | ((hi & 0x003f) << 12) | // imm6 |
919 | ((lo & 0x07ff) << 1)); // imm11:0 |
920 | } |
921 | case R_ARM_THM_CALL: |
922 | if (!ctx.arg.armJ1J2BranchEncoding) { |
923 | // Older Arm architectures do not support R_ARM_THM_JUMP24 and have |
924 | // different encoding rules and range due to J1 and J2 always being 1. |
925 | uint16_t hi = read16(ctx, p: buf); |
926 | uint16_t lo = read16(ctx, p: buf + 2); |
927 | return SignExtend64<22>(x: ((hi & 0x7ff) << 12) | // imm11 |
928 | ((lo & 0x7ff) << 1)); // imm11:0 |
929 | break; |
930 | } |
931 | [[fallthrough]]; |
932 | case R_ARM_THM_JUMP24: { |
933 | // Encoding B T4, BL T1, BLX T2: A = S:I1:I2:imm10:imm11:0 |
934 | // I1 = NOT(J1 EOR S), I2 = NOT(J2 EOR S) |
935 | uint16_t hi = read16(ctx, p: buf); |
936 | uint16_t lo = read16(ctx, p: buf + 2); |
937 | return SignExtend64<24>(x: ((hi & 0x0400) << 14) | // S |
938 | (~((lo ^ (hi << 3)) << 10) & 0x00800000) | // I1 |
939 | (~((lo ^ (hi << 1)) << 11) & 0x00400000) | // I2 |
940 | ((hi & 0x003ff) << 12) | // imm0 |
941 | ((lo & 0x007ff) << 1)); // imm11:0 |
942 | } |
943 | // ELF for the ARM Architecture 4.6.1.1 the implicit addend for MOVW and |
944 | // MOVT is in the range -32768 <= A < 32768 |
945 | case R_ARM_MOVW_ABS_NC: |
946 | case R_ARM_MOVT_ABS: |
947 | case R_ARM_MOVW_PREL_NC: |
948 | case R_ARM_MOVT_PREL: |
949 | case R_ARM_MOVW_BREL_NC: |
950 | case R_ARM_MOVT_BREL: { |
951 | uint64_t val = read32(ctx, p: buf) & 0x000f0fff; |
952 | return SignExtend64<16>(x: ((val & 0x000f0000) >> 4) | (val & 0x00fff)); |
953 | } |
954 | case R_ARM_THM_MOVW_ABS_NC: |
955 | case R_ARM_THM_MOVT_ABS: |
956 | case R_ARM_THM_MOVW_PREL_NC: |
957 | case R_ARM_THM_MOVT_PREL: |
958 | case R_ARM_THM_MOVW_BREL_NC: |
959 | case R_ARM_THM_MOVT_BREL: { |
960 | // Encoding T3: A = imm4:i:imm3:imm8 |
961 | uint16_t hi = read16(ctx, p: buf); |
962 | uint16_t lo = read16(ctx, p: buf + 2); |
963 | return SignExtend64<16>(x: ((hi & 0x000f) << 12) | // imm4 |
964 | ((hi & 0x0400) << 1) | // i |
965 | ((lo & 0x7000) >> 4) | // imm3 |
966 | (lo & 0x00ff)); // imm8 |
967 | } |
968 | case R_ARM_THM_ALU_ABS_G0_NC: |
969 | case R_ARM_THM_ALU_ABS_G1_NC: |
970 | case R_ARM_THM_ALU_ABS_G2_NC: |
971 | case R_ARM_THM_ALU_ABS_G3: |
972 | return read16(ctx, p: buf) & 0xff; |
973 | case R_ARM_ALU_PC_G0: |
974 | case R_ARM_ALU_PC_G0_NC: |
975 | case R_ARM_ALU_PC_G1: |
976 | case R_ARM_ALU_PC_G1_NC: |
977 | case R_ARM_ALU_PC_G2: { |
978 | // 12-bit immediate is a modified immediate made up of a 4-bit even |
979 | // right rotation and 8-bit constant. After the rotation the value |
980 | // is zero-extended. When bit 23 is set the instruction is an add, when |
981 | // bit 22 is set it is a sub. |
982 | uint32_t instr = read32(ctx, p: buf); |
983 | uint32_t val = rotr32(val: instr & 0xff, amt: ((instr & 0xf00) >> 8) * 2); |
984 | return (instr & 0x00400000) ? -val : val; |
985 | } |
986 | case R_ARM_LDR_PC_G0: |
987 | case R_ARM_LDR_PC_G1: |
988 | case R_ARM_LDR_PC_G2: { |
989 | // ADR (literal) add = bit23, sub = bit22 |
990 | // LDR (literal) u = bit23 unsigned imm12 |
991 | bool u = read32(ctx, p: buf) & 0x00800000; |
992 | uint32_t imm12 = read32(ctx, p: buf) & 0xfff; |
993 | return u ? imm12 : -imm12; |
994 | } |
995 | case R_ARM_LDRS_PC_G0: |
996 | case R_ARM_LDRS_PC_G1: |
997 | case R_ARM_LDRS_PC_G2: { |
998 | // LDRD/LDRH/LDRSB/LDRSH (literal) u = bit23 unsigned imm8 |
999 | uint32_t opcode = read32(ctx, p: buf); |
1000 | bool u = opcode & 0x00800000; |
1001 | uint32_t imm4l = opcode & 0xf; |
1002 | uint32_t imm4h = (opcode & 0xf00) >> 4; |
1003 | return u ? (imm4h | imm4l) : -(imm4h | imm4l); |
1004 | } |
1005 | case R_ARM_THM_ALU_PREL_11_0: { |
1006 | // Thumb2 ADR, which is an alias for a sub or add instruction with an |
1007 | // unsigned immediate. |
1008 | // ADR encoding T2 (sub), T3 (add) i:imm3:imm8 |
1009 | uint16_t hi = read16(ctx, p: buf); |
1010 | uint16_t lo = read16(ctx, p: buf + 2); |
1011 | uint64_t imm = (hi & 0x0400) << 1 | // i |
1012 | (lo & 0x7000) >> 4 | // imm3 |
1013 | (lo & 0x00ff); // imm8 |
1014 | // For sub, addend is negative, add is positive. |
1015 | return (hi & 0x00f0) ? -imm : imm; |
1016 | } |
1017 | case R_ARM_THM_PC8: |
1018 | // ADR and LDR (literal) encoding T1 |
1019 | // From ELF for the ARM Architecture the initial signed addend is formed |
1020 | // from an unsigned field using expression (((imm8:00 + 4) & 0x3ff) ā 4) |
1021 | // this trick permits the PC bias of -4 to be encoded using imm8 = 0xff |
1022 | return ((((read16(ctx, p: buf) & 0xff) << 2) + 4) & 0x3ff) - 4; |
1023 | case R_ARM_THM_PC12: { |
1024 | // LDR (literal) encoding T2, add = (U == '1') imm12 |
1025 | bool u = read16(ctx, p: buf) & 0x0080; |
1026 | uint64_t imm12 = read16(ctx, p: buf + 2) & 0x0fff; |
1027 | return u ? imm12 : -imm12; |
1028 | } |
1029 | case R_ARM_NONE: |
1030 | case R_ARM_V4BX: |
1031 | case R_ARM_JUMP_SLOT: |
1032 | // These relocations are defined as not having an implicit addend. |
1033 | return 0; |
1034 | } |
1035 | } |
1036 | |
1037 | static bool isArmMapSymbol(const Symbol *b) { |
1038 | return b->getName() == "$a"|| b->getName().starts_with(Prefix: "$a."); |
1039 | } |
1040 | |
1041 | static bool isThumbMapSymbol(const Symbol *s) { |
1042 | return s->getName() == "$t"|| s->getName().starts_with(Prefix: "$t."); |
1043 | } |
1044 | |
1045 | static bool isDataMapSymbol(const Symbol *b) { |
1046 | return b->getName() == "$d"|| b->getName().starts_with(Prefix: "$d."); |
1047 | } |
1048 | |
1049 | void elf::sortArmMappingSymbols(Ctx &ctx) { |
1050 | // For each input section make sure the mapping symbols are sorted in |
1051 | // ascending order. |
1052 | for (auto &kv : static_cast<ARM &>(*ctx.target).sectionMap) { |
1053 | SmallVector<const Defined *, 0> &mapSyms = kv.second; |
1054 | llvm::stable_sort(Range&: mapSyms, C: [](const Defined *a, const Defined *b) { |
1055 | return a->value < b->value; |
1056 | }); |
1057 | } |
1058 | } |
1059 | |
1060 | void elf::addArmInputSectionMappingSymbols(Ctx &ctx) { |
1061 | // Collect mapping symbols for every executable input sections. |
1062 | // The linker generated mapping symbols for all the synthetic |
1063 | // sections are adding into the sectionmap through the function |
1064 | // addArmSyntheitcSectionMappingSymbol. |
1065 | auto §ionMap = static_cast<ARM &>(*ctx.target).sectionMap; |
1066 | for (ELFFileBase *file : ctx.objectFiles) { |
1067 | for (Symbol *sym : file->getLocalSymbols()) { |
1068 | auto *def = dyn_cast<Defined>(Val: sym); |
1069 | if (!def) |
1070 | continue; |
1071 | if (!isArmMapSymbol(b: def) && !isDataMapSymbol(b: def) && |
1072 | !isThumbMapSymbol(s: def)) |
1073 | continue; |
1074 | if (auto *sec = cast_if_present<InputSection>(Val: def->section)) |
1075 | if (sec->flags & SHF_EXECINSTR) |
1076 | sectionMap[sec].push_back(Elt: def); |
1077 | } |
1078 | } |
1079 | } |
1080 | |
1081 | // Synthetic sections are not backed by an ELF file where we can access the |
1082 | // symbol table, instead mapping symbols added to synthetic sections are stored |
1083 | // in the synthetic symbol table. Due to the presence of strip (--strip-all), |
1084 | // we can not rely on the synthetic symbol table retaining the mapping symbols. |
1085 | // Instead we record the mapping symbols locally. |
1086 | void elf::addArmSyntheticSectionMappingSymbol(Defined *sym) { |
1087 | if (!isArmMapSymbol(b: sym) && !isDataMapSymbol(b: sym) && !isThumbMapSymbol(s: sym)) |
1088 | return; |
1089 | if (auto *sec = cast_if_present<InputSection>(Val: sym->section)) |
1090 | if (sec->flags & SHF_EXECINSTR) |
1091 | static_cast<ARM &>(*sec->file->ctx.target).sectionMap[sec].push_back(Elt: sym); |
1092 | } |
1093 | |
1094 | static void toLittleEndianInstructions(uint8_t *buf, uint64_t start, |
1095 | uint64_t end, uint64_t width) { |
1096 | CodeState curState = static_cast<CodeState>(width); |
1097 | if (curState == CodeState::Arm) |
1098 | for (uint64_t i = start; i < end; i += width) |
1099 | write32le(P: buf + i, V: read32be(P: buf + i)); |
1100 | |
1101 | if (curState == CodeState::Thumb) |
1102 | for (uint64_t i = start; i < end; i += width) |
1103 | write16le(P: buf + i, V: read16be(P: buf + i)); |
1104 | } |
1105 | |
1106 | // Arm BE8 big endian format requires instructions to be little endian, with |
1107 | // the initial contents big-endian. Convert the big-endian instructions to |
1108 | // little endian leaving literal data untouched. We use mapping symbols to |
1109 | // identify half open intervals of Arm code [$a, non $a) and Thumb code |
1110 | // [$t, non $t) and convert these to little endian a word or half word at a |
1111 | // time respectively. |
1112 | void elf::convertArmInstructionstoBE8(Ctx &ctx, InputSection *sec, |
1113 | uint8_t *buf) { |
1114 | auto §ionMap = static_cast<ARM &>(*ctx.target).sectionMap; |
1115 | auto it = sectionMap.find(Val: sec); |
1116 | if (it == sectionMap.end()) |
1117 | return; |
1118 | |
1119 | SmallVector<const Defined *, 0> &mapSyms = it->second; |
1120 | |
1121 | if (mapSyms.empty()) |
1122 | return; |
1123 | |
1124 | CodeState curState = CodeState::Data; |
1125 | uint64_t start = 0, width = 0, size = sec->getSize(); |
1126 | for (auto &msym : mapSyms) { |
1127 | CodeState newState = CodeState::Data; |
1128 | if (isThumbMapSymbol(s: msym)) |
1129 | newState = CodeState::Thumb; |
1130 | else if (isArmMapSymbol(b: msym)) |
1131 | newState = CodeState::Arm; |
1132 | |
1133 | if (newState == curState) |
1134 | continue; |
1135 | |
1136 | if (curState != CodeState::Data) { |
1137 | width = static_cast<uint64_t>(curState); |
1138 | toLittleEndianInstructions(buf, start, end: msym->value, width); |
1139 | } |
1140 | start = msym->value; |
1141 | curState = newState; |
1142 | } |
1143 | |
1144 | // Passed last mapping symbol, may need to reverse |
1145 | // up to end of section. |
1146 | if (curState != CodeState::Data) { |
1147 | width = static_cast<uint64_t>(curState); |
1148 | toLittleEndianInstructions(buf, start, end: size, width); |
1149 | } |
1150 | } |
1151 | |
1152 | // The Arm Cortex-M Security Extensions (CMSE) splits a system into two parts; |
1153 | // the non-secure and secure states with the secure state inaccessible from the |
1154 | // non-secure state, apart from an area of memory in secure state called the |
1155 | // secure gateway which is accessible from non-secure state. The secure gateway |
1156 | // contains one or more entry points which must start with a landing pad |
1157 | // instruction SG. Arm recommends that the secure gateway consists only of |
1158 | // secure gateway veneers, which are made up of a SG instruction followed by a |
1159 | // branch to the destination in secure state. Full details can be found in Arm |
1160 | // v8-M Security Extensions Requirements on Development Tools. |
1161 | // |
1162 | // The CMSE model of software development requires the non-secure and secure |
1163 | // states to be developed as two separate programs. The non-secure developer is |
1164 | // provided with an import library defining symbols describing the entry points |
1165 | // in the secure gateway. No additional linker support is required for the |
1166 | // non-secure state. |
1167 | // |
1168 | // Development of the secure state requires linker support to manage the secure |
1169 | // gateway veneers. The management consists of: |
1170 | // - Creation of new secure gateway veneers based on symbol conventions. |
1171 | // - Checking the address of existing secure gateway veneers. |
// - Warning when existing secure gateway veneers are removed.
1173 | // |
1174 | // The secure gateway veneers are created in an import library, which is just an |
1175 | // ELF object with a symbol table. The import library is controlled by two |
1176 | // command line options: |
1177 | // --in-implib (specify an input import library from a previous revision of the |
1178 | // program). |
1179 | // --out-implib (specify an output import library to be created by the linker). |
1180 | // |
1181 | // The input import library is used to manage consistency of the secure entry |
1182 | // points. The output import library is for new and updated secure entry points. |
1183 | // |
// The symbol convention that identifies secure entry functions is the prefix
// __acle_se_. For a symbol called name, the linker is expected to create a
// secure gateway veneer if symbols __acle_se_name and name have the same
// address.
1187 | // After creating a secure gateway veneer the symbol name labels the secure |
1188 | // gateway veneer and the __acle_se_name labels the function definition. |
1189 | // |
1190 | // The LLD implementation: |
1191 | // - Reads an existing import library with importCmseSymbols(). |
1192 | // - Determines which new secure gateway veneers to create and redirects calls |
1193 | // within the secure state to the __acle_se_ prefixed symbol with |
1194 | // processArmCmseSymbols(). |
1195 | // - Models the SG veneers as a synthetic section. |
1196 | |
1197 | // Initialize symbols. symbols is a parallel array to the corresponding ELF |
1198 | // symbol table. |
1199 | template <class ELFT> void ObjFile<ELFT>::importCmseSymbols() { |
1200 | ArrayRef<Elf_Sym> eSyms = getELFSyms<ELFT>(); |
1201 | // Error for local symbols. The symbol at index 0 is LOCAL. So skip it. |
1202 | for (size_t i = 1, end = firstGlobal; i != end; ++i) { |
1203 | Err(ctx) << "CMSE symbol '"<< CHECK2(eSyms[i].getName(stringTable), this) |
1204 | << "' in import library '"<< this << "' is not global"; |
1205 | } |
1206 | |
1207 | for (size_t i = firstGlobal, end = eSyms.size(); i != end; ++i) { |
1208 | const Elf_Sym &eSym = eSyms[i]; |
1209 | Defined *sym = reinterpret_cast<Defined *>(make<SymbolUnion>()); |
1210 | |
1211 | // Initialize symbol fields. |
1212 | memset(s: static_cast<void *>(sym), c: 0, n: sizeof(Symbol)); |
1213 | sym->setName(CHECK2(eSyms[i].getName(stringTable), this)); |
1214 | sym->value = eSym.st_value; |
1215 | sym->size = eSym.st_size; |
1216 | sym->type = eSym.getType(); |
1217 | sym->binding = eSym.getBinding(); |
1218 | sym->stOther = eSym.st_other; |
1219 | |
1220 | if (eSym.st_shndx != SHN_ABS) { |
1221 | Err(ctx) << "CMSE symbol '"<< sym->getName() << "' in import library '" |
1222 | << this << "' is not absolute"; |
1223 | continue; |
1224 | } |
1225 | |
1226 | if (!(eSym.st_value & 1) || (eSym.getType() != STT_FUNC)) { |
1227 | Err(ctx) << "CMSE symbol '"<< sym->getName() << "' in import library '" |
1228 | << this << "' is not a Thumb function definition"; |
1229 | continue; |
1230 | } |
1231 | |
1232 | if (ctx.symtab->cmseImportLib.count(Key: sym->getName())) { |
1233 | Err(ctx) << "CMSE symbol '"<< sym->getName() |
1234 | << "' is multiply defined in import library '"<< this << "'"; |
1235 | continue; |
1236 | } |
1237 | |
1238 | if (eSym.st_size != ACLESESYM_SIZE) { |
1239 | Warn(ctx) << "CMSE symbol '"<< sym->getName() << "' in import library '" |
1240 | << this << "' does not have correct size of "<< ACLESESYM_SIZE |
1241 | << " bytes"; |
1242 | } |
1243 | |
1244 | ctx.symtab->cmseImportLib[sym->getName()] = sym; |
1245 | } |
1246 | } |
1247 | |
1248 | // Check symbol attributes of the acleSeSym, sym pair. |
1249 | // Both symbols should be global/weak Thumb code symbol definitions. |
1250 | static std::string checkCmseSymAttributes(Ctx &ctx, Symbol *acleSeSym, |
1251 | Symbol *sym) { |
1252 | auto check = [&](Symbol *s, StringRef type) -> std::optional<std::string> { |
1253 | auto d = dyn_cast_or_null<Defined>(Val: s); |
1254 | if (!(d && d->isFunc() && (d->value & 1))) |
1255 | return (Twine(toStr(ctx, f: s->file)) + ": cmse "+ type + " symbol '"+ |
1256 | s->getName() + "' is not a Thumb function definition") |
1257 | .str(); |
1258 | if (!d->section) |
1259 | return (Twine(toStr(ctx, f: s->file)) + ": cmse "+ type + " symbol '"+ |
1260 | s->getName() + "' cannot be an absolute symbol") |
1261 | .str(); |
1262 | return std::nullopt; |
1263 | }; |
1264 | for (auto [sym, type] : |
1265 | {std::make_pair(x&: acleSeSym, y: "special"), std::make_pair(x&: sym, y: "entry")}) |
1266 | if (auto err = check(sym, type)) |
1267 | return *err; |
1268 | return ""; |
1269 | } |
1270 | |
1271 | // Look for [__acle_se_<sym>, <sym>] pairs, as specified in the Cortex-M |
1272 | // Security Extensions specification. |
1273 | // 1) <sym> : A standard function name. |
1274 | // 2) __acle_se_<sym> : A special symbol that prefixes the standard function |
1275 | // name with __acle_se_. |
1276 | // Both these symbols are Thumb function symbols with external linkage. |
1277 | // <sym> may be redefined in .gnu.sgstubs. |
1278 | void elf::processArmCmseSymbols(Ctx &ctx) { |
1279 | if (!ctx.arg.cmseImplib) |
1280 | return; |
1281 | // Only symbols with external linkage end up in ctx.symtab, so no need to do |
1282 | // linkage checks. Only check symbol type. |
1283 | for (Symbol *acleSeSym : ctx.symtab->getSymbols()) { |
1284 | if (!acleSeSym->getName().starts_with(Prefix: ACLESESYM_PREFIX)) |
1285 | continue; |
1286 | // If input object build attributes do not support CMSE, error and disable |
1287 | // further scanning for <sym>, __acle_se_<sym> pairs. |
1288 | if (!ctx.arg.armCMSESupport) { |
1289 | Err(ctx) << "CMSE is only supported by ARMv8-M architecture or later"; |
1290 | ctx.arg.cmseImplib = false; |
1291 | break; |
1292 | } |
1293 | |
1294 | // Try to find the associated symbol definition. |
1295 | // Symbol must have external linkage. |
1296 | StringRef name = acleSeSym->getName().substr(Start: std::strlen(s: ACLESESYM_PREFIX)); |
1297 | Symbol *sym = ctx.symtab->find(name); |
1298 | if (!sym) { |
1299 | Err(ctx) << acleSeSym->file << ": cmse special symbol '" |
1300 | << acleSeSym->getName() |
1301 | << "' detected, but no associated entry function definition '" |
1302 | << name << "' with external linkage found"; |
1303 | continue; |
1304 | } |
1305 | |
1306 | std::string errMsg = checkCmseSymAttributes(ctx, acleSeSym, sym); |
1307 | if (!errMsg.empty()) { |
1308 | Err(ctx) << errMsg; |
1309 | continue; |
1310 | } |
1311 | |
1312 | // <sym> may be redefined later in the link in .gnu.sgstubs |
1313 | ctx.symtab->cmseSymMap[name] = {.acleSeSym: acleSeSym, .sym: sym}; |
1314 | } |
1315 | |
1316 | // If this is an Arm CMSE secure app, replace references to entry symbol <sym> |
1317 | // with its corresponding special symbol __acle_se_<sym>. |
1318 | parallelForEach(R&: ctx.objectFiles, Fn: [&](InputFile *file) { |
1319 | MutableArrayRef<Symbol *> syms = file->getMutableSymbols(); |
1320 | for (size_t i = 0, e = syms.size(); i != e; ++i) { |
1321 | StringRef symName = syms[i]->getName(); |
1322 | auto it = ctx.symtab->cmseSymMap.find(Key: symName); |
1323 | if (it != ctx.symtab->cmseSymMap.end()) |
1324 | syms[i] = it->second.acleSeSym; |
1325 | } |
1326 | }); |
1327 | } |
1328 | |
1329 | ArmCmseSGSection::ArmCmseSGSection(Ctx &ctx) |
1330 | : SyntheticSection(ctx, ".gnu.sgstubs", SHT_PROGBITS, |
1331 | SHF_ALLOC | SHF_EXECINSTR, |
1332 | /*addralign=*/32) { |
1333 | entsize = ACLESESYM_SIZE; |
1334 | // The range of addresses used in the CMSE import library should be fixed. |
1335 | for (auto &[_, sym] : ctx.symtab->cmseImportLib) { |
1336 | if (impLibMaxAddr <= sym->value) |
1337 | impLibMaxAddr = sym->value + sym->size; |
1338 | } |
1339 | if (ctx.symtab->cmseSymMap.empty()) |
1340 | return; |
1341 | addMappingSymbol(); |
1342 | for (auto &[_, entryFunc] : ctx.symtab->cmseSymMap) |
1343 | addSGVeneer(sym: cast<Defined>(Val: entryFunc.acleSeSym), |
1344 | ext_sym: cast<Defined>(Val: entryFunc.sym)); |
1345 | for (auto &[_, sym] : ctx.symtab->cmseImportLib) { |
1346 | if (!ctx.symtab->inCMSEOutImpLib.count(Key: sym->getName())) |
1347 | Warn(ctx) |
1348 | << "entry function '"<< sym->getName() |
1349 | << "' from CMSE import library is not present in secure application"; |
1350 | } |
1351 | |
1352 | if (!ctx.symtab->cmseImportLib.empty() && ctx.arg.cmseOutputLib.empty()) { |
1353 | for (auto &[_, entryFunc] : ctx.symtab->cmseSymMap) { |
1354 | Symbol *sym = entryFunc.sym; |
1355 | if (!ctx.symtab->inCMSEOutImpLib.count(Key: sym->getName())) |
1356 | Warn(ctx) << "new entry function '"<< sym->getName() |
1357 | << "' introduced but no output import library specified"; |
1358 | } |
1359 | } |
1360 | } |
1361 | |
1362 | void ArmCmseSGSection::addSGVeneer(Symbol *acleSeSym, Symbol *sym) { |
1363 | entries.emplace_back(Args&: acleSeSym, Args&: sym); |
1364 | if (ctx.symtab->cmseImportLib.count(Key: sym->getName())) |
1365 | ctx.symtab->inCMSEOutImpLib[sym->getName()] = true; |
1366 | // Symbol addresses different, nothing to do. |
1367 | if (acleSeSym->file != sym->file || |
1368 | cast<Defined>(Val&: *acleSeSym).value != cast<Defined>(Val&: *sym).value) |
1369 | return; |
1370 | // Only secure symbols with values equal to that of it's non-secure |
1371 | // counterpart needs to be in the .gnu.sgstubs section. |
1372 | std::unique_ptr<ArmCmseSGVeneer> ss; |
1373 | auto it = ctx.symtab->cmseImportLib.find(Key: sym->getName()); |
1374 | if (it != ctx.symtab->cmseImportLib.end()) { |
1375 | Defined *impSym = it->second; |
1376 | ss = std::make_unique<ArmCmseSGVeneer>(args&: sym, args&: acleSeSym, args&: impSym->value); |
1377 | } else { |
1378 | ss = std::make_unique<ArmCmseSGVeneer>(args&: sym, args&: acleSeSym); |
1379 | ++newEntries; |
1380 | } |
1381 | sgVeneers.emplace_back(Args: std::move(ss)); |
1382 | } |
1383 | |
1384 | void ArmCmseSGSection::writeTo(uint8_t *buf) { |
1385 | for (std::unique_ptr<ArmCmseSGVeneer> &s : sgVeneers) { |
1386 | uint8_t *p = buf + s->offset; |
1387 | write16(ctx, p: p + 0, v: 0xe97f); // SG |
1388 | write16(ctx, p: p + 2, v: 0xe97f); |
1389 | write16(ctx, p: p + 4, v: 0xf000); // B.W S |
1390 | write16(ctx, p: p + 6, v: 0xb000); |
1391 | ctx.target->relocateNoSym(loc: p + 4, type: R_ARM_THM_JUMP24, |
1392 | val: s->acleSeSym->getVA(ctx) - |
1393 | (getVA() + s->offset + s->size)); |
1394 | } |
1395 | } |
1396 | |
1397 | void ArmCmseSGSection::addMappingSymbol() { |
1398 | addSyntheticLocal(ctx, name: "$t", type: STT_NOTYPE, /*off=*/value: 0, /*size=*/0, section&: *this); |
1399 | } |
1400 | |
1401 | size_t ArmCmseSGSection::getSize() const { |
1402 | if (sgVeneers.empty()) |
1403 | return (impLibMaxAddr ? impLibMaxAddr - getVA() : 0) + newEntries * entsize; |
1404 | |
1405 | return entries.size() * entsize; |
1406 | } |
1407 | |
1408 | void ArmCmseSGSection::finalizeContents() { |
1409 | if (sgVeneers.empty()) |
1410 | return; |
1411 | |
1412 | auto it = |
1413 | std::stable_partition(first: sgVeneers.begin(), last: sgVeneers.end(), |
1414 | pred: [](auto &i) { return i->getAddr().has_value(); }); |
1415 | std::sort(first: sgVeneers.begin(), last: it, comp: [](auto &a, auto &b) { |
1416 | return a->getAddr().value() < b->getAddr().value(); |
1417 | }); |
1418 | // This is the partition of the veneers with fixed addresses. |
1419 | uint64_t addr = (*sgVeneers.begin())->getAddr().has_value() |
1420 | ? (*sgVeneers.begin())->getAddr().value() |
1421 | : getVA(); |
1422 | // Check if the start address of '.gnu.sgstubs' correspond to the |
1423 | // linker-synthesized veneer with the lowest address. |
1424 | if ((getVA() & ~1) != (addr & ~1)) { |
1425 | Err(ctx) |
1426 | << "start address of '.gnu.sgstubs' is different from previous link"; |
1427 | return; |
1428 | } |
1429 | |
1430 | for (auto [i, s] : enumerate(First&: sgVeneers)) { |
1431 | s->offset = i * s->size; |
1432 | Defined(ctx, file, StringRef(), s->sym->binding, s->sym->stOther, |
1433 | s->sym->type, s->offset | 1, s->size, this) |
1434 | .overwrite(sym&: *s->sym); |
1435 | } |
1436 | } |
1437 | |
1438 | // Write the CMSE import library to disk. |
1439 | // The CMSE import library is a relocatable object with only a symbol table. |
1440 | // The symbols are copies of the (absolute) symbols of the secure gateways |
1441 | // in the executable output by this link. |
// See Arm® v8-M Security Extensions: Requirements on Development Tools
1443 | // https://developer.arm.com/documentation/ecm0359818/latest |
1444 | template <typename ELFT> void elf::writeARMCmseImportLib(Ctx &ctx) { |
1445 | auto shstrtab = |
1446 | std::make_unique<StringTableSection>(args&: ctx, args: ".shstrtab", /*dynamic=*/args: false); |
1447 | auto strtab = |
1448 | std::make_unique<StringTableSection>(args&: ctx, args: ".strtab", /*dynamic=*/args: false); |
1449 | auto impSymTab = std::make_unique<SymbolTableSection<ELFT>>(ctx, *strtab); |
1450 | |
1451 | SmallVector<std::pair<std::unique_ptr<OutputSection>, SyntheticSection *>, 0> |
1452 | osIsPairs; |
1453 | osIsPairs.emplace_back( |
1454 | Args: std::make_unique<OutputSection>(args&: ctx, args&: strtab->name, args: 0, args: 0), Args: strtab.get()); |
1455 | osIsPairs.emplace_back( |
1456 | std::make_unique<OutputSection>(ctx, impSymTab->name, 0, 0), |
1457 | impSymTab.get()); |
1458 | osIsPairs.emplace_back( |
1459 | Args: std::make_unique<OutputSection>(args&: ctx, args&: shstrtab->name, args: 0, args: 0), |
1460 | Args: shstrtab.get()); |
1461 | |
1462 | llvm::sort(ctx.symtab->cmseSymMap, [&](const auto &a, const auto &b) { |
1463 | return a.second.sym->getVA(ctx) < b.second.sym->getVA(ctx); |
1464 | }); |
1465 | // Copy the secure gateway entry symbols to the import library symbol table. |
1466 | for (auto &p : ctx.symtab->cmseSymMap) { |
1467 | Defined *d = cast<Defined>(Val: p.second.sym); |
1468 | impSymTab->addSymbol(makeDefined( |
1469 | args&: ctx, args&: ctx.internalFile, args: d->getName(), args: d->computeBinding(ctx), |
1470 | /*stOther=*/args: 0, args: STT_FUNC, args: d->getVA(ctx), args: d->getSize(), args: nullptr)); |
1471 | } |
1472 | |
1473 | size_t idx = 0; |
1474 | uint64_t off = sizeof(typename ELFT::Ehdr); |
1475 | for (auto &[osec, isec] : osIsPairs) { |
1476 | osec->sectionIndex = ++idx; |
1477 | osec->recordSection(isec); |
1478 | osec->finalizeInputSections(); |
1479 | osec->shName = shstrtab->addString(s: osec->name); |
1480 | osec->size = isec->getSize(); |
1481 | isec->finalizeContents(); |
1482 | osec->offset = alignToPowerOf2(Value: off, Align: osec->addralign); |
1483 | off = osec->offset + osec->size; |
1484 | } |
1485 | |
1486 | const uint64_t sectionHeaderOff = alignToPowerOf2(Value: off, Align: ctx.arg.wordsize); |
1487 | const auto shnum = osIsPairs.size() + 1; |
1488 | const uint64_t fileSize = |
1489 | sectionHeaderOff + shnum * sizeof(typename ELFT::Shdr); |
1490 | const unsigned flags = |
1491 | ctx.arg.mmapOutputFile ? (unsigned)FileOutputBuffer::F_mmap : 0; |
1492 | unlinkAsync(path: ctx.arg.cmseOutputLib); |
1493 | Expected<std::unique_ptr<FileOutputBuffer>> bufferOrErr = |
1494 | FileOutputBuffer::create(FilePath: ctx.arg.cmseOutputLib, Size: fileSize, Flags: flags); |
1495 | if (!bufferOrErr) { |
1496 | Err(ctx) << "failed to open "<< ctx.arg.cmseOutputLib << ": " |
1497 | << bufferOrErr.takeError(); |
1498 | return; |
1499 | } |
1500 | |
1501 | // Write the ELF Header |
1502 | std::unique_ptr<FileOutputBuffer> &buffer = *bufferOrErr; |
1503 | uint8_t *const buf = buffer->getBufferStart(); |
1504 | memcpy(dest: buf, src: "\177ELF", n: 4); |
1505 | auto *eHdr = reinterpret_cast<typename ELFT::Ehdr *>(buf); |
1506 | eHdr->e_type = ET_REL; |
1507 | eHdr->e_entry = 0; |
1508 | eHdr->e_shoff = sectionHeaderOff; |
1509 | eHdr->e_ident[EI_CLASS] = ELFCLASS32; |
1510 | eHdr->e_ident[EI_DATA] = ctx.arg.isLE ? ELFDATA2LSB : ELFDATA2MSB; |
1511 | eHdr->e_ident[EI_VERSION] = EV_CURRENT; |
1512 | eHdr->e_ident[EI_OSABI] = ctx.arg.osabi; |
1513 | eHdr->e_ident[EI_ABIVERSION] = 0; |
1514 | eHdr->e_machine = EM_ARM; |
1515 | eHdr->e_version = EV_CURRENT; |
1516 | eHdr->e_flags = ctx.arg.eflags; |
1517 | eHdr->e_ehsize = sizeof(typename ELFT::Ehdr); |
1518 | eHdr->e_phnum = 0; |
1519 | eHdr->e_shentsize = sizeof(typename ELFT::Shdr); |
1520 | eHdr->e_phoff = 0; |
1521 | eHdr->e_phentsize = 0; |
1522 | eHdr->e_shnum = shnum; |
1523 | eHdr->e_shstrndx = shstrtab->getParent()->sectionIndex; |
1524 | |
1525 | // Write the section header table. |
1526 | auto *sHdrs = reinterpret_cast<typename ELFT::Shdr *>(buf + eHdr->e_shoff); |
1527 | for (auto &[osec, _] : osIsPairs) |
1528 | osec->template writeHeaderTo<ELFT>(++sHdrs); |
1529 | |
1530 | // Write section contents to a mmap'ed file. |
1531 | { |
1532 | parallel::TaskGroup tg; |
1533 | for (auto &[osec, _] : osIsPairs) |
1534 | osec->template writeTo<ELFT>(ctx, buf + osec->offset, tg); |
1535 | } |
1536 | |
1537 | if (auto e = buffer->commit()) |
1538 | Err(ctx) << "failed to write output '"<< buffer->getPath() |
1539 | << "': "<< std::move(e); |
1540 | } |
1541 | |
1542 | void elf::setARMTargetInfo(Ctx &ctx) { ctx.target.reset(p: new ARM(ctx)); } |
1543 | |
1544 | template void elf::writeARMCmseImportLib<ELF32LE>(Ctx &); |
1545 | template void elf::writeARMCmseImportLib<ELF32BE>(Ctx &); |
1546 | template void elf::writeARMCmseImportLib<ELF64LE>(Ctx &); |
1547 | template void elf::writeARMCmseImportLib<ELF64BE>(Ctx &); |
1548 | |
1549 | template void ObjFile<ELF32LE>::importCmseSymbols(); |
1550 | template void ObjFile<ELF32BE>::importCmseSymbols(); |
1551 | template void ObjFile<ELF64LE>::importCmseSymbols(); |
1552 | template void ObjFile<ELF64BE>::importCmseSymbols(); |
1553 |
Definitions
- ARM
- CodeState
- ARM
- calcEFlags
- getRelExpr
- getDynRel
- writeGotPlt
- writeIgotPlt
- writePltHeaderLong
- useThumbPLTs
- writePltHeader
- addPltHeaderSymbols
- writePltLong
- writePlt
- addPltSymbols
- needsThunk
- getThunkSectionSpacing
- inBranchRange
- stateChangeWarning
- rotr32
- getRemAndLZForGroup
- encodeAluGroup
- encodeLdrGroup
- encodeLdrsGroup
- relocate
- getImplicitAddend
- isArmMapSymbol
- isThumbMapSymbol
- isDataMapSymbol
- sortArmMappingSymbols
- addArmInputSectionMappingSymbols
- addArmSyntheticSectionMappingSymbol
- toLittleEndianInstructions
- convertArmInstructionstoBE8
- importCmseSymbols
- checkCmseSymAttributes
- processArmCmseSymbols
- ArmCmseSGSection
- addSGVeneer
- writeTo
- addMappingSymbol
- getSize
- finalizeContents
- writeARMCmseImportLib
Learn to use CMake with our Intro Training
Find out more