1 | //===- LoongArch.cpp ------------------------------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "InputFiles.h" |
10 | #include "OutputSections.h" |
11 | #include "Symbols.h" |
12 | #include "SyntheticSections.h" |
13 | #include "Target.h" |
14 | #include "llvm/Support/LEB128.h" |
15 | |
16 | using namespace llvm; |
17 | using namespace llvm::object; |
18 | using namespace llvm::support::endian; |
19 | using namespace llvm::ELF; |
20 | using namespace lld; |
21 | using namespace lld::elf; |
22 | |
namespace {
// Target hooks for LoongArch (LA32/LA64), implementing the relocation model
// of the LoongArch ELF psABI v2. Relaxation currently covers R_LARCH_ALIGN
// (see relax()/finalizeRelax() below).
class LoongArch final : public TargetInfo {
public:
  LoongArch();
  uint32_t calcEFlags() const override;
  int64_t getImplicitAddend(const uint8_t *buf, RelType type) const override;
  void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
  void writeIgotPlt(uint8_t *buf, const Symbol &s) const override;
  void writePltHeader(uint8_t *buf) const override;
  void writePlt(uint8_t *buf, const Symbol &sym,
                uint64_t pltEntryAddr) const override;
  RelType getDynRel(RelType type) const override;
  RelExpr getRelExpr(RelType type, const Symbol &s,
                     const uint8_t *loc) const override;
  bool usesOnlyLowPageBits(RelType type) const override;
  void relocate(uint8_t *loc, const Relocation &rel,
                uint64_t val) const override;
  bool relaxOnce(int pass) const override;
  void finalizeRelax(int passes) const override;
};
} // end anonymous namespace
44 | |
namespace {
// Instruction words with all register/immediate fields zeroed, used as
// templates by insn() when synthesizing PLT code.
enum Op {
  SUB_W = 0x00110000,
  SUB_D = 0x00118000,
  BREAK = 0x002a0000,
  SRLI_W = 0x00448000,
  SRLI_D = 0x00450000,
  ADDI_W = 0x02800000,
  ADDI_D = 0x02c00000,
  ANDI = 0x03400000,
  PCADDU12I = 0x1c000000,
  LD_W = 0x28800000,
  LD_D = 0x28c00000,
  JIRL = 0x4c000000,
};

// ABI register numbers used in the synthesized PLT sequences.
enum Reg {
  R_ZERO = 0,
  R_RA = 1,
  R_TP = 2,
  R_T0 = 12,
  R_T1 = 13,
  R_T2 = 14,
  R_T3 = 15,
};
} // namespace
71 | |
// Mask out the input's lowest 12 bits for use with `pcalau12i`, in sequences
// like `pcalau12i + addi.[wd]` or `pcalau12i + {ld,st}.*` where the `pcalau12i`
// produces a PC-relative intermediate value with the lowest 12 bits zeroed (the
// "page") for the next instruction to add in the "page offset". (`pcalau12i`
// stands for something like "PC ALigned Add Upper that starts from the 12th
// bit, Immediate".)
//
// Here a "page" is in fact just another way to refer to the 12-bit range
// allowed by the immediate field of the addi/ld/st instructions, and not
// related to the system or the kernel's actual page size. The semantics happen
// to match the AArch64 `adrp`, so the concept of "page" is borrowed here.
static uint64_t getLoongArchPage(uint64_t p) {
  constexpr uint64_t pageMask = 0xfff;
  return p & ~pageMask;
}
86 | |
// Return the low 12 bits ("page offset") of a value.
static uint32_t lo12(uint32_t val) {
  constexpr uint32_t lowMask = 0xfff;
  return val & lowMask;
}
88 | |
89 | // Calculate the adjusted page delta between dest and PC. |
90 | uint64_t elf::getLoongArchPageDelta(uint64_t dest, uint64_t pc, RelType type) { |
91 | // Note that if the sequence being relocated is `pcalau12i + addi.d + lu32i.d |
92 | // + lu52i.d`, they must be adjacent so that we can infer the PC of |
93 | // `pcalau12i` when calculating the page delta for the other two instructions |
94 | // (lu32i.d and lu52i.d). Compensate all the sign-extensions is a bit |
95 | // complicated. Just use psABI recommended algorithm. |
96 | uint64_t pcalau12i_pc; |
97 | switch (type) { |
98 | case R_LARCH_PCALA64_LO20: |
99 | case R_LARCH_GOT64_PC_LO20: |
100 | case R_LARCH_TLS_IE64_PC_LO20: |
101 | pcalau12i_pc = pc - 8; |
102 | break; |
103 | case R_LARCH_PCALA64_HI12: |
104 | case R_LARCH_GOT64_PC_HI12: |
105 | case R_LARCH_TLS_IE64_PC_HI12: |
106 | pcalau12i_pc = pc - 12; |
107 | break; |
108 | default: |
109 | pcalau12i_pc = pc; |
110 | break; |
111 | } |
112 | uint64_t result = getLoongArchPage(p: dest) - getLoongArchPage(p: pcalau12i_pc); |
113 | if (dest & 0x800) |
114 | result += 0x1000 - 0x1'0000'0000; |
115 | if (result & 0x8000'0000) |
116 | result += 0x1'0000'0000; |
117 | return result; |
118 | } |
119 | |
// Return bits [31:12] of `val`, rounded up when bit 11 is set so that the
// value can be rebuilt with a following sign-extended 12-bit immediate.
static uint32_t hi20(uint32_t val) {
  const uint32_t rounded = val + 0x800;
  return rounded >> 12;
}
121 | |
// Assemble an instruction word from an opcode template plus the d, j and k
// fields, which occupy bits [4:0], [9:5] and bits 10 and up respectively.
static uint32_t insn(uint32_t op, uint32_t d, uint32_t j, uint32_t k) {
  uint32_t word = op;
  word |= d;
  word |= j << 5;
  word |= k << 10;
  return word;
}
125 | |
// Extract bits v[begin:end], where the range is inclusive and begin >= end.
// The name was lost in this copy of the file; every caller below (setD5k16,
// setK12, relocate, ...) refers to it as extractBits, matching upstream.
static uint32_t extractBits(uint64_t v, uint32_t begin, uint32_t end) {
  // begin == 63 is special-cased to avoid the undefined 1ULL << 64.
  return begin == 63 ? v >> end : (v & ((1ULL << (begin + 1)) - 1)) >> end;
}
130 | |
131 | static uint32_t setD5k16(uint32_t insn, uint32_t imm) { |
132 | uint32_t immLo = extractBits(v: imm, begin: 15, end: 0); |
133 | uint32_t immHi = extractBits(v: imm, begin: 20, end: 16); |
134 | return (insn & 0xfc0003e0) | (immLo << 10) | immHi; |
135 | } |
136 | |
137 | static uint32_t setD10k16(uint32_t insn, uint32_t imm) { |
138 | uint32_t immLo = extractBits(v: imm, begin: 15, end: 0); |
139 | uint32_t immHi = extractBits(v: imm, begin: 25, end: 16); |
140 | return (insn & 0xfc000000) | (immLo << 10) | immHi; |
141 | } |
142 | |
143 | static uint32_t setJ20(uint32_t insn, uint32_t imm) { |
144 | return (insn & 0xfe00001f) | (extractBits(v: imm, begin: 19, end: 0) << 5); |
145 | } |
146 | |
147 | static uint32_t setK12(uint32_t insn, uint32_t imm) { |
148 | return (insn & 0xffc003ff) | (extractBits(v: imm, begin: 11, end: 0) << 10); |
149 | } |
150 | |
151 | static uint32_t setK16(uint32_t insn, uint32_t imm) { |
152 | return (insn & 0xfc0003ff) | (extractBits(v: imm, begin: 15, end: 0) << 10); |
153 | } |
154 | |
155 | static bool isJirl(uint32_t insn) { |
156 | return (insn & 0xfc000000) == JIRL; |
157 | } |
158 | |
159 | static void handleUleb128(uint8_t *loc, uint64_t val) { |
160 | const uint32_t maxcount = 1 + 64 / 7; |
161 | uint32_t count; |
162 | const char *error = nullptr; |
163 | uint64_t orig = decodeULEB128(p: loc, n: &count, end: nullptr, error: &error); |
164 | if (count > maxcount || (count == maxcount && error)) |
165 | errorOrWarn(msg: getErrorLocation(loc) + "extra space for uleb128" ); |
166 | uint64_t mask = count < maxcount ? (1ULL << 7 * count) - 1 : -1ULL; |
167 | encodeULEB128(Value: (orig + val) & mask, p: loc, PadTo: count); |
168 | } |
169 | |
170 | LoongArch::LoongArch() { |
171 | // The LoongArch ISA itself does not have a limit on page sizes. According to |
172 | // the ISA manual, the PS (page size) field in MTLB entries and CSR.STLBPS is |
173 | // 6 bits wide, meaning the maximum page size is 2^63 which is equivalent to |
174 | // "unlimited". |
175 | // However, practically the maximum usable page size is constrained by the |
176 | // kernel implementation, and 64KiB is the biggest non-huge page size |
177 | // supported by Linux as of v6.4. The most widespread page size in use, |
178 | // though, is 16KiB. |
179 | defaultCommonPageSize = 16384; |
180 | defaultMaxPageSize = 65536; |
181 | write32le(P: trapInstr.data(), V: BREAK); // break 0 |
182 | |
183 | copyRel = R_LARCH_COPY; |
184 | pltRel = R_LARCH_JUMP_SLOT; |
185 | relativeRel = R_LARCH_RELATIVE; |
186 | iRelativeRel = R_LARCH_IRELATIVE; |
187 | |
188 | if (config->is64) { |
189 | symbolicRel = R_LARCH_64; |
190 | tlsModuleIndexRel = R_LARCH_TLS_DTPMOD64; |
191 | tlsOffsetRel = R_LARCH_TLS_DTPREL64; |
192 | tlsGotRel = R_LARCH_TLS_TPREL64; |
193 | } else { |
194 | symbolicRel = R_LARCH_32; |
195 | tlsModuleIndexRel = R_LARCH_TLS_DTPMOD32; |
196 | tlsOffsetRel = R_LARCH_TLS_DTPREL32; |
197 | tlsGotRel = R_LARCH_TLS_TPREL32; |
198 | } |
199 | |
200 | gotRel = symbolicRel; |
201 | |
202 | // .got.plt[0] = _dl_runtime_resolve, .got.plt[1] = link_map |
203 | gotPltHeaderEntriesNum = 2; |
204 | |
205 | pltHeaderSize = 32; |
206 | pltEntrySize = 16; |
207 | ipltEntrySize = 16; |
208 | } |
209 | |
210 | static uint32_t getEFlags(const InputFile *f) { |
211 | if (config->is64) |
212 | return cast<ObjFile<ELF64LE>>(Val: f)->getObj().getHeader().e_flags; |
213 | return cast<ObjFile<ELF32LE>>(Val: f)->getObj().getHeader().e_flags; |
214 | } |
215 | |
216 | static bool inputFileHasCode(const InputFile *f) { |
217 | for (const auto *sec : f->getSections()) |
218 | if (sec && sec->flags & SHF_EXECINSTR) |
219 | return true; |
220 | |
221 | return false; |
222 | } |
223 | |
224 | uint32_t LoongArch::calcEFlags() const { |
225 | // If there are only binary input files (from -b binary), use a |
226 | // value of 0 for the ELF header flags. |
227 | if (ctx.objectFiles.empty()) |
228 | return 0; |
229 | |
230 | uint32_t target = 0; |
231 | const InputFile *targetFile; |
232 | for (const InputFile *f : ctx.objectFiles) { |
233 | // Do not enforce ABI compatibility if the input file does not contain code. |
234 | // This is useful for allowing linkage with data-only object files produced |
235 | // with tools like objcopy, that have zero e_flags. |
236 | if (!inputFileHasCode(f)) |
237 | continue; |
238 | |
239 | // Take the first non-zero e_flags as the reference. |
240 | uint32_t flags = getEFlags(f); |
241 | if (target == 0 && flags != 0) { |
242 | target = flags; |
243 | targetFile = f; |
244 | } |
245 | |
246 | if ((flags & EF_LOONGARCH_ABI_MODIFIER_MASK) != |
247 | (target & EF_LOONGARCH_ABI_MODIFIER_MASK)) |
248 | error(msg: toString(f) + |
249 | ": cannot link object files with different ABI from " + |
250 | toString(f: targetFile)); |
251 | |
252 | // We cannot process psABI v1.x / object ABI v0 files (containing stack |
253 | // relocations), unlike ld.bfd. |
254 | // |
255 | // Instead of blindly accepting every v0 object and only failing at |
256 | // relocation processing time, just disallow interlink altogether. We |
257 | // don't expect significant usage of object ABI v0 in the wild (the old |
258 | // world may continue using object ABI v0 for a while, but as it's not |
259 | // binary-compatible with the upstream i.e. new-world ecosystem, it's not |
260 | // being considered here). |
261 | // |
262 | // There are briefly some new-world systems with object ABI v0 binaries too. |
263 | // It is because these systems were built before the new ABI was finalized. |
264 | // These are not supported either due to the extremely small number of them, |
265 | // and the few impacted users are advised to simply rebuild world or |
266 | // reinstall a recent system. |
267 | if ((flags & EF_LOONGARCH_OBJABI_MASK) != EF_LOONGARCH_OBJABI_V1) |
268 | error(msg: toString(f) + ": unsupported object file ABI version" ); |
269 | } |
270 | |
271 | return target; |
272 | } |
273 | |
274 | int64_t LoongArch::getImplicitAddend(const uint8_t *buf, RelType type) const { |
275 | switch (type) { |
276 | default: |
277 | internalLinkerError(loc: getErrorLocation(loc: buf), |
278 | msg: "cannot read addend for relocation " + toString(type)); |
279 | return 0; |
280 | case R_LARCH_32: |
281 | case R_LARCH_TLS_DTPMOD32: |
282 | case R_LARCH_TLS_DTPREL32: |
283 | case R_LARCH_TLS_TPREL32: |
284 | return SignExtend64<32>(x: read32le(P: buf)); |
285 | case R_LARCH_64: |
286 | case R_LARCH_TLS_DTPMOD64: |
287 | case R_LARCH_TLS_DTPREL64: |
288 | case R_LARCH_TLS_TPREL64: |
289 | return read64le(P: buf); |
290 | case R_LARCH_RELATIVE: |
291 | case R_LARCH_IRELATIVE: |
292 | return config->is64 ? read64le(P: buf) : read32le(P: buf); |
293 | case R_LARCH_NONE: |
294 | case R_LARCH_JUMP_SLOT: |
295 | // These relocations are defined as not having an implicit addend. |
296 | return 0; |
297 | } |
298 | } |
299 | |
300 | void LoongArch::writeGotPlt(uint8_t *buf, const Symbol &s) const { |
301 | if (config->is64) |
302 | write64le(P: buf, V: in.plt->getVA()); |
303 | else |
304 | write32le(P: buf, V: in.plt->getVA()); |
305 | } |
306 | |
307 | void LoongArch::writeIgotPlt(uint8_t *buf, const Symbol &s) const { |
308 | if (config->writeAddends) { |
309 | if (config->is64) |
310 | write64le(P: buf, V: s.getVA()); |
311 | else |
312 | write32le(P: buf, V: s.getVA()); |
313 | } |
314 | } |
315 | |
316 | void LoongArch::(uint8_t *buf) const { |
317 | // The LoongArch PLT is currently structured just like that of RISCV. |
318 | // Annoyingly, this means the PLT is still using `pcaddu12i` to perform |
319 | // PC-relative addressing (because `pcaddu12i` is the same as RISCV `auipc`), |
320 | // in contrast to the AArch64-like page-offset scheme with `pcalau12i` that |
321 | // is used everywhere else involving PC-relative operations in the LoongArch |
322 | // ELF psABI v2.00. |
323 | // |
324 | // The `pcrel_{hi20,lo12}` operators are illustrative only and not really |
325 | // supported by LoongArch assemblers. |
326 | // |
327 | // pcaddu12i $t2, %pcrel_hi20(.got.plt) |
328 | // sub.[wd] $t1, $t1, $t3 |
329 | // ld.[wd] $t3, $t2, %pcrel_lo12(.got.plt) ; t3 = _dl_runtime_resolve |
330 | // addi.[wd] $t1, $t1, -pltHeaderSize-12 ; t1 = &.plt[i] - &.plt[0] |
331 | // addi.[wd] $t0, $t2, %pcrel_lo12(.got.plt) |
332 | // srli.[wd] $t1, $t1, (is64?1:2) ; t1 = &.got.plt[i] - &.got.plt[0] |
333 | // ld.[wd] $t0, $t0, Wordsize ; t0 = link_map |
334 | // jr $t3 |
335 | uint32_t offset = in.gotPlt->getVA() - in.plt->getVA(); |
336 | uint32_t sub = config->is64 ? SUB_D : SUB_W; |
337 | uint32_t ld = config->is64 ? LD_D : LD_W; |
338 | uint32_t addi = config->is64 ? ADDI_D : ADDI_W; |
339 | uint32_t srli = config->is64 ? SRLI_D : SRLI_W; |
340 | write32le(P: buf + 0, V: insn(op: PCADDU12I, d: R_T2, j: hi20(val: offset), k: 0)); |
341 | write32le(P: buf + 4, V: insn(op: sub, d: R_T1, j: R_T1, k: R_T3)); |
342 | write32le(P: buf + 8, V: insn(op: ld, d: R_T3, j: R_T2, k: lo12(val: offset))); |
343 | write32le(P: buf + 12, V: insn(op: addi, d: R_T1, j: R_T1, k: lo12(val: -target->pltHeaderSize - 12))); |
344 | write32le(P: buf + 16, V: insn(op: addi, d: R_T0, j: R_T2, k: lo12(val: offset))); |
345 | write32le(P: buf + 20, V: insn(op: srli, d: R_T1, j: R_T1, k: config->is64 ? 1 : 2)); |
346 | write32le(P: buf + 24, V: insn(op: ld, d: R_T0, j: R_T0, k: config->wordsize)); |
347 | write32le(P: buf + 28, V: insn(op: JIRL, d: R_ZERO, j: R_T3, k: 0)); |
348 | } |
349 | |
350 | void LoongArch::writePlt(uint8_t *buf, const Symbol &sym, |
351 | uint64_t pltEntryAddr) const { |
352 | // See the comment in writePltHeader for reason why pcaddu12i is used instead |
353 | // of the pcalau12i that's more commonly seen in the ELF psABI v2.0 days. |
354 | // |
355 | // pcaddu12i $t3, %pcrel_hi20(f@.got.plt) |
356 | // ld.[wd] $t3, $t3, %pcrel_lo12(f@.got.plt) |
357 | // jirl $t1, $t3, 0 |
358 | // nop |
359 | uint32_t offset = sym.getGotPltVA() - pltEntryAddr; |
360 | write32le(P: buf + 0, V: insn(op: PCADDU12I, d: R_T3, j: hi20(val: offset), k: 0)); |
361 | write32le(P: buf + 4, |
362 | V: insn(op: config->is64 ? LD_D : LD_W, d: R_T3, j: R_T3, k: lo12(val: offset))); |
363 | write32le(P: buf + 8, V: insn(op: JIRL, d: R_T1, j: R_T3, k: 0)); |
364 | write32le(P: buf + 12, V: insn(op: ANDI, d: R_ZERO, j: R_ZERO, k: 0)); |
365 | } |
366 | |
367 | RelType LoongArch::getDynRel(RelType type) const { |
368 | return type == target->symbolicRel ? type |
369 | : static_cast<RelType>(R_LARCH_NONE); |
370 | } |
371 | |
372 | RelExpr LoongArch::getRelExpr(const RelType type, const Symbol &s, |
373 | const uint8_t *loc) const { |
374 | switch (type) { |
375 | case R_LARCH_NONE: |
376 | case R_LARCH_MARK_LA: |
377 | case R_LARCH_MARK_PCREL: |
378 | return R_NONE; |
379 | case R_LARCH_32: |
380 | case R_LARCH_64: |
381 | case R_LARCH_ABS_HI20: |
382 | case R_LARCH_ABS_LO12: |
383 | case R_LARCH_ABS64_LO20: |
384 | case R_LARCH_ABS64_HI12: |
385 | return R_ABS; |
386 | case R_LARCH_PCALA_LO12: |
387 | // We could just R_ABS, but the JIRL instruction reuses the relocation type |
388 | // for a different purpose. The questionable usage is part of glibc 2.37 |
389 | // libc_nonshared.a [1], which is linked into user programs, so we have to |
390 | // work around it for a while, even if a new relocation type may be |
391 | // introduced in the future [2]. |
392 | // |
393 | // [1]: https://sourceware.org/git/?p=glibc.git;a=commitdiff;h=9f482b73f41a9a1bbfb173aad0733d1c824c788a |
394 | // [2]: https://github.com/loongson/la-abi-specs/pull/3 |
395 | return isJirl(insn: read32le(P: loc)) ? R_PLT : R_ABS; |
396 | case R_LARCH_TLS_DTPREL32: |
397 | case R_LARCH_TLS_DTPREL64: |
398 | return R_DTPREL; |
399 | case R_LARCH_TLS_TPREL32: |
400 | case R_LARCH_TLS_TPREL64: |
401 | case R_LARCH_TLS_LE_HI20: |
402 | case R_LARCH_TLS_LE_LO12: |
403 | case R_LARCH_TLS_LE64_LO20: |
404 | case R_LARCH_TLS_LE64_HI12: |
405 | return R_TPREL; |
406 | case R_LARCH_ADD6: |
407 | case R_LARCH_ADD8: |
408 | case R_LARCH_ADD16: |
409 | case R_LARCH_ADD32: |
410 | case R_LARCH_ADD64: |
411 | case R_LARCH_ADD_ULEB128: |
412 | case R_LARCH_SUB6: |
413 | case R_LARCH_SUB8: |
414 | case R_LARCH_SUB16: |
415 | case R_LARCH_SUB32: |
416 | case R_LARCH_SUB64: |
417 | case R_LARCH_SUB_ULEB128: |
418 | // The LoongArch add/sub relocs behave like the RISCV counterparts; reuse |
419 | // the RelExpr to avoid code duplication. |
420 | return R_RISCV_ADD; |
421 | case R_LARCH_32_PCREL: |
422 | case R_LARCH_64_PCREL: |
423 | case R_LARCH_PCREL20_S2: |
424 | return R_PC; |
425 | case R_LARCH_B16: |
426 | case R_LARCH_B21: |
427 | case R_LARCH_B26: |
428 | case R_LARCH_CALL36: |
429 | return R_PLT_PC; |
430 | case R_LARCH_GOT_PC_HI20: |
431 | case R_LARCH_GOT64_PC_LO20: |
432 | case R_LARCH_GOT64_PC_HI12: |
433 | case R_LARCH_TLS_IE_PC_HI20: |
434 | case R_LARCH_TLS_IE64_PC_LO20: |
435 | case R_LARCH_TLS_IE64_PC_HI12: |
436 | return R_LOONGARCH_GOT_PAGE_PC; |
437 | case R_LARCH_GOT_PC_LO12: |
438 | case R_LARCH_TLS_IE_PC_LO12: |
439 | return R_LOONGARCH_GOT; |
440 | case R_LARCH_TLS_LD_PC_HI20: |
441 | case R_LARCH_TLS_GD_PC_HI20: |
442 | return R_LOONGARCH_TLSGD_PAGE_PC; |
443 | case R_LARCH_PCALA_HI20: |
444 | // Why not R_LOONGARCH_PAGE_PC, majority of references don't go through PLT |
445 | // anyway so why waste time checking only to get everything relaxed back to |
446 | // it? |
447 | // |
448 | // This is again due to the R_LARCH_PCALA_LO12 on JIRL case, where we want |
449 | // both the HI20 and LO12 to potentially refer to the PLT. But in reality |
450 | // the HI20 reloc appears earlier, and the relocs don't contain enough |
451 | // information to let us properly resolve semantics per symbol. |
452 | // Unlike RISCV, our LO12 relocs *do not* point to their corresponding HI20 |
453 | // relocs, hence it is nearly impossible to 100% accurately determine each |
454 | // HI20's "flavor" without taking big performance hits, in the presence of |
455 | // edge cases (e.g. HI20 without pairing LO12; paired LO12 placed so far |
456 | // apart that relationship is not certain anymore), and programmer mistakes |
457 | // (e.g. as outlined in https://github.com/loongson/la-abi-specs/pull/3). |
458 | // |
459 | // Ideally we would scan in an extra pass for all LO12s on JIRL, then mark |
460 | // every HI20 reloc referring to the same symbol differently; this is not |
461 | // feasible with the current function signature of getRelExpr that doesn't |
462 | // allow for such inter-pass state. |
463 | // |
464 | // So, unfortunately we have to again workaround this quirk the same way as |
465 | // BFD: assuming every R_LARCH_PCALA_HI20 is potentially PLT-needing, only |
466 | // relaxing back to R_LOONGARCH_PAGE_PC if it's known not so at a later |
467 | // stage. |
468 | return R_LOONGARCH_PLT_PAGE_PC; |
469 | case R_LARCH_PCALA64_LO20: |
470 | case R_LARCH_PCALA64_HI12: |
471 | return R_LOONGARCH_PAGE_PC; |
472 | case R_LARCH_GOT_HI20: |
473 | case R_LARCH_GOT_LO12: |
474 | case R_LARCH_GOT64_LO20: |
475 | case R_LARCH_GOT64_HI12: |
476 | case R_LARCH_TLS_IE_HI20: |
477 | case R_LARCH_TLS_IE_LO12: |
478 | case R_LARCH_TLS_IE64_LO20: |
479 | case R_LARCH_TLS_IE64_HI12: |
480 | return R_GOT; |
481 | case R_LARCH_TLS_LD_HI20: |
482 | return R_TLSLD_GOT; |
483 | case R_LARCH_TLS_GD_HI20: |
484 | return R_TLSGD_GOT; |
485 | case R_LARCH_RELAX: |
486 | return config->relax ? R_RELAX_HINT : R_NONE; |
487 | case R_LARCH_ALIGN: |
488 | return R_RELAX_HINT; |
489 | |
490 | // Other known relocs that are explicitly unimplemented: |
491 | // |
492 | // - psABI v1 relocs that need a stateful stack machine to work, and not |
493 | // required when implementing psABI v2; |
494 | // - relocs that are not used anywhere (R_LARCH_{ADD,SUB}_24 [1], and the |
495 | // two GNU vtable-related relocs). |
496 | // |
497 | // [1]: https://web.archive.org/web/20230709064026/https://github.com/loongson/LoongArch-Documentation/issues/51 |
498 | default: |
499 | error(msg: getErrorLocation(loc) + "unknown relocation (" + Twine(type) + |
500 | ") against symbol " + toString(s)); |
501 | return R_NONE; |
502 | } |
503 | } |
504 | |
505 | bool LoongArch::usesOnlyLowPageBits(RelType type) const { |
506 | switch (type) { |
507 | default: |
508 | return false; |
509 | case R_LARCH_PCALA_LO12: |
510 | case R_LARCH_GOT_LO12: |
511 | case R_LARCH_GOT_PC_LO12: |
512 | case R_LARCH_TLS_IE_PC_LO12: |
513 | return true; |
514 | } |
515 | } |
516 | |
517 | void LoongArch::relocate(uint8_t *loc, const Relocation &rel, |
518 | uint64_t val) const { |
519 | switch (rel.type) { |
520 | case R_LARCH_32_PCREL: |
521 | checkInt(loc, v: val, n: 32, rel); |
522 | [[fallthrough]]; |
523 | case R_LARCH_32: |
524 | case R_LARCH_TLS_DTPREL32: |
525 | write32le(P: loc, V: val); |
526 | return; |
527 | case R_LARCH_64: |
528 | case R_LARCH_TLS_DTPREL64: |
529 | case R_LARCH_64_PCREL: |
530 | write64le(P: loc, V: val); |
531 | return; |
532 | |
533 | case R_LARCH_PCREL20_S2: |
534 | checkInt(loc, v: val, n: 22, rel); |
535 | checkAlignment(loc, v: val, n: 4, rel); |
536 | write32le(P: loc, V: setJ20(insn: read32le(P: loc), imm: val >> 2)); |
537 | return; |
538 | |
539 | case R_LARCH_B16: |
540 | checkInt(loc, v: val, n: 18, rel); |
541 | checkAlignment(loc, v: val, n: 4, rel); |
542 | write32le(P: loc, V: setK16(insn: read32le(P: loc), imm: val >> 2)); |
543 | return; |
544 | |
545 | case R_LARCH_B21: |
546 | checkInt(loc, v: val, n: 23, rel); |
547 | checkAlignment(loc, v: val, n: 4, rel); |
548 | write32le(P: loc, V: setD5k16(insn: read32le(P: loc), imm: val >> 2)); |
549 | return; |
550 | |
551 | case R_LARCH_B26: |
552 | checkInt(loc, v: val, n: 28, rel); |
553 | checkAlignment(loc, v: val, n: 4, rel); |
554 | write32le(P: loc, V: setD10k16(insn: read32le(P: loc), imm: val >> 2)); |
555 | return; |
556 | |
557 | case R_LARCH_CALL36: { |
558 | // This relocation is designed for adjacent pcaddu18i+jirl pairs that |
559 | // are patched in one time. Because of sign extension of these insns' |
560 | // immediate fields, the relocation range is [-128G - 0x20000, +128G - |
561 | // 0x20000) (of course must be 4-byte aligned). |
562 | if (((int64_t)val + 0x20000) != llvm::SignExtend64(X: val + 0x20000, B: 38)) |
563 | reportRangeError(loc, rel, v: Twine(val), min: llvm::minIntN(N: 38) - 0x20000, |
564 | max: llvm::maxIntN(N: 38) - 0x20000); |
565 | checkAlignment(loc, v: val, n: 4, rel); |
566 | // Since jirl performs sign extension on the offset immediate, adds (1<<17) |
567 | // to original val to get the correct hi20. |
568 | uint32_t hi20 = extractBits(v: val + (1 << 17), begin: 37, end: 18); |
569 | // Despite the name, the lower part is actually 18 bits with 4-byte aligned. |
570 | uint32_t lo16 = extractBits(v: val, begin: 17, end: 2); |
571 | write32le(P: loc, V: setJ20(insn: read32le(P: loc), imm: hi20)); |
572 | write32le(P: loc + 4, V: setK16(insn: read32le(P: loc + 4), imm: lo16)); |
573 | return; |
574 | } |
575 | |
576 | // Relocs intended for `addi`, `ld` or `st`. |
577 | case R_LARCH_PCALA_LO12: |
578 | // We have to again inspect the insn word to handle the R_LARCH_PCALA_LO12 |
579 | // on JIRL case: firstly JIRL wants its immediate's 2 lowest zeroes |
580 | // removed by us (in contrast to regular R_LARCH_PCALA_LO12), secondly |
581 | // its immediate slot width is different too (16, not 12). |
582 | // In this case, process like an R_LARCH_B16, but without overflow checking |
583 | // and only taking the value's lowest 12 bits. |
584 | if (isJirl(insn: read32le(P: loc))) { |
585 | checkAlignment(loc, v: val, n: 4, rel); |
586 | val = SignExtend64<12>(x: val); |
587 | write32le(P: loc, V: setK16(insn: read32le(P: loc), imm: val >> 2)); |
588 | return; |
589 | } |
590 | [[fallthrough]]; |
591 | case R_LARCH_ABS_LO12: |
592 | case R_LARCH_GOT_PC_LO12: |
593 | case R_LARCH_GOT_LO12: |
594 | case R_LARCH_TLS_LE_LO12: |
595 | case R_LARCH_TLS_IE_PC_LO12: |
596 | case R_LARCH_TLS_IE_LO12: |
597 | write32le(P: loc, V: setK12(insn: read32le(P: loc), imm: extractBits(v: val, begin: 11, end: 0))); |
598 | return; |
599 | |
600 | // Relocs intended for `lu12i.w` or `pcalau12i`. |
601 | case R_LARCH_ABS_HI20: |
602 | case R_LARCH_PCALA_HI20: |
603 | case R_LARCH_GOT_PC_HI20: |
604 | case R_LARCH_GOT_HI20: |
605 | case R_LARCH_TLS_LE_HI20: |
606 | case R_LARCH_TLS_IE_PC_HI20: |
607 | case R_LARCH_TLS_IE_HI20: |
608 | case R_LARCH_TLS_LD_PC_HI20: |
609 | case R_LARCH_TLS_LD_HI20: |
610 | case R_LARCH_TLS_GD_PC_HI20: |
611 | case R_LARCH_TLS_GD_HI20: |
612 | write32le(P: loc, V: setJ20(insn: read32le(P: loc), imm: extractBits(v: val, begin: 31, end: 12))); |
613 | return; |
614 | |
615 | // Relocs intended for `lu32i.d`. |
616 | case R_LARCH_ABS64_LO20: |
617 | case R_LARCH_PCALA64_LO20: |
618 | case R_LARCH_GOT64_PC_LO20: |
619 | case R_LARCH_GOT64_LO20: |
620 | case R_LARCH_TLS_LE64_LO20: |
621 | case R_LARCH_TLS_IE64_PC_LO20: |
622 | case R_LARCH_TLS_IE64_LO20: |
623 | write32le(P: loc, V: setJ20(insn: read32le(P: loc), imm: extractBits(v: val, begin: 51, end: 32))); |
624 | return; |
625 | |
626 | // Relocs intended for `lu52i.d`. |
627 | case R_LARCH_ABS64_HI12: |
628 | case R_LARCH_PCALA64_HI12: |
629 | case R_LARCH_GOT64_PC_HI12: |
630 | case R_LARCH_GOT64_HI12: |
631 | case R_LARCH_TLS_LE64_HI12: |
632 | case R_LARCH_TLS_IE64_PC_HI12: |
633 | case R_LARCH_TLS_IE64_HI12: |
634 | write32le(P: loc, V: setK12(insn: read32le(P: loc), imm: extractBits(v: val, begin: 63, end: 52))); |
635 | return; |
636 | |
637 | case R_LARCH_ADD6: |
638 | *loc = (*loc & 0xc0) | ((*loc + val) & 0x3f); |
639 | return; |
640 | case R_LARCH_ADD8: |
641 | *loc += val; |
642 | return; |
643 | case R_LARCH_ADD16: |
644 | write16le(P: loc, V: read16le(P: loc) + val); |
645 | return; |
646 | case R_LARCH_ADD32: |
647 | write32le(P: loc, V: read32le(P: loc) + val); |
648 | return; |
649 | case R_LARCH_ADD64: |
650 | write64le(P: loc, V: read64le(P: loc) + val); |
651 | return; |
652 | case R_LARCH_ADD_ULEB128: |
653 | handleUleb128(loc, val); |
654 | return; |
655 | case R_LARCH_SUB6: |
656 | *loc = (*loc & 0xc0) | ((*loc - val) & 0x3f); |
657 | return; |
658 | case R_LARCH_SUB8: |
659 | *loc -= val; |
660 | return; |
661 | case R_LARCH_SUB16: |
662 | write16le(P: loc, V: read16le(P: loc) - val); |
663 | return; |
664 | case R_LARCH_SUB32: |
665 | write32le(P: loc, V: read32le(P: loc) - val); |
666 | return; |
667 | case R_LARCH_SUB64: |
668 | write64le(P: loc, V: read64le(P: loc) - val); |
669 | return; |
670 | case R_LARCH_SUB_ULEB128: |
671 | handleUleb128(loc, val: -val); |
672 | return; |
673 | |
674 | case R_LARCH_MARK_LA: |
675 | case R_LARCH_MARK_PCREL: |
676 | // no-op |
677 | return; |
678 | |
679 | case R_LARCH_RELAX: |
680 | return; // Ignored (for now) |
681 | |
682 | default: |
683 | llvm_unreachable("unknown relocation" ); |
684 | } |
685 | } |
686 | |
687 | static bool relax(InputSection &sec) { |
688 | const uint64_t secAddr = sec.getVA(); |
689 | const MutableArrayRef<Relocation> relocs = sec.relocs(); |
690 | auto &aux = *sec.relaxAux; |
691 | bool changed = false; |
692 | ArrayRef<SymbolAnchor> sa = ArrayRef(aux.anchors); |
693 | uint64_t delta = 0; |
694 | |
695 | std::fill_n(first: aux.relocTypes.get(), n: relocs.size(), value: R_LARCH_NONE); |
696 | aux.writes.clear(); |
697 | for (auto [i, r] : llvm::enumerate(First: relocs)) { |
698 | const uint64_t loc = secAddr + r.offset - delta; |
699 | uint32_t &cur = aux.relocDeltas[i], remove = 0; |
700 | switch (r.type) { |
701 | case R_LARCH_ALIGN: { |
702 | const uint64_t addend = |
703 | r.sym->isUndefined() ? Log2_64(Value: r.addend) + 1 : r.addend; |
704 | const uint64_t allBytes = (1ULL << (addend & 0xff)) - 4; |
705 | const uint64_t align = 1ULL << (addend & 0xff); |
706 | const uint64_t maxBytes = addend >> 8; |
707 | const uint64_t off = loc & (align - 1); |
708 | const uint64_t curBytes = off == 0 ? 0 : align - off; |
709 | // All bytes beyond the alignment boundary should be removed. |
710 | // If emit bytes more than max bytes to emit, remove all. |
711 | if (maxBytes != 0 && curBytes > maxBytes) |
712 | remove = allBytes; |
713 | else |
714 | remove = allBytes - curBytes; |
715 | // If we can't satisfy this alignment, we've found a bad input. |
716 | if (LLVM_UNLIKELY(static_cast<int32_t>(remove) < 0)) { |
717 | errorOrWarn(msg: getErrorLocation(loc: (const uint8_t *)loc) + |
718 | "insufficient padding bytes for " + lld::toString(type: r.type) + |
719 | ": " + Twine(allBytes) + " bytes available for " + |
720 | "requested alignment of " + Twine(align) + " bytes" ); |
721 | remove = 0; |
722 | } |
723 | break; |
724 | } |
725 | } |
726 | |
727 | // For all anchors whose offsets are <= r.offset, they are preceded by |
728 | // the previous relocation whose `relocDeltas` value equals `delta`. |
729 | // Decrease their st_value and update their st_size. |
730 | for (; sa.size() && sa[0].offset <= r.offset; sa = sa.slice(N: 1)) { |
731 | if (sa[0].end) |
732 | sa[0].d->size = sa[0].offset - delta - sa[0].d->value; |
733 | else |
734 | sa[0].d->value = sa[0].offset - delta; |
735 | } |
736 | delta += remove; |
737 | if (delta != cur) { |
738 | cur = delta; |
739 | changed = true; |
740 | } |
741 | } |
742 | |
743 | for (const SymbolAnchor &a : sa) { |
744 | if (a.end) |
745 | a.d->size = a.offset - delta - a.d->value; |
746 | else |
747 | a.d->value = a.offset - delta; |
748 | } |
749 | // Inform assignAddresses that the size has changed. |
750 | if (!isUInt<32>(x: delta)) |
751 | fatal(msg: "section size decrease is too large: " + Twine(delta)); |
752 | sec.bytesDropped = delta; |
753 | return changed; |
754 | } |
755 | |
756 | // When relaxing just R_LARCH_ALIGN, relocDeltas is usually changed only once in |
757 | // the absence of a linker script. For call and load/store R_LARCH_RELAX, code |
758 | // shrinkage may reduce displacement and make more relocations eligible for |
759 | // relaxation. Code shrinkage may increase displacement to a call/load/store |
760 | // target at a higher fixed address, invalidating an earlier relaxation. Any |
761 | // change in section sizes can have cascading effect and require another |
762 | // relaxation pass. |
763 | bool LoongArch::relaxOnce(int pass) const { |
764 | if (config->relocatable) |
765 | return false; |
766 | |
767 | if (pass == 0) |
768 | initSymbolAnchors(); |
769 | |
770 | SmallVector<InputSection *, 0> storage; |
771 | bool changed = false; |
772 | for (OutputSection *osec : outputSections) { |
773 | if (!(osec->flags & SHF_EXECINSTR)) |
774 | continue; |
775 | for (InputSection *sec : getInputSections(os: *osec, storage)) |
776 | changed |= relax(sec&: *sec); |
777 | } |
778 | return changed; |
779 | } |
780 | |
781 | void LoongArch::finalizeRelax(int passes) const { |
782 | log(msg: "relaxation passes: " + Twine(passes)); |
783 | SmallVector<InputSection *, 0> storage; |
784 | for (OutputSection *osec : outputSections) { |
785 | if (!(osec->flags & SHF_EXECINSTR)) |
786 | continue; |
787 | for (InputSection *sec : getInputSections(os: *osec, storage)) { |
788 | RelaxAux &aux = *sec->relaxAux; |
789 | if (!aux.relocDeltas) |
790 | continue; |
791 | |
792 | MutableArrayRef<Relocation> rels = sec->relocs(); |
793 | ArrayRef<uint8_t> old = sec->content(); |
794 | size_t newSize = old.size() - aux.relocDeltas[rels.size() - 1]; |
795 | uint8_t *p = context().bAlloc.Allocate<uint8_t>(Num: newSize); |
796 | uint64_t offset = 0; |
797 | int64_t delta = 0; |
798 | sec->content_ = p; |
799 | sec->size = newSize; |
800 | sec->bytesDropped = 0; |
801 | |
802 | // Update section content: remove NOPs for R_LARCH_ALIGN and rewrite |
803 | // instructions for relaxed relocations. |
804 | for (size_t i = 0, e = rels.size(); i != e; ++i) { |
805 | uint32_t remove = aux.relocDeltas[i] - delta; |
806 | delta = aux.relocDeltas[i]; |
807 | if (remove == 0 && aux.relocTypes[i] == R_LARCH_NONE) |
808 | continue; |
809 | |
810 | // Copy from last location to the current relocated location. |
811 | const Relocation &r = rels[i]; |
812 | uint64_t size = r.offset - offset; |
813 | memcpy(dest: p, src: old.data() + offset, n: size); |
814 | p += size; |
815 | offset = r.offset + remove; |
816 | } |
817 | memcpy(dest: p, src: old.data() + offset, n: old.size() - offset); |
818 | |
819 | // Subtract the previous relocDeltas value from the relocation offset. |
820 | // For a pair of R_LARCH_XXX/R_LARCH_RELAX with the same offset, decrease |
821 | // their r_offset by the same delta. |
822 | delta = 0; |
823 | for (size_t i = 0, e = rels.size(); i != e;) { |
824 | uint64_t cur = rels[i].offset; |
825 | do { |
826 | rels[i].offset -= delta; |
827 | if (aux.relocTypes[i] != R_LARCH_NONE) |
828 | rels[i].type = aux.relocTypes[i]; |
829 | } while (++i != e && rels[i].offset == cur); |
830 | delta = aux.relocDeltas[i - 1]; |
831 | } |
832 | } |
833 | } |
834 | } |
835 | |
836 | TargetInfo *elf::getLoongArchTargetInfo() { |
837 | static LoongArch target; |
838 | return ⌖ |
839 | } |
840 | |