1 | //===- X86.cpp ------------------------------------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "OutputSections.h" |
10 | #include "Symbols.h" |
11 | #include "SyntheticSections.h" |
12 | #include "Target.h" |
13 | #include "lld/Common/ErrorHandler.h" |
14 | #include "llvm/Support/Endian.h" |
15 | |
16 | using namespace llvm; |
17 | using namespace llvm::support::endian; |
18 | using namespace llvm::ELF; |
19 | using namespace lld; |
20 | using namespace lld::elf; |
21 | |
22 | namespace { |
23 | class X86 : public TargetInfo { |
24 | public: |
25 | X86(); |
26 | int getTlsGdRelaxSkip(RelType type) const override; |
27 | RelExpr getRelExpr(RelType type, const Symbol &s, |
28 | const uint8_t *loc) const override; |
29 | int64_t getImplicitAddend(const uint8_t *buf, RelType type) const override; |
30 | void writeGotPltHeader(uint8_t *buf) const override; |
31 | RelType getDynRel(RelType type) const override; |
32 | void writeGotPlt(uint8_t *buf, const Symbol &s) const override; |
33 | void writeIgotPlt(uint8_t *buf, const Symbol &s) const override; |
34 | void writePltHeader(uint8_t *buf) const override; |
35 | void writePlt(uint8_t *buf, const Symbol &sym, |
36 | uint64_t pltEntryAddr) const override; |
37 | void relocate(uint8_t *loc, const Relocation &rel, |
38 | uint64_t val) const override; |
39 | |
40 | RelExpr adjustTlsExpr(RelType type, RelExpr expr) const override; |
41 | void relocateAlloc(InputSectionBase &sec, uint8_t *buf) const override; |
42 | }; |
43 | } // namespace |
44 | |
45 | X86::X86() { |
46 | copyRel = R_386_COPY; |
47 | gotRel = R_386_GLOB_DAT; |
48 | pltRel = R_386_JUMP_SLOT; |
49 | iRelativeRel = R_386_IRELATIVE; |
50 | relativeRel = R_386_RELATIVE; |
51 | symbolicRel = R_386_32; |
52 | tlsDescRel = R_386_TLS_DESC; |
53 | tlsGotRel = R_386_TLS_TPOFF; |
54 | tlsModuleIndexRel = R_386_TLS_DTPMOD32; |
55 | tlsOffsetRel = R_386_TLS_DTPOFF32; |
56 | gotBaseSymInGotPlt = true; |
57 | pltHeaderSize = 16; |
58 | pltEntrySize = 16; |
59 | ipltEntrySize = 16; |
60 | trapInstr = {0xcc, 0xcc, 0xcc, 0xcc}; // 0xcc = INT3 |
61 | |
62 | // Align to the non-PAE large page size (known as a superpage or huge page). |
63 | // FreeBSD automatically promotes large, superpage-aligned allocations. |
64 | defaultImageBase = 0x400000; |
65 | } |
66 | |
67 | int X86::getTlsGdRelaxSkip(RelType type) const { |
68 | // TLSDESC relocations are processed separately. See relaxTlsGdToLe below. |
69 | return type == R_386_TLS_GOTDESC || type == R_386_TLS_DESC_CALL ? 1 : 2; |
70 | } |
71 | |
72 | RelExpr X86::getRelExpr(RelType type, const Symbol &s, |
73 | const uint8_t *loc) const { |
74 | switch (type) { |
75 | case R_386_8: |
76 | case R_386_16: |
77 | case R_386_32: |
78 | return R_ABS; |
79 | case R_386_TLS_LDO_32: |
80 | return R_DTPREL; |
81 | case R_386_TLS_GD: |
82 | return R_TLSGD_GOTPLT; |
83 | case R_386_TLS_LDM: |
84 | return R_TLSLD_GOTPLT; |
85 | case R_386_PLT32: |
86 | return R_PLT_PC; |
87 | case R_386_PC8: |
88 | case R_386_PC16: |
89 | case R_386_PC32: |
90 | return R_PC; |
91 | case R_386_GOTPC: |
92 | return R_GOTPLTONLY_PC; |
93 | case R_386_TLS_IE: |
94 | return R_GOT; |
95 | case R_386_GOT32: |
96 | case R_386_GOT32X: |
97 | // These relocations are arguably mis-designed because their calculations |
98 | // depend on the instructions they are applied to. This is bad because we |
99 | // usually don't care about whether the target section contains valid |
100 | // machine instructions or not. But this is part of the documented ABI, so |
101 | // we had to implement as the standard requires. |
102 | // |
103 | // x86 does not support PC-relative data access. Therefore, in order to |
104 | // access GOT contents, a GOT address needs to be known at link-time |
105 | // (which means non-PIC) or compilers have to emit code to get a GOT |
106 | // address at runtime (which means code is position-independent but |
107 | // compilers need to emit extra code for each GOT access.) This decision |
108 | // is made at compile-time. In the latter case, compilers emit code to |
109 | // load a GOT address to a register, which is usually %ebx. |
110 | // |
111 | // So, there are two ways to refer to symbol foo's GOT entry: foo@GOT or |
112 | // foo@GOT(%ebx). |
113 | // |
114 | // foo@GOT is not usable in PIC. If we are creating a PIC output and if we |
115 | // find such relocation, we should report an error. foo@GOT is resolved to |
116 | // an *absolute* address of foo's GOT entry, because both GOT address and |
117 | // foo's offset are known. In other words, it's G + A. |
118 | // |
119 | // foo@GOT(%ebx) needs to be resolved to a *relative* offset from a GOT to |
120 | // foo's GOT entry in the table, because GOT address is not known but foo's |
121 | // offset in the table is known. It's G + A - GOT. |
122 | // |
123 | // It's unfortunate that compilers emit the same relocation for these |
124 | // different use cases. In order to distinguish them, we have to read a |
125 | // machine instruction. |
126 | // |
127 | // The following code implements it. We assume that Loc[0] is the first byte |
128 | // of a displacement or an immediate field of a valid machine |
129 | // instruction. That means a ModRM byte is at Loc[-1]. By taking a look at |
130 | // the byte, we can determine whether the instruction uses the operand as an |
131 | // absolute address (R_GOT) or a register-relative address (R_GOTPLT). |
132 | return (loc[-1] & 0xc7) == 0x5 ? R_GOT : R_GOTPLT; |
133 | case R_386_TLS_GOTDESC: |
134 | return R_TLSDESC_GOTPLT; |
135 | case R_386_TLS_DESC_CALL: |
136 | return R_TLSDESC_CALL; |
137 | case R_386_TLS_GOTIE: |
138 | return R_GOTPLT; |
139 | case R_386_GOTOFF: |
140 | return R_GOTPLTREL; |
141 | case R_386_TLS_LE: |
142 | return R_TPREL; |
143 | case R_386_TLS_LE_32: |
144 | return R_TPREL_NEG; |
145 | case R_386_NONE: |
146 | return R_NONE; |
147 | default: |
148 | error(msg: getErrorLocation(loc) + "unknown relocation (" + Twine(type) + |
149 | ") against symbol " + toString(s)); |
150 | return R_NONE; |
151 | } |
152 | } |
153 | |
154 | RelExpr X86::adjustTlsExpr(RelType type, RelExpr expr) const { |
155 | switch (expr) { |
156 | default: |
157 | return expr; |
158 | case R_RELAX_TLS_GD_TO_IE: |
159 | return R_RELAX_TLS_GD_TO_IE_GOTPLT; |
160 | case R_RELAX_TLS_GD_TO_LE: |
161 | return type == R_386_TLS_GD ? R_RELAX_TLS_GD_TO_LE_NEG |
162 | : R_RELAX_TLS_GD_TO_LE; |
163 | } |
164 | } |
165 | |
166 | void X86::(uint8_t *buf) const { |
167 | write32le(P: buf, V: mainPart->dynamic->getVA()); |
168 | } |
169 | |
170 | void X86::writeGotPlt(uint8_t *buf, const Symbol &s) const { |
171 | // Entries in .got.plt initially points back to the corresponding |
172 | // PLT entries with a fixed offset to skip the first instruction. |
173 | write32le(P: buf, V: s.getPltVA() + 6); |
174 | } |
175 | |
176 | void X86::writeIgotPlt(uint8_t *buf, const Symbol &s) const { |
177 | // An x86 entry is the address of the ifunc resolver function. |
178 | write32le(P: buf, V: s.getVA()); |
179 | } |
180 | |
181 | RelType X86::getDynRel(RelType type) const { |
182 | if (type == R_386_TLS_LE) |
183 | return R_386_TLS_TPOFF; |
184 | if (type == R_386_TLS_LE_32) |
185 | return R_386_TLS_TPOFF32; |
186 | return type; |
187 | } |
188 | |
189 | void X86::(uint8_t *buf) const { |
190 | if (config->isPic) { |
191 | const uint8_t v[] = { |
192 | 0xff, 0xb3, 0x04, 0x00, 0x00, 0x00, // pushl 4(%ebx) |
193 | 0xff, 0xa3, 0x08, 0x00, 0x00, 0x00, // jmp *8(%ebx) |
194 | 0x90, 0x90, 0x90, 0x90 // nop |
195 | }; |
196 | memcpy(dest: buf, src: v, n: sizeof(v)); |
197 | return; |
198 | } |
199 | |
200 | const uint8_t pltData[] = { |
201 | 0xff, 0x35, 0, 0, 0, 0, // pushl (GOTPLT+4) |
202 | 0xff, 0x25, 0, 0, 0, 0, // jmp *(GOTPLT+8) |
203 | 0x90, 0x90, 0x90, 0x90, // nop |
204 | }; |
205 | memcpy(dest: buf, src: pltData, n: sizeof(pltData)); |
206 | uint32_t gotPlt = in.gotPlt->getVA(); |
207 | write32le(P: buf + 2, V: gotPlt + 4); |
208 | write32le(P: buf + 8, V: gotPlt + 8); |
209 | } |
210 | |
211 | void X86::writePlt(uint8_t *buf, const Symbol &sym, |
212 | uint64_t pltEntryAddr) const { |
213 | unsigned relOff = in.relaPlt->entsize * sym.getPltIdx(); |
214 | if (config->isPic) { |
215 | const uint8_t inst[] = { |
216 | 0xff, 0xa3, 0, 0, 0, 0, // jmp *foo@GOT(%ebx) |
217 | 0x68, 0, 0, 0, 0, // pushl $reloc_offset |
218 | 0xe9, 0, 0, 0, 0, // jmp .PLT0@PC |
219 | }; |
220 | memcpy(dest: buf, src: inst, n: sizeof(inst)); |
221 | write32le(P: buf + 2, V: sym.getGotPltVA() - in.gotPlt->getVA()); |
222 | } else { |
223 | const uint8_t inst[] = { |
224 | 0xff, 0x25, 0, 0, 0, 0, // jmp *foo@GOT |
225 | 0x68, 0, 0, 0, 0, // pushl $reloc_offset |
226 | 0xe9, 0, 0, 0, 0, // jmp .PLT0@PC |
227 | }; |
228 | memcpy(dest: buf, src: inst, n: sizeof(inst)); |
229 | write32le(P: buf + 2, V: sym.getGotPltVA()); |
230 | } |
231 | |
232 | write32le(P: buf + 7, V: relOff); |
233 | write32le(P: buf + 12, V: in.plt->getVA() - pltEntryAddr - 16); |
234 | } |
235 | |
236 | int64_t X86::getImplicitAddend(const uint8_t *buf, RelType type) const { |
237 | switch (type) { |
238 | case R_386_8: |
239 | case R_386_PC8: |
240 | return SignExtend64<8>(x: *buf); |
241 | case R_386_16: |
242 | case R_386_PC16: |
243 | return SignExtend64<16>(x: read16le(P: buf)); |
244 | case R_386_32: |
245 | case R_386_GLOB_DAT: |
246 | case R_386_GOT32: |
247 | case R_386_GOT32X: |
248 | case R_386_GOTOFF: |
249 | case R_386_GOTPC: |
250 | case R_386_IRELATIVE: |
251 | case R_386_PC32: |
252 | case R_386_PLT32: |
253 | case R_386_RELATIVE: |
254 | case R_386_TLS_GOTDESC: |
255 | case R_386_TLS_DESC_CALL: |
256 | case R_386_TLS_DTPMOD32: |
257 | case R_386_TLS_DTPOFF32: |
258 | case R_386_TLS_LDO_32: |
259 | case R_386_TLS_LDM: |
260 | case R_386_TLS_IE: |
261 | case R_386_TLS_IE_32: |
262 | case R_386_TLS_LE: |
263 | case R_386_TLS_LE_32: |
264 | case R_386_TLS_GD: |
265 | case R_386_TLS_GD_32: |
266 | case R_386_TLS_GOTIE: |
267 | case R_386_TLS_TPOFF: |
268 | case R_386_TLS_TPOFF32: |
269 | return SignExtend64<32>(x: read32le(P: buf)); |
270 | case R_386_TLS_DESC: |
271 | return SignExtend64<32>(x: read32le(P: buf + 4)); |
272 | case R_386_NONE: |
273 | case R_386_JUMP_SLOT: |
274 | // These relocations are defined as not having an implicit addend. |
275 | return 0; |
276 | default: |
277 | internalLinkerError(loc: getErrorLocation(loc: buf), |
278 | msg: "cannot read addend for relocation " + toString(type)); |
279 | return 0; |
280 | } |
281 | } |
282 | |
283 | void X86::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const { |
284 | switch (rel.type) { |
285 | case R_386_8: |
286 | // R_386_{PC,}{8,16} are not part of the i386 psABI, but they are |
287 | // being used for some 16-bit programs such as boot loaders, so |
288 | // we want to support them. |
289 | checkIntUInt(loc, v: val, n: 8, rel); |
290 | *loc = val; |
291 | break; |
292 | case R_386_PC8: |
293 | checkInt(loc, v: val, n: 8, rel); |
294 | *loc = val; |
295 | break; |
296 | case R_386_16: |
297 | checkIntUInt(loc, v: val, n: 16, rel); |
298 | write16le(P: loc, V: val); |
299 | break; |
300 | case R_386_PC16: |
301 | // R_386_PC16 is normally used with 16 bit code. In that situation |
302 | // the PC is 16 bits, just like the addend. This means that it can |
303 | // point from any 16 bit address to any other if the possibility |
304 | // of wrapping is included. |
305 | // The only restriction we have to check then is that the destination |
306 | // address fits in 16 bits. That is impossible to do here. The problem is |
307 | // that we are passed the final value, which already had the |
308 | // current location subtracted from it. |
309 | // We just check that Val fits in 17 bits. This misses some cases, but |
310 | // should have no false positives. |
311 | checkInt(loc, v: val, n: 17, rel); |
312 | write16le(P: loc, V: val); |
313 | break; |
314 | case R_386_32: |
315 | case R_386_GOT32: |
316 | case R_386_GOT32X: |
317 | case R_386_GOTOFF: |
318 | case R_386_GOTPC: |
319 | case R_386_PC32: |
320 | case R_386_PLT32: |
321 | case R_386_RELATIVE: |
322 | case R_386_TLS_GOTDESC: |
323 | case R_386_TLS_DESC_CALL: |
324 | case R_386_TLS_DTPMOD32: |
325 | case R_386_TLS_DTPOFF32: |
326 | case R_386_TLS_GD: |
327 | case R_386_TLS_GOTIE: |
328 | case R_386_TLS_IE: |
329 | case R_386_TLS_LDM: |
330 | case R_386_TLS_LDO_32: |
331 | case R_386_TLS_LE: |
332 | case R_386_TLS_LE_32: |
333 | case R_386_TLS_TPOFF: |
334 | case R_386_TLS_TPOFF32: |
335 | checkInt(loc, v: val, n: 32, rel); |
336 | write32le(P: loc, V: val); |
337 | break; |
338 | case R_386_TLS_DESC: |
339 | // The addend is stored in the second 32-bit word. |
340 | write32le(P: loc + 4, V: val); |
341 | break; |
342 | default: |
343 | llvm_unreachable("unknown relocation" ); |
344 | } |
345 | } |
346 | |
347 | static void relaxTlsGdToLe(uint8_t *loc, const Relocation &rel, uint64_t val) { |
348 | if (rel.type == R_386_TLS_GD) { |
349 | // Convert (loc[-2] == 0x04) |
350 | // leal x@tlsgd(, %ebx, 1), %eax |
351 | // call ___tls_get_addr@plt |
352 | // or |
353 | // leal x@tlsgd(%reg), %eax |
354 | // call *___tls_get_addr@got(%reg) |
355 | // to |
356 | const uint8_t inst[] = { |
357 | 0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0, %eax |
358 | 0x81, 0xe8, 0, 0, 0, 0, // subl x@ntpoff(%ebx), %eax |
359 | }; |
360 | uint8_t *w = loc[-2] == 0x04 ? loc - 3 : loc - 2; |
361 | memcpy(dest: w, src: inst, n: sizeof(inst)); |
362 | write32le(P: w + 8, V: val); |
363 | } else if (rel.type == R_386_TLS_GOTDESC) { |
364 | // Convert leal x@tlsdesc(%ebx), %eax to leal x@ntpoff, %eax. |
365 | // |
366 | // Note: call *x@tlsdesc(%eax) may not immediately follow this instruction. |
367 | if (memcmp(s1: loc - 2, s2: "\x8d\x83" , n: 2)) { |
368 | error(msg: getErrorLocation(loc: loc - 2) + |
369 | "R_386_TLS_GOTDESC must be used in leal x@tlsdesc(%ebx), %eax" ); |
370 | return; |
371 | } |
372 | loc[-1] = 0x05; |
373 | write32le(P: loc, V: val); |
374 | } else { |
375 | // Convert call *x@tlsdesc(%eax) to xchg ax, ax. |
376 | assert(rel.type == R_386_TLS_DESC_CALL); |
377 | loc[0] = 0x66; |
378 | loc[1] = 0x90; |
379 | } |
380 | } |
381 | |
382 | static void relaxTlsGdToIe(uint8_t *loc, const Relocation &rel, uint64_t val) { |
383 | if (rel.type == R_386_TLS_GD) { |
384 | // Convert (loc[-2] == 0x04) |
385 | // leal x@tlsgd(, %ebx, 1), %eax |
386 | // call ___tls_get_addr@plt |
387 | // or |
388 | // leal x@tlsgd(%reg), %eax |
389 | // call *___tls_get_addr@got(%reg) |
390 | const uint8_t inst[] = { |
391 | 0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0, %eax |
392 | 0x03, 0x83, 0, 0, 0, 0, // addl x@gottpoff(%ebx), %eax |
393 | }; |
394 | uint8_t *w = loc[-2] == 0x04 ? loc - 3 : loc - 2; |
395 | memcpy(dest: w, src: inst, n: sizeof(inst)); |
396 | write32le(P: w + 8, V: val); |
397 | } else if (rel.type == R_386_TLS_GOTDESC) { |
398 | // Convert leal x@tlsdesc(%ebx), %eax to movl x@gotntpoff(%ebx), %eax. |
399 | if (memcmp(s1: loc - 2, s2: "\x8d\x83" , n: 2)) { |
400 | error(msg: getErrorLocation(loc: loc - 2) + |
401 | "R_386_TLS_GOTDESC must be used in leal x@tlsdesc(%ebx), %eax" ); |
402 | return; |
403 | } |
404 | loc[-2] = 0x8b; |
405 | write32le(P: loc, V: val); |
406 | } else { |
407 | // Convert call *x@tlsdesc(%eax) to xchg ax, ax. |
408 | assert(rel.type == R_386_TLS_DESC_CALL); |
409 | loc[0] = 0x66; |
410 | loc[1] = 0x90; |
411 | } |
412 | } |
413 | |
414 | // In some conditions, relocations can be optimized to avoid using GOT. |
415 | // This function does that for Initial Exec to Local Exec case. |
416 | static void relaxTlsIeToLe(uint8_t *loc, const Relocation &rel, uint64_t val) { |
417 | // Ulrich's document section 6.2 says that @gotntpoff can |
418 | // be used with MOVL or ADDL instructions. |
419 | // @indntpoff is similar to @gotntpoff, but for use in |
420 | // position dependent code. |
421 | uint8_t reg = (loc[-1] >> 3) & 7; |
422 | |
423 | if (rel.type == R_386_TLS_IE) { |
424 | if (loc[-1] == 0xa1) { |
425 | // "movl foo@indntpoff,%eax" -> "movl $foo,%eax" |
426 | // This case is different from the generic case below because |
427 | // this is a 5 byte instruction while below is 6 bytes. |
428 | loc[-1] = 0xb8; |
429 | } else if (loc[-2] == 0x8b) { |
430 | // "movl foo@indntpoff,%reg" -> "movl $foo,%reg" |
431 | loc[-2] = 0xc7; |
432 | loc[-1] = 0xc0 | reg; |
433 | } else { |
434 | // "addl foo@indntpoff,%reg" -> "addl $foo,%reg" |
435 | loc[-2] = 0x81; |
436 | loc[-1] = 0xc0 | reg; |
437 | } |
438 | } else { |
439 | assert(rel.type == R_386_TLS_GOTIE); |
440 | if (loc[-2] == 0x8b) { |
441 | // "movl foo@gottpoff(%rip),%reg" -> "movl $foo,%reg" |
442 | loc[-2] = 0xc7; |
443 | loc[-1] = 0xc0 | reg; |
444 | } else { |
445 | // "addl foo@gotntpoff(%rip),%reg" -> "leal foo(%reg),%reg" |
446 | loc[-2] = 0x8d; |
447 | loc[-1] = 0x80 | (reg << 3) | reg; |
448 | } |
449 | } |
450 | write32le(P: loc, V: val); |
451 | } |
452 | |
453 | static void relaxTlsLdToLe(uint8_t *loc, const Relocation &rel, uint64_t val) { |
454 | if (rel.type == R_386_TLS_LDO_32) { |
455 | write32le(P: loc, V: val); |
456 | return; |
457 | } |
458 | |
459 | if (loc[4] == 0xe8) { |
460 | // Convert |
461 | // leal x(%reg),%eax |
462 | // call ___tls_get_addr@plt |
463 | // to |
464 | const uint8_t inst[] = { |
465 | 0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0,%eax |
466 | 0x90, // nop |
467 | 0x8d, 0x74, 0x26, 0x00, // leal 0(%esi,1),%esi |
468 | }; |
469 | memcpy(dest: loc - 2, src: inst, n: sizeof(inst)); |
470 | return; |
471 | } |
472 | |
473 | // Convert |
474 | // leal x(%reg),%eax |
475 | // call *___tls_get_addr@got(%reg) |
476 | // to |
477 | const uint8_t inst[] = { |
478 | 0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0,%eax |
479 | 0x8d, 0xb6, 0x00, 0x00, 0x00, 0x00, // leal (%esi),%esi |
480 | }; |
481 | memcpy(dest: loc - 2, src: inst, n: sizeof(inst)); |
482 | } |
483 | |
484 | void X86::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const { |
485 | uint64_t secAddr = sec.getOutputSection()->addr; |
486 | if (auto *s = dyn_cast<InputSection>(Val: &sec)) |
487 | secAddr += s->outSecOff; |
488 | for (const Relocation &rel : sec.relocs()) { |
489 | uint8_t *loc = buf + rel.offset; |
490 | const uint64_t val = SignExtend64( |
491 | X: sec.getRelocTargetVA(File: sec.file, Type: rel.type, A: rel.addend, |
492 | P: secAddr + rel.offset, Sym: *rel.sym, Expr: rel.expr), |
493 | B: 32); |
494 | switch (rel.expr) { |
495 | case R_RELAX_TLS_GD_TO_IE_GOTPLT: |
496 | relaxTlsGdToIe(loc, rel, val); |
497 | continue; |
498 | case R_RELAX_TLS_GD_TO_LE: |
499 | case R_RELAX_TLS_GD_TO_LE_NEG: |
500 | relaxTlsGdToLe(loc, rel, val); |
501 | continue; |
502 | case R_RELAX_TLS_LD_TO_LE: |
503 | relaxTlsLdToLe(loc, rel, val); |
504 | break; |
505 | case R_RELAX_TLS_IE_TO_LE: |
506 | relaxTlsIeToLe(loc, rel, val); |
507 | continue; |
508 | default: |
509 | relocate(loc, rel, val); |
510 | break; |
511 | } |
512 | } |
513 | } |
514 | |
515 | // If Intel Indirect Branch Tracking is enabled, we have to emit special PLT |
516 | // entries containing endbr32 instructions. A PLT entry will be split into two |
517 | // parts, one in .plt.sec (writePlt), and the other in .plt (writeIBTPlt). |
518 | namespace { |
519 | class IntelIBT : public X86 { |
520 | public: |
521 | IntelIBT(); |
522 | void writeGotPlt(uint8_t *buf, const Symbol &s) const override; |
523 | void writePlt(uint8_t *buf, const Symbol &sym, |
524 | uint64_t pltEntryAddr) const override; |
525 | void writeIBTPlt(uint8_t *buf, size_t numEntries) const override; |
526 | |
527 | static const unsigned = 16; |
528 | }; |
529 | } // namespace |
530 | |
531 | IntelIBT::IntelIBT() { pltHeaderSize = 0; } |
532 | |
533 | void IntelIBT::writeGotPlt(uint8_t *buf, const Symbol &s) const { |
534 | uint64_t va = |
535 | in.ibtPlt->getVA() + IBTPltHeaderSize + s.getPltIdx() * pltEntrySize; |
536 | write32le(P: buf, V: va); |
537 | } |
538 | |
539 | void IntelIBT::writePlt(uint8_t *buf, const Symbol &sym, |
540 | uint64_t /*pltEntryAddr*/) const { |
541 | if (config->isPic) { |
542 | const uint8_t inst[] = { |
543 | 0xf3, 0x0f, 0x1e, 0xfb, // endbr32 |
544 | 0xff, 0xa3, 0, 0, 0, 0, // jmp *name@GOT(%ebx) |
545 | 0x66, 0x0f, 0x1f, 0x44, 0, 0, // nop |
546 | }; |
547 | memcpy(dest: buf, src: inst, n: sizeof(inst)); |
548 | write32le(P: buf + 6, V: sym.getGotPltVA() - in.gotPlt->getVA()); |
549 | return; |
550 | } |
551 | |
552 | const uint8_t inst[] = { |
553 | 0xf3, 0x0f, 0x1e, 0xfb, // endbr32 |
554 | 0xff, 0x25, 0, 0, 0, 0, // jmp *foo@GOT |
555 | 0x66, 0x0f, 0x1f, 0x44, 0, 0, // nop |
556 | }; |
557 | memcpy(dest: buf, src: inst, n: sizeof(inst)); |
558 | write32le(P: buf + 6, V: sym.getGotPltVA()); |
559 | } |
560 | |
561 | void IntelIBT::writeIBTPlt(uint8_t *buf, size_t numEntries) const { |
562 | writePltHeader(buf); |
563 | buf += IBTPltHeaderSize; |
564 | |
565 | const uint8_t inst[] = { |
566 | 0xf3, 0x0f, 0x1e, 0xfb, // endbr32 |
567 | 0x68, 0, 0, 0, 0, // pushl $reloc_offset |
568 | 0xe9, 0, 0, 0, 0, // jmpq .PLT0@PC |
569 | 0x66, 0x90, // nop |
570 | }; |
571 | |
572 | for (size_t i = 0; i < numEntries; ++i) { |
573 | memcpy(dest: buf, src: inst, n: sizeof(inst)); |
574 | write32le(P: buf + 5, V: i * sizeof(object::ELF32LE::Rel)); |
575 | write32le(P: buf + 10, V: -pltHeaderSize - sizeof(inst) * i - 30); |
576 | buf += sizeof(inst); |
577 | } |
578 | } |
579 | |
580 | namespace { |
581 | class RetpolinePic : public X86 { |
582 | public: |
583 | RetpolinePic(); |
584 | void writeGotPlt(uint8_t *buf, const Symbol &s) const override; |
585 | void writePltHeader(uint8_t *buf) const override; |
586 | void writePlt(uint8_t *buf, const Symbol &sym, |
587 | uint64_t pltEntryAddr) const override; |
588 | }; |
589 | |
590 | class RetpolineNoPic : public X86 { |
591 | public: |
592 | RetpolineNoPic(); |
593 | void writeGotPlt(uint8_t *buf, const Symbol &s) const override; |
594 | void writePltHeader(uint8_t *buf) const override; |
595 | void writePlt(uint8_t *buf, const Symbol &sym, |
596 | uint64_t pltEntryAddr) const override; |
597 | }; |
598 | } // namespace |
599 | |
600 | RetpolinePic::RetpolinePic() { |
601 | pltHeaderSize = 48; |
602 | pltEntrySize = 32; |
603 | ipltEntrySize = 32; |
604 | } |
605 | |
606 | void RetpolinePic::writeGotPlt(uint8_t *buf, const Symbol &s) const { |
607 | write32le(P: buf, V: s.getPltVA() + 17); |
608 | } |
609 | |
610 | void RetpolinePic::(uint8_t *buf) const { |
611 | const uint8_t insn[] = { |
612 | 0xff, 0xb3, 4, 0, 0, 0, // 0: pushl 4(%ebx) |
613 | 0x50, // 6: pushl %eax |
614 | 0x8b, 0x83, 8, 0, 0, 0, // 7: mov 8(%ebx), %eax |
615 | 0xe8, 0x0e, 0x00, 0x00, 0x00, // d: call next |
616 | 0xf3, 0x90, // 12: loop: pause |
617 | 0x0f, 0xae, 0xe8, // 14: lfence |
618 | 0xeb, 0xf9, // 17: jmp loop |
619 | 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 19: int3; .align 16 |
620 | 0x89, 0x0c, 0x24, // 20: next: mov %ecx, (%esp) |
621 | 0x8b, 0x4c, 0x24, 0x04, // 23: mov 0x4(%esp), %ecx |
622 | 0x89, 0x44, 0x24, 0x04, // 27: mov %eax ,0x4(%esp) |
623 | 0x89, 0xc8, // 2b: mov %ecx, %eax |
624 | 0x59, // 2d: pop %ecx |
625 | 0xc3, // 2e: ret |
626 | 0xcc, // 2f: int3; padding |
627 | }; |
628 | memcpy(dest: buf, src: insn, n: sizeof(insn)); |
629 | } |
630 | |
631 | void RetpolinePic::writePlt(uint8_t *buf, const Symbol &sym, |
632 | uint64_t pltEntryAddr) const { |
633 | unsigned relOff = in.relaPlt->entsize * sym.getPltIdx(); |
634 | const uint8_t insn[] = { |
635 | 0x50, // pushl %eax |
636 | 0x8b, 0x83, 0, 0, 0, 0, // mov foo@GOT(%ebx), %eax |
637 | 0xe8, 0, 0, 0, 0, // call plt+0x20 |
638 | 0xe9, 0, 0, 0, 0, // jmp plt+0x12 |
639 | 0x68, 0, 0, 0, 0, // pushl $reloc_offset |
640 | 0xe9, 0, 0, 0, 0, // jmp plt+0 |
641 | 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // int3; padding |
642 | }; |
643 | memcpy(dest: buf, src: insn, n: sizeof(insn)); |
644 | |
645 | uint32_t ebx = in.gotPlt->getVA(); |
646 | unsigned off = pltEntryAddr - in.plt->getVA(); |
647 | write32le(P: buf + 3, V: sym.getGotPltVA() - ebx); |
648 | write32le(P: buf + 8, V: -off - 12 + 32); |
649 | write32le(P: buf + 13, V: -off - 17 + 18); |
650 | write32le(P: buf + 18, V: relOff); |
651 | write32le(P: buf + 23, V: -off - 27); |
652 | } |
653 | |
654 | RetpolineNoPic::RetpolineNoPic() { |
655 | pltHeaderSize = 48; |
656 | pltEntrySize = 32; |
657 | ipltEntrySize = 32; |
658 | } |
659 | |
660 | void RetpolineNoPic::writeGotPlt(uint8_t *buf, const Symbol &s) const { |
661 | write32le(P: buf, V: s.getPltVA() + 16); |
662 | } |
663 | |
664 | void RetpolineNoPic::(uint8_t *buf) const { |
665 | const uint8_t insn[] = { |
666 | 0xff, 0x35, 0, 0, 0, 0, // 0: pushl GOTPLT+4 |
667 | 0x50, // 6: pushl %eax |
668 | 0xa1, 0, 0, 0, 0, // 7: mov GOTPLT+8, %eax |
669 | 0xe8, 0x0f, 0x00, 0x00, 0x00, // c: call next |
670 | 0xf3, 0x90, // 11: loop: pause |
671 | 0x0f, 0xae, 0xe8, // 13: lfence |
672 | 0xeb, 0xf9, // 16: jmp loop |
673 | 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 18: int3 |
674 | 0xcc, 0xcc, 0xcc, // 1f: int3; .align 16 |
675 | 0x89, 0x0c, 0x24, // 20: next: mov %ecx, (%esp) |
676 | 0x8b, 0x4c, 0x24, 0x04, // 23: mov 0x4(%esp), %ecx |
677 | 0x89, 0x44, 0x24, 0x04, // 27: mov %eax ,0x4(%esp) |
678 | 0x89, 0xc8, // 2b: mov %ecx, %eax |
679 | 0x59, // 2d: pop %ecx |
680 | 0xc3, // 2e: ret |
681 | 0xcc, // 2f: int3; padding |
682 | }; |
683 | memcpy(dest: buf, src: insn, n: sizeof(insn)); |
684 | |
685 | uint32_t gotPlt = in.gotPlt->getVA(); |
686 | write32le(P: buf + 2, V: gotPlt + 4); |
687 | write32le(P: buf + 8, V: gotPlt + 8); |
688 | } |
689 | |
690 | void RetpolineNoPic::writePlt(uint8_t *buf, const Symbol &sym, |
691 | uint64_t pltEntryAddr) const { |
692 | unsigned relOff = in.relaPlt->entsize * sym.getPltIdx(); |
693 | const uint8_t insn[] = { |
694 | 0x50, // 0: pushl %eax |
695 | 0xa1, 0, 0, 0, 0, // 1: mov foo_in_GOT, %eax |
696 | 0xe8, 0, 0, 0, 0, // 6: call plt+0x20 |
697 | 0xe9, 0, 0, 0, 0, // b: jmp plt+0x11 |
698 | 0x68, 0, 0, 0, 0, // 10: pushl $reloc_offset |
699 | 0xe9, 0, 0, 0, 0, // 15: jmp plt+0 |
700 | 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 1a: int3; padding |
701 | 0xcc, // 1f: int3; padding |
702 | }; |
703 | memcpy(dest: buf, src: insn, n: sizeof(insn)); |
704 | |
705 | unsigned off = pltEntryAddr - in.plt->getVA(); |
706 | write32le(P: buf + 2, V: sym.getGotPltVA()); |
707 | write32le(P: buf + 7, V: -off - 11 + 32); |
708 | write32le(P: buf + 12, V: -off - 16 + 17); |
709 | write32le(P: buf + 17, V: relOff); |
710 | write32le(P: buf + 22, V: -off - 26); |
711 | } |
712 | |
713 | TargetInfo *elf::getX86TargetInfo() { |
714 | if (config->zRetpolineplt) { |
715 | if (config->isPic) { |
716 | static RetpolinePic t; |
717 | return &t; |
718 | } |
719 | static RetpolineNoPic t; |
720 | return &t; |
721 | } |
722 | |
723 | if (config->andFeatures & GNU_PROPERTY_X86_FEATURE_1_IBT) { |
724 | static IntelIBT t; |
725 | return &t; |
726 | } |
727 | |
728 | static X86 t; |
729 | return &t; |
730 | } |
731 | |