1//===- X86.cpp ------------------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "OutputSections.h"
10#include "Symbols.h"
11#include "SyntheticSections.h"
12#include "Target.h"
13#include "llvm/Support/Endian.h"
14
15using namespace llvm;
16using namespace llvm::support::endian;
17using namespace llvm::ELF;
18using namespace lld;
19using namespace lld::elf;
20
21namespace {
22class X86 : public TargetInfo {
23public:
24 X86(Ctx &);
25 int getTlsGdRelaxSkip(RelType type) const override;
26 RelExpr getRelExpr(RelType type, const Symbol &s,
27 const uint8_t *loc) const override;
28 int64_t getImplicitAddend(const uint8_t *buf, RelType type) const override;
29 void writeGotPltHeader(uint8_t *buf) const override;
30 RelType getDynRel(RelType type) const override;
31 void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
32 void writeIgotPlt(uint8_t *buf, const Symbol &s) const override;
33 void writePltHeader(uint8_t *buf) const override;
34 void writePlt(uint8_t *buf, const Symbol &sym,
35 uint64_t pltEntryAddr) const override;
36 void relocate(uint8_t *loc, const Relocation &rel,
37 uint64_t val) const override;
38
39 RelExpr adjustTlsExpr(RelType type, RelExpr expr) const override;
40 void relocateAlloc(InputSectionBase &sec, uint8_t *buf) const override;
41
42private:
43 void relaxTlsGdToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const;
44 void relaxTlsGdToIe(uint8_t *loc, const Relocation &rel, uint64_t val) const;
45 void relaxTlsLdToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const;
46 void relaxTlsIeToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const;
47};
48} // namespace
49
50X86::X86(Ctx &ctx) : TargetInfo(ctx) {
51 copyRel = R_386_COPY;
52 gotRel = R_386_GLOB_DAT;
53 pltRel = R_386_JUMP_SLOT;
54 iRelativeRel = R_386_IRELATIVE;
55 relativeRel = R_386_RELATIVE;
56 symbolicRel = R_386_32;
57 tlsDescRel = R_386_TLS_DESC;
58 tlsGotRel = R_386_TLS_TPOFF;
59 tlsModuleIndexRel = R_386_TLS_DTPMOD32;
60 tlsOffsetRel = R_386_TLS_DTPOFF32;
61 gotBaseSymInGotPlt = true;
62 pltHeaderSize = 16;
63 pltEntrySize = 16;
64 ipltEntrySize = 16;
65 trapInstr = {0xcc, 0xcc, 0xcc, 0xcc}; // 0xcc = INT3
66
67 // Align to the non-PAE large page size (known as a superpage or huge page).
68 // FreeBSD automatically promotes large, superpage-aligned allocations.
69 defaultImageBase = 0x400000;
70}
71
72int X86::getTlsGdRelaxSkip(RelType type) const {
73 // TLSDESC relocations are processed separately. See relaxTlsGdToLe below.
74 return type == R_386_TLS_GOTDESC || type == R_386_TLS_DESC_CALL ? 1 : 2;
75}
76
77RelExpr X86::getRelExpr(RelType type, const Symbol &s,
78 const uint8_t *loc) const {
79 switch (type) {
80 case R_386_8:
81 case R_386_16:
82 case R_386_32:
83 return R_ABS;
84 case R_386_TLS_LDO_32:
85 return R_DTPREL;
86 case R_386_TLS_GD:
87 return R_TLSGD_GOTPLT;
88 case R_386_TLS_LDM:
89 return R_TLSLD_GOTPLT;
90 case R_386_PLT32:
91 return R_PLT_PC;
92 case R_386_PC8:
93 case R_386_PC16:
94 case R_386_PC32:
95 return R_PC;
96 case R_386_GOTPC:
97 return R_GOTPLTONLY_PC;
98 case R_386_TLS_IE:
99 return R_GOT;
100 case R_386_GOT32:
101 case R_386_GOT32X:
102 // These relocations are arguably mis-designed because their calculations
103 // depend on the instructions they are applied to. This is bad because we
104 // usually don't care about whether the target section contains valid
105 // machine instructions or not. But this is part of the documented ABI, so
106 // we had to implement as the standard requires.
107 //
108 // x86 does not support PC-relative data access. Therefore, in order to
109 // access GOT contents, a GOT address needs to be known at link-time
110 // (which means non-PIC) or compilers have to emit code to get a GOT
111 // address at runtime (which means code is position-independent but
112 // compilers need to emit extra code for each GOT access.) This decision
113 // is made at compile-time. In the latter case, compilers emit code to
114 // load a GOT address to a register, which is usually %ebx.
115 //
116 // So, there are two ways to refer to symbol foo's GOT entry: foo@GOT or
117 // foo@GOT(%ebx).
118 //
119 // foo@GOT is not usable in PIC. If we are creating a PIC output and if we
120 // find such relocation, we should report an error. foo@GOT is resolved to
121 // an *absolute* address of foo's GOT entry, because both GOT address and
122 // foo's offset are known. In other words, it's G + A.
123 //
124 // foo@GOT(%ebx) needs to be resolved to a *relative* offset from a GOT to
125 // foo's GOT entry in the table, because GOT address is not known but foo's
126 // offset in the table is known. It's G + A - GOT.
127 //
128 // It's unfortunate that compilers emit the same relocation for these
129 // different use cases. In order to distinguish them, we have to read a
130 // machine instruction.
131 //
132 // The following code implements it. We assume that Loc[0] is the first byte
133 // of a displacement or an immediate field of a valid machine
134 // instruction. That means a ModRM byte is at Loc[-1]. By taking a look at
135 // the byte, we can determine whether the instruction uses the operand as an
136 // absolute address (R_GOT) or a register-relative address (R_GOTPLT).
137 return (loc[-1] & 0xc7) == 0x5 ? R_GOT : R_GOTPLT;
138 case R_386_TLS_GOTDESC:
139 return R_TLSDESC_GOTPLT;
140 case R_386_TLS_DESC_CALL:
141 return R_TLSDESC_CALL;
142 case R_386_TLS_GOTIE:
143 return R_GOTPLT;
144 case R_386_GOTOFF:
145 return R_GOTPLTREL;
146 case R_386_TLS_LE:
147 return R_TPREL;
148 case R_386_TLS_LE_32:
149 return R_TPREL_NEG;
150 case R_386_NONE:
151 return R_NONE;
152 default:
153 Err(ctx) << getErrorLoc(ctx, loc) << "unknown relocation (" << type.v
154 << ") against symbol " << &s;
155 return R_NONE;
156 }
157}
158
159RelExpr X86::adjustTlsExpr(RelType type, RelExpr expr) const {
160 switch (expr) {
161 default:
162 return expr;
163 case R_RELAX_TLS_GD_TO_IE:
164 return R_RELAX_TLS_GD_TO_IE_GOTPLT;
165 case R_RELAX_TLS_GD_TO_LE:
166 return type == R_386_TLS_GD ? R_RELAX_TLS_GD_TO_LE_NEG
167 : R_RELAX_TLS_GD_TO_LE;
168 }
169}
170
171void X86::writeGotPltHeader(uint8_t *buf) const {
172 write32le(P: buf, V: ctx.mainPart->dynamic->getVA());
173}
174
175void X86::writeGotPlt(uint8_t *buf, const Symbol &s) const {
176 // Entries in .got.plt initially points back to the corresponding
177 // PLT entries with a fixed offset to skip the first instruction.
178 write32le(P: buf, V: s.getPltVA(ctx) + 6);
179}
180
181void X86::writeIgotPlt(uint8_t *buf, const Symbol &s) const {
182 // An x86 entry is the address of the ifunc resolver function.
183 write32le(P: buf, V: s.getVA(ctx));
184}
185
186RelType X86::getDynRel(RelType type) const {
187 if (type == R_386_TLS_LE)
188 return R_386_TLS_TPOFF;
189 if (type == R_386_TLS_LE_32)
190 return R_386_TLS_TPOFF32;
191 return type;
192}
193
194void X86::writePltHeader(uint8_t *buf) const {
195 if (ctx.arg.isPic) {
196 const uint8_t v[] = {
197 0xff, 0xb3, 0x04, 0x00, 0x00, 0x00, // pushl 4(%ebx)
198 0xff, 0xa3, 0x08, 0x00, 0x00, 0x00, // jmp *8(%ebx)
199 0x90, 0x90, 0x90, 0x90 // nop
200 };
201 memcpy(dest: buf, src: v, n: sizeof(v));
202 return;
203 }
204
205 const uint8_t pltData[] = {
206 0xff, 0x35, 0, 0, 0, 0, // pushl (GOTPLT+4)
207 0xff, 0x25, 0, 0, 0, 0, // jmp *(GOTPLT+8)
208 0x90, 0x90, 0x90, 0x90, // nop
209 };
210 memcpy(dest: buf, src: pltData, n: sizeof(pltData));
211 uint32_t gotPlt = ctx.in.gotPlt->getVA();
212 write32le(P: buf + 2, V: gotPlt + 4);
213 write32le(P: buf + 8, V: gotPlt + 8);
214}
215
216void X86::writePlt(uint8_t *buf, const Symbol &sym,
217 uint64_t pltEntryAddr) const {
218 unsigned relOff = ctx.in.relaPlt->entsize * sym.getPltIdx(ctx);
219 if (ctx.arg.isPic) {
220 const uint8_t inst[] = {
221 0xff, 0xa3, 0, 0, 0, 0, // jmp *foo@GOT(%ebx)
222 0x68, 0, 0, 0, 0, // pushl $reloc_offset
223 0xe9, 0, 0, 0, 0, // jmp .PLT0@PC
224 };
225 memcpy(dest: buf, src: inst, n: sizeof(inst));
226 write32le(P: buf + 2, V: sym.getGotPltVA(ctx) - ctx.in.gotPlt->getVA());
227 } else {
228 const uint8_t inst[] = {
229 0xff, 0x25, 0, 0, 0, 0, // jmp *foo@GOT
230 0x68, 0, 0, 0, 0, // pushl $reloc_offset
231 0xe9, 0, 0, 0, 0, // jmp .PLT0@PC
232 };
233 memcpy(dest: buf, src: inst, n: sizeof(inst));
234 write32le(P: buf + 2, V: sym.getGotPltVA(ctx));
235 }
236
237 write32le(P: buf + 7, V: relOff);
238 write32le(P: buf + 12, V: ctx.in.plt->getVA() - pltEntryAddr - 16);
239}
240
241int64_t X86::getImplicitAddend(const uint8_t *buf, RelType type) const {
242 switch (type) {
243 case R_386_8:
244 case R_386_PC8:
245 return SignExtend64<8>(x: *buf);
246 case R_386_16:
247 case R_386_PC16:
248 return SignExtend64<16>(x: read16le(P: buf));
249 case R_386_32:
250 case R_386_GLOB_DAT:
251 case R_386_GOT32:
252 case R_386_GOT32X:
253 case R_386_GOTOFF:
254 case R_386_GOTPC:
255 case R_386_IRELATIVE:
256 case R_386_PC32:
257 case R_386_PLT32:
258 case R_386_RELATIVE:
259 case R_386_TLS_GOTDESC:
260 case R_386_TLS_DESC_CALL:
261 case R_386_TLS_DTPMOD32:
262 case R_386_TLS_DTPOFF32:
263 case R_386_TLS_LDO_32:
264 case R_386_TLS_LDM:
265 case R_386_TLS_IE:
266 case R_386_TLS_IE_32:
267 case R_386_TLS_LE:
268 case R_386_TLS_LE_32:
269 case R_386_TLS_GD:
270 case R_386_TLS_GD_32:
271 case R_386_TLS_GOTIE:
272 case R_386_TLS_TPOFF:
273 case R_386_TLS_TPOFF32:
274 return SignExtend64<32>(x: read32le(P: buf));
275 case R_386_TLS_DESC:
276 return SignExtend64<32>(x: read32le(P: buf + 4));
277 case R_386_NONE:
278 case R_386_JUMP_SLOT:
279 // These relocations are defined as not having an implicit addend.
280 return 0;
281 default:
282 InternalErr(ctx, buf) << "cannot read addend for relocation " << type;
283 return 0;
284 }
285}
286
287void X86::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
288 switch (rel.type) {
289 case R_386_8:
290 // R_386_{PC,}{8,16} are not part of the i386 psABI, but they are
291 // being used for some 16-bit programs such as boot loaders, so
292 // we want to support them.
293 checkIntUInt(ctx, loc, v: val, n: 8, rel);
294 *loc = val;
295 break;
296 case R_386_PC8:
297 checkInt(ctx, loc, v: val, n: 8, rel);
298 *loc = val;
299 break;
300 case R_386_16:
301 checkIntUInt(ctx, loc, v: val, n: 16, rel);
302 write16le(P: loc, V: val);
303 break;
304 case R_386_PC16:
305 // R_386_PC16 is normally used with 16 bit code. In that situation
306 // the PC is 16 bits, just like the addend. This means that it can
307 // point from any 16 bit address to any other if the possibility
308 // of wrapping is included.
309 // The only restriction we have to check then is that the destination
310 // address fits in 16 bits. That is impossible to do here. The problem is
311 // that we are passed the final value, which already had the
312 // current location subtracted from it.
313 // We just check that Val fits in 17 bits. This misses some cases, but
314 // should have no false positives.
315 checkInt(ctx, loc, v: val, n: 17, rel);
316 write16le(P: loc, V: val);
317 break;
318 case R_386_32:
319 case R_386_GOT32:
320 case R_386_GOT32X:
321 case R_386_GOTOFF:
322 case R_386_GOTPC:
323 case R_386_PC32:
324 case R_386_PLT32:
325 case R_386_RELATIVE:
326 case R_386_TLS_GOTDESC:
327 case R_386_TLS_DESC_CALL:
328 case R_386_TLS_DTPMOD32:
329 case R_386_TLS_DTPOFF32:
330 case R_386_TLS_GD:
331 case R_386_TLS_GOTIE:
332 case R_386_TLS_IE:
333 case R_386_TLS_LDM:
334 case R_386_TLS_LDO_32:
335 case R_386_TLS_LE:
336 case R_386_TLS_LE_32:
337 case R_386_TLS_TPOFF:
338 case R_386_TLS_TPOFF32:
339 checkInt(ctx, loc, v: val, n: 32, rel);
340 write32le(P: loc, V: val);
341 break;
342 case R_386_TLS_DESC:
343 // The addend is stored in the second 32-bit word.
344 write32le(P: loc + 4, V: val);
345 break;
346 default:
347 llvm_unreachable("unknown relocation");
348 }
349}
350
351void X86::relaxTlsGdToLe(uint8_t *loc, const Relocation &rel,
352 uint64_t val) const {
353 if (rel.type == R_386_TLS_GD) {
354 // Convert (loc[-2] == 0x04)
355 // leal x@tlsgd(, %ebx, 1), %eax
356 // call ___tls_get_addr@plt
357 // or
358 // leal x@tlsgd(%reg), %eax
359 // call *___tls_get_addr@got(%reg)
360 // to
361 const uint8_t inst[] = {
362 0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0, %eax
363 0x81, 0xe8, 0, 0, 0, 0, // subl x@ntpoff(%ebx), %eax
364 };
365 uint8_t *w = loc[-2] == 0x04 ? loc - 3 : loc - 2;
366 memcpy(dest: w, src: inst, n: sizeof(inst));
367 write32le(P: w + 8, V: val);
368 } else if (rel.type == R_386_TLS_GOTDESC) {
369 // Convert leal x@tlsdesc(%ebx), %eax to leal x@ntpoff, %eax.
370 //
371 // Note: call *x@tlsdesc(%eax) may not immediately follow this instruction.
372 if (memcmp(s1: loc - 2, s2: "\x8d\x83", n: 2)) {
373 ErrAlways(ctx)
374 << getErrorLoc(ctx, loc: loc - 2)
375 << "R_386_TLS_GOTDESC must be used in leal x@tlsdesc(%ebx), %eax";
376 return;
377 }
378 loc[-1] = 0x05;
379 write32le(P: loc, V: val);
380 } else {
381 // Convert call *x@tlsdesc(%eax) to xchg ax, ax.
382 assert(rel.type == R_386_TLS_DESC_CALL);
383 loc[0] = 0x66;
384 loc[1] = 0x90;
385 }
386}
387
388void X86::relaxTlsGdToIe(uint8_t *loc, const Relocation &rel,
389 uint64_t val) const {
390 if (rel.type == R_386_TLS_GD) {
391 // Convert (loc[-2] == 0x04)
392 // leal x@tlsgd(, %ebx, 1), %eax
393 // call ___tls_get_addr@plt
394 // or
395 // leal x@tlsgd(%reg), %eax
396 // call *___tls_get_addr@got(%reg)
397 const uint8_t inst[] = {
398 0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0, %eax
399 0x03, 0x83, 0, 0, 0, 0, // addl x@gottpoff(%ebx), %eax
400 };
401 uint8_t *w = loc[-2] == 0x04 ? loc - 3 : loc - 2;
402 memcpy(dest: w, src: inst, n: sizeof(inst));
403 write32le(P: w + 8, V: val);
404 } else if (rel.type == R_386_TLS_GOTDESC) {
405 // Convert leal x@tlsdesc(%ebx), %eax to movl x@gotntpoff(%ebx), %eax.
406 if (memcmp(s1: loc - 2, s2: "\x8d\x83", n: 2)) {
407 ErrAlways(ctx)
408 << getErrorLoc(ctx, loc: loc - 2)
409 << "R_386_TLS_GOTDESC must be used in leal x@tlsdesc(%ebx), %eax";
410 return;
411 }
412 loc[-2] = 0x8b;
413 write32le(P: loc, V: val);
414 } else {
415 // Convert call *x@tlsdesc(%eax) to xchg ax, ax.
416 assert(rel.type == R_386_TLS_DESC_CALL);
417 loc[0] = 0x66;
418 loc[1] = 0x90;
419 }
420}
421
422// In some conditions, relocations can be optimized to avoid using GOT.
423// This function does that for Initial Exec to Local Exec case.
424void X86::relaxTlsIeToLe(uint8_t *loc, const Relocation &rel,
425 uint64_t val) const {
426 // Ulrich's document section 6.2 says that @gotntpoff can
427 // be used with MOVL or ADDL instructions.
428 // @indntpoff is similar to @gotntpoff, but for use in
429 // position dependent code.
430 uint8_t reg = (loc[-1] >> 3) & 7;
431
432 if (rel.type == R_386_TLS_IE) {
433 if (loc[-1] == 0xa1) {
434 // "movl foo@indntpoff,%eax" -> "movl $foo,%eax"
435 // This case is different from the generic case below because
436 // this is a 5 byte instruction while below is 6 bytes.
437 loc[-1] = 0xb8;
438 } else if (loc[-2] == 0x8b) {
439 // "movl foo@indntpoff,%reg" -> "movl $foo,%reg"
440 loc[-2] = 0xc7;
441 loc[-1] = 0xc0 | reg;
442 } else {
443 // "addl foo@indntpoff,%reg" -> "addl $foo,%reg"
444 loc[-2] = 0x81;
445 loc[-1] = 0xc0 | reg;
446 }
447 } else {
448 assert(rel.type == R_386_TLS_GOTIE);
449 if (loc[-2] == 0x8b) {
450 // "movl foo@gottpoff(%rip),%reg" -> "movl $foo,%reg"
451 loc[-2] = 0xc7;
452 loc[-1] = 0xc0 | reg;
453 } else {
454 // "addl foo@gotntpoff(%rip),%reg" -> "leal foo(%reg),%reg"
455 loc[-2] = 0x8d;
456 loc[-1] = 0x80 | (reg << 3) | reg;
457 }
458 }
459 write32le(P: loc, V: val);
460}
461
462void X86::relaxTlsLdToLe(uint8_t *loc, const Relocation &rel,
463 uint64_t val) const {
464 if (rel.type == R_386_TLS_LDO_32) {
465 write32le(P: loc, V: val);
466 return;
467 }
468
469 if (loc[4] == 0xe8) {
470 // Convert
471 // leal x(%reg),%eax
472 // call ___tls_get_addr@plt
473 // to
474 const uint8_t inst[] = {
475 0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0,%eax
476 0x90, // nop
477 0x8d, 0x74, 0x26, 0x00, // leal 0(%esi,1),%esi
478 };
479 memcpy(dest: loc - 2, src: inst, n: sizeof(inst));
480 return;
481 }
482
483 // Convert
484 // leal x(%reg),%eax
485 // call *___tls_get_addr@got(%reg)
486 // to
487 const uint8_t inst[] = {
488 0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0,%eax
489 0x8d, 0xb6, 0x00, 0x00, 0x00, 0x00, // leal (%esi),%esi
490 };
491 memcpy(dest: loc - 2, src: inst, n: sizeof(inst));
492}
493
494void X86::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const {
495 uint64_t secAddr = sec.getOutputSection()->addr;
496 if (auto *s = dyn_cast<InputSection>(Val: &sec))
497 secAddr += s->outSecOff;
498 for (const Relocation &rel : sec.relocs()) {
499 uint8_t *loc = buf + rel.offset;
500 const uint64_t val =
501 SignExtend64(X: sec.getRelocTargetVA(ctx, r: rel, p: secAddr + rel.offset), B: 32);
502 switch (rel.expr) {
503 case R_RELAX_TLS_GD_TO_IE_GOTPLT:
504 relaxTlsGdToIe(loc, rel, val);
505 continue;
506 case R_RELAX_TLS_GD_TO_LE:
507 case R_RELAX_TLS_GD_TO_LE_NEG:
508 relaxTlsGdToLe(loc, rel, val);
509 continue;
510 case R_RELAX_TLS_LD_TO_LE:
511 relaxTlsLdToLe(loc, rel, val);
512 break;
513 case R_RELAX_TLS_IE_TO_LE:
514 relaxTlsIeToLe(loc, rel, val);
515 continue;
516 default:
517 relocate(loc, rel, val);
518 break;
519 }
520 }
521}
522
523// If Intel Indirect Branch Tracking is enabled, we have to emit special PLT
524// entries containing endbr32 instructions. A PLT entry will be split into two
525// parts, one in .plt.sec (writePlt), and the other in .plt (writeIBTPlt).
526namespace {
527class IntelIBT : public X86 {
528public:
529 IntelIBT(Ctx &ctx) : X86(ctx) { pltHeaderSize = 0; }
530 void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
531 void writePlt(uint8_t *buf, const Symbol &sym,
532 uint64_t pltEntryAddr) const override;
533 void writeIBTPlt(uint8_t *buf, size_t numEntries) const override;
534
535 static const unsigned IBTPltHeaderSize = 16;
536};
537} // namespace
538
539void IntelIBT::writeGotPlt(uint8_t *buf, const Symbol &s) const {
540 uint64_t va = ctx.in.ibtPlt->getVA() + IBTPltHeaderSize +
541 s.getPltIdx(ctx) * pltEntrySize;
542 write32le(P: buf, V: va);
543}
544
545void IntelIBT::writePlt(uint8_t *buf, const Symbol &sym,
546 uint64_t /*pltEntryAddr*/) const {
547 if (ctx.arg.isPic) {
548 const uint8_t inst[] = {
549 0xf3, 0x0f, 0x1e, 0xfb, // endbr32
550 0xff, 0xa3, 0, 0, 0, 0, // jmp *name@GOT(%ebx)
551 0x66, 0x0f, 0x1f, 0x44, 0, 0, // nop
552 };
553 memcpy(dest: buf, src: inst, n: sizeof(inst));
554 write32le(P: buf + 6, V: sym.getGotPltVA(ctx) - ctx.in.gotPlt->getVA());
555 return;
556 }
557
558 const uint8_t inst[] = {
559 0xf3, 0x0f, 0x1e, 0xfb, // endbr32
560 0xff, 0x25, 0, 0, 0, 0, // jmp *foo@GOT
561 0x66, 0x0f, 0x1f, 0x44, 0, 0, // nop
562 };
563 memcpy(dest: buf, src: inst, n: sizeof(inst));
564 write32le(P: buf + 6, V: sym.getGotPltVA(ctx));
565}
566
567void IntelIBT::writeIBTPlt(uint8_t *buf, size_t numEntries) const {
568 writePltHeader(buf);
569 buf += IBTPltHeaderSize;
570
571 const uint8_t inst[] = {
572 0xf3, 0x0f, 0x1e, 0xfb, // endbr32
573 0x68, 0, 0, 0, 0, // pushl $reloc_offset
574 0xe9, 0, 0, 0, 0, // jmpq .PLT0@PC
575 0x66, 0x90, // nop
576 };
577
578 for (size_t i = 0; i < numEntries; ++i) {
579 memcpy(dest: buf, src: inst, n: sizeof(inst));
580 write32le(P: buf + 5, V: i * sizeof(object::ELF32LE::Rel));
581 write32le(P: buf + 10, V: -pltHeaderSize - sizeof(inst) * i - 30);
582 buf += sizeof(inst);
583 }
584}
585
586namespace {
587class RetpolinePic : public X86 {
588public:
589 RetpolinePic(Ctx &);
590 void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
591 void writePltHeader(uint8_t *buf) const override;
592 void writePlt(uint8_t *buf, const Symbol &sym,
593 uint64_t pltEntryAddr) const override;
594};
595
596class RetpolineNoPic : public X86 {
597public:
598 RetpolineNoPic(Ctx &);
599 void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
600 void writePltHeader(uint8_t *buf) const override;
601 void writePlt(uint8_t *buf, const Symbol &sym,
602 uint64_t pltEntryAddr) const override;
603};
604} // namespace
605
606RetpolinePic::RetpolinePic(Ctx &ctx) : X86(ctx) {
607 pltHeaderSize = 48;
608 pltEntrySize = 32;
609 ipltEntrySize = 32;
610}
611
612void RetpolinePic::writeGotPlt(uint8_t *buf, const Symbol &s) const {
613 write32le(P: buf, V: s.getPltVA(ctx) + 17);
614}
615
616void RetpolinePic::writePltHeader(uint8_t *buf) const {
617 const uint8_t insn[] = {
618 0xff, 0xb3, 4, 0, 0, 0, // 0: pushl 4(%ebx)
619 0x50, // 6: pushl %eax
620 0x8b, 0x83, 8, 0, 0, 0, // 7: mov 8(%ebx), %eax
621 0xe8, 0x0e, 0x00, 0x00, 0x00, // d: call next
622 0xf3, 0x90, // 12: loop: pause
623 0x0f, 0xae, 0xe8, // 14: lfence
624 0xeb, 0xf9, // 17: jmp loop
625 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 19: int3; .align 16
626 0x89, 0x0c, 0x24, // 20: next: mov %ecx, (%esp)
627 0x8b, 0x4c, 0x24, 0x04, // 23: mov 0x4(%esp), %ecx
628 0x89, 0x44, 0x24, 0x04, // 27: mov %eax ,0x4(%esp)
629 0x89, 0xc8, // 2b: mov %ecx, %eax
630 0x59, // 2d: pop %ecx
631 0xc3, // 2e: ret
632 0xcc, // 2f: int3; padding
633 };
634 memcpy(dest: buf, src: insn, n: sizeof(insn));
635}
636
637void RetpolinePic::writePlt(uint8_t *buf, const Symbol &sym,
638 uint64_t pltEntryAddr) const {
639 unsigned relOff = ctx.in.relaPlt->entsize * sym.getPltIdx(ctx);
640 const uint8_t insn[] = {
641 0x50, // pushl %eax
642 0x8b, 0x83, 0, 0, 0, 0, // mov foo@GOT(%ebx), %eax
643 0xe8, 0, 0, 0, 0, // call plt+0x20
644 0xe9, 0, 0, 0, 0, // jmp plt+0x12
645 0x68, 0, 0, 0, 0, // pushl $reloc_offset
646 0xe9, 0, 0, 0, 0, // jmp plt+0
647 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // int3; padding
648 };
649 memcpy(dest: buf, src: insn, n: sizeof(insn));
650
651 uint32_t ebx = ctx.in.gotPlt->getVA();
652 unsigned off = pltEntryAddr - ctx.in.plt->getVA();
653 write32le(P: buf + 3, V: sym.getGotPltVA(ctx) - ebx);
654 write32le(P: buf + 8, V: -off - 12 + 32);
655 write32le(P: buf + 13, V: -off - 17 + 18);
656 write32le(P: buf + 18, V: relOff);
657 write32le(P: buf + 23, V: -off - 27);
658}
659
660RetpolineNoPic::RetpolineNoPic(Ctx &ctx) : X86(ctx) {
661 pltHeaderSize = 48;
662 pltEntrySize = 32;
663 ipltEntrySize = 32;
664}
665
666void RetpolineNoPic::writeGotPlt(uint8_t *buf, const Symbol &s) const {
667 write32le(P: buf, V: s.getPltVA(ctx) + 16);
668}
669
670void RetpolineNoPic::writePltHeader(uint8_t *buf) const {
671 const uint8_t insn[] = {
672 0xff, 0x35, 0, 0, 0, 0, // 0: pushl GOTPLT+4
673 0x50, // 6: pushl %eax
674 0xa1, 0, 0, 0, 0, // 7: mov GOTPLT+8, %eax
675 0xe8, 0x0f, 0x00, 0x00, 0x00, // c: call next
676 0xf3, 0x90, // 11: loop: pause
677 0x0f, 0xae, 0xe8, // 13: lfence
678 0xeb, 0xf9, // 16: jmp loop
679 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 18: int3
680 0xcc, 0xcc, 0xcc, // 1f: int3; .align 16
681 0x89, 0x0c, 0x24, // 20: next: mov %ecx, (%esp)
682 0x8b, 0x4c, 0x24, 0x04, // 23: mov 0x4(%esp), %ecx
683 0x89, 0x44, 0x24, 0x04, // 27: mov %eax ,0x4(%esp)
684 0x89, 0xc8, // 2b: mov %ecx, %eax
685 0x59, // 2d: pop %ecx
686 0xc3, // 2e: ret
687 0xcc, // 2f: int3; padding
688 };
689 memcpy(dest: buf, src: insn, n: sizeof(insn));
690
691 uint32_t gotPlt = ctx.in.gotPlt->getVA();
692 write32le(P: buf + 2, V: gotPlt + 4);
693 write32le(P: buf + 8, V: gotPlt + 8);
694}
695
696void RetpolineNoPic::writePlt(uint8_t *buf, const Symbol &sym,
697 uint64_t pltEntryAddr) const {
698 unsigned relOff = ctx.in.relaPlt->entsize * sym.getPltIdx(ctx);
699 const uint8_t insn[] = {
700 0x50, // 0: pushl %eax
701 0xa1, 0, 0, 0, 0, // 1: mov foo_in_GOT, %eax
702 0xe8, 0, 0, 0, 0, // 6: call plt+0x20
703 0xe9, 0, 0, 0, 0, // b: jmp plt+0x11
704 0x68, 0, 0, 0, 0, // 10: pushl $reloc_offset
705 0xe9, 0, 0, 0, 0, // 15: jmp plt+0
706 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 1a: int3; padding
707 0xcc, // 1f: int3; padding
708 };
709 memcpy(dest: buf, src: insn, n: sizeof(insn));
710
711 unsigned off = pltEntryAddr - ctx.in.plt->getVA();
712 write32le(P: buf + 2, V: sym.getGotPltVA(ctx));
713 write32le(P: buf + 7, V: -off - 11 + 32);
714 write32le(P: buf + 12, V: -off - 16 + 17);
715 write32le(P: buf + 17, V: relOff);
716 write32le(P: buf + 22, V: -off - 26);
717}
718
719void elf::setX86TargetInfo(Ctx &ctx) {
720 if (ctx.arg.zRetpolineplt) {
721 if (ctx.arg.isPic)
722 ctx.target.reset(p: new RetpolinePic(ctx));
723 else
724 ctx.target.reset(p: new RetpolineNoPic(ctx));
725 return;
726 }
727
728 if (ctx.arg.andFeatures & GNU_PROPERTY_X86_FEATURE_1_IBT)
729 ctx.target.reset(p: new IntelIBT(ctx));
730 else
731 ctx.target.reset(p: new X86(ctx));
732}
733

Provided by KDAB

Privacy Policy
Update your C++ knowledge – Modern C++11/14/17 Training
Find out more

source code of lld/ELF/Arch/X86.cpp