1 | /* SPDX-License-Identifier: GPL-2.0 */ |
2 | /* |
3 | * ld script for the x86 kernel |
4 | * |
5 | * Historic 32-bit version written by Martin Mares <mj@atrey.karlin.mff.cuni.cz> |
6 | * |
7 | * Modernisation, unification and other changes and fixes: |
8 | * Copyright (C) 2007-2009 Sam Ravnborg <sam@ravnborg.org> |
9 | * |
10 | * |
 * Don't define absolute symbols unless you know that the symbol
 * value should remain constant even if the kernel image is relocated
 * at run time. Absolute symbols are not relocated. If the symbol value
 * should change when the kernel is relocated, make the symbol
 * section-relative and put it inside the section definition.
16 | */ |
17 | |
18 | #ifdef CONFIG_X86_32 |
19 | #define LOAD_OFFSET __PAGE_OFFSET |
20 | #else |
21 | #define LOAD_OFFSET __START_KERNEL_map |
22 | #endif |
23 | |
24 | #define RUNTIME_DISCARD_EXIT |
25 | #define EMITS_PT_NOTE |
26 | #define RO_EXCEPTION_TABLE_ALIGN 16 |
27 | |
28 | #include <asm-generic/vmlinux.lds.h> |
29 | #include <asm/asm-offsets.h> |
30 | #include <asm/thread_info.h> |
31 | #include <asm/page_types.h> |
32 | #include <asm/orc_lookup.h> |
33 | #include <asm/cache.h> |
34 | #include <asm/boot.h> |
35 | |
#undef i386     /* in case the preprocessor is a 32-bit one */
37 | |
38 | OUTPUT_FORMAT(CONFIG_OUTPUT_FORMAT) |
39 | |
40 | #ifdef CONFIG_X86_32 |
41 | OUTPUT_ARCH(i386) |
42 | ENTRY(phys_startup_32) |
43 | #else |
44 | OUTPUT_ARCH(i386:x86-64) |
45 | ENTRY(phys_startup_64) |
46 | #endif |
47 | |
48 | jiffies = jiffies_64; |
49 | |
50 | #if defined(CONFIG_X86_64) |
51 | /* |
52 | * On 64-bit, align RODATA to 2MB so we retain large page mappings for |
53 | * boundaries spanning kernel text, rodata and data sections. |
54 | * |
 * However, kernel identity mappings will have different RWX permissions
 * for the pages mapping the text section and for the (freed) padding
 * pages following it. Hence kernel identity mappings will be broken down
 * to smaller pages. On 64-bit, kernel text and kernel identity mappings
 * are separate, so we can enable protection checks as well as retain 2MB
 * large page mappings for kernel text.
61 | */ |
62 | #define X86_ALIGN_RODATA_BEGIN . = ALIGN(HPAGE_SIZE); |
63 | |
64 | #define X86_ALIGN_RODATA_END \ |
65 | . = ALIGN(HPAGE_SIZE); \ |
66 | __end_rodata_hpage_align = .; \ |
67 | __end_rodata_aligned = .; |
68 | |
69 | #define ALIGN_ENTRY_TEXT_BEGIN . = ALIGN(PMD_SIZE); |
70 | #define ALIGN_ENTRY_TEXT_END . = ALIGN(PMD_SIZE); |
71 | |
72 | /* |
73 | * This section contains data which will be mapped as decrypted. Memory |
74 | * encryption operates on a page basis. Make this section PMD-aligned |
75 | * to avoid splitting the pages while mapping the section early. |
76 | * |
77 | * Note: We use a separate section so that only this section gets |
78 | * decrypted to avoid exposing more than we wish. |
79 | */ |
80 | #define BSS_DECRYPTED \ |
81 | . = ALIGN(PMD_SIZE); \ |
82 | __start_bss_decrypted = .; \ |
83 | *(.bss..decrypted); \ |
84 | . = ALIGN(PAGE_SIZE); \ |
85 | __start_bss_decrypted_unused = .; \ |
86 | . = ALIGN(PMD_SIZE); \ |
	__end_bss_decrypted = .;
88 | |
89 | #else |
90 | |
91 | #define X86_ALIGN_RODATA_BEGIN |
92 | #define X86_ALIGN_RODATA_END \ |
93 | . = ALIGN(PAGE_SIZE); \ |
94 | __end_rodata_aligned = .; |
95 | |
96 | #define ALIGN_ENTRY_TEXT_BEGIN |
97 | #define ALIGN_ENTRY_TEXT_END |
98 | #define BSS_DECRYPTED |
99 | |
100 | #endif |
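
/*
 * For reference, C code places objects into the .bss..decrypted input
 * section consumed by BSS_DECRYPTED above via the __bss_decrypted
 * attribute from asm/mem_encrypt.h. A rough sketch (kvmclock is one
 * real user; the exact declaration may differ by kernel version):
 *
 *	static struct pvclock_vsyscall_time_info
 *		hv_clock_boot[HVC_BOOT_ARRAY_SIZE] __bss_decrypted;
 */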
101 | |
102 | PHDRS { |
103 | text PT_LOAD FLAGS(5); /* R_E */ |
104 | data PT_LOAD FLAGS(6); /* RW_ */ |
105 | #ifdef CONFIG_X86_64 |
106 | #ifdef CONFIG_SMP |
107 | percpu PT_LOAD FLAGS(6); /* RW_ */ |
108 | #endif |
109 | init PT_LOAD FLAGS(7); /* RWE */ |
110 | #endif |
111 | note PT_NOTE FLAGS(0); /* ___ */ |
112 | } |
113 | |
114 | SECTIONS |
115 | { |
116 | #ifdef CONFIG_X86_32 |
117 | . = LOAD_OFFSET + LOAD_PHYSICAL_ADDR; |
118 | phys_startup_32 = ABSOLUTE(startup_32 - LOAD_OFFSET); |
119 | #else |
120 | . = __START_KERNEL; |
121 | phys_startup_64 = ABSOLUTE(startup_64 - LOAD_OFFSET); |
122 | #endif |
123 | |
124 | /* Text and read-only data */ |
125 | .text : AT(ADDR(.text) - LOAD_OFFSET) { |
126 | _text = .; |
127 | _stext = .; |
128 | /* bootstrapping code */ |
129 | HEAD_TEXT |
130 | TEXT_TEXT |
131 | SCHED_TEXT |
132 | LOCK_TEXT |
133 | KPROBES_TEXT |
134 | SOFTIRQENTRY_TEXT |
135 | #ifdef CONFIG_RETPOLINE |
136 | *(.text..__x86.indirect_thunk) |
137 | *(.text..__x86.return_thunk) |
138 | #endif |
139 | STATIC_CALL_TEXT |
140 | |
141 | ALIGN_ENTRY_TEXT_BEGIN |
142 | *(.text..__x86.rethunk_untrain) |
143 | ENTRY_TEXT |
144 | |
145 | #ifdef CONFIG_CPU_SRSO |
146 | /* |
147 | * See the comment above srso_alias_untrain_ret()'s |
148 | * definition. |
149 | */ |
150 | . = srso_alias_untrain_ret | (1 << 2) | (1 << 8) | (1 << 14) | (1 << 20); |
151 | *(.text..__x86.rethunk_safe) |
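		/*
		 * Worked out: the OR above sets bits 2, 8, 14 and 20 of the
		 * location counter ((1 << 2) | (1 << 8) | (1 << 14) |
		 * (1 << 20) == 0x104104), so srso_alias_safe_ret is placed
		 * at an address that aliases srso_alias_untrain_ret. The
		 * (A | B) - (A & B) ASSERT at the end of this file re-checks
		 * the pair at link time.
		 */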
152 | #endif |
153 | ALIGN_ENTRY_TEXT_END |
154 | *(.gnu.warning) |
155 | |
156 | } :text = 0xcccccccc |
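
	/*
	 * Note: the 0xcc fill byte above is the int3 opcode, so stray
	 * execution of text-section padding traps instead of running
	 * whatever bytes happen to be there.
	 */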
157 | |
	/* End of text section, which should occupy a whole number of pages */
159 | _etext = .; |
160 | . = ALIGN(PAGE_SIZE); |
161 | |
162 | X86_ALIGN_RODATA_BEGIN |
163 | RO_DATA(PAGE_SIZE) |
164 | X86_ALIGN_RODATA_END |
165 | |
166 | /* Data */ |
167 | .data : AT(ADDR(.data) - LOAD_OFFSET) { |
168 | /* Start of data section */ |
169 | _sdata = .; |
170 | |
171 | /* init_task */ |
172 | INIT_TASK_DATA(THREAD_SIZE) |
173 | |
174 | #ifdef CONFIG_X86_32 |
	/* 32-bit has nosave data before _edata */
176 | NOSAVE_DATA |
177 | #endif |
178 | |
179 | PAGE_ALIGNED_DATA(PAGE_SIZE) |
180 | |
181 | CACHELINE_ALIGNED_DATA(L1_CACHE_BYTES) |
182 | |
183 | DATA_DATA |
184 | CONSTRUCTORS |
185 | |
186 | /* rarely changed data like cpu maps */ |
187 | READ_MOSTLY_DATA(INTERNODE_CACHE_BYTES) |
188 | |
189 | /* End of data section */ |
190 | _edata = .; |
191 | } :data |
192 | |
193 | BUG_TABLE |
194 | |
195 | ORC_UNWIND_TABLE |
196 | |
197 | . = ALIGN(PAGE_SIZE); |
198 | __vvar_page = .; |
199 | |
200 | .vvar : AT(ADDR(.vvar) - LOAD_OFFSET) { |
201 | /* work around gold bug 13023 */ |
202 | __vvar_beginning_hack = .; |
203 | |
204 | /* Place all vvars at the offsets in asm/vvar.h. */ |
205 | #define EMIT_VVAR(name, offset) \ |
206 | . = __vvar_beginning_hack + offset; \ |
207 | *(.vvar_ ## name) |
208 | #include <asm/vvar.h> |
209 | #undef EMIT_VVAR |
210 | |
211 | /* |
212 | * Pad the rest of the page with zeros. Otherwise the loader |
213 | * can leave garbage here. |
214 | */ |
215 | . = __vvar_beginning_hack + PAGE_SIZE; |
216 | } :data |
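
	/*
	 * For reference, asm/vvar.h declares the entries that EMIT_VVAR()
	 * above lays out, roughly (a sketch; the exact list varies by
	 * kernel version):
	 *
	 *	DECLARE_VVAR(128, struct vdso_data, _vdso_data)
	 */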
217 | |
218 | . = ALIGN(__vvar_page + PAGE_SIZE, PAGE_SIZE); |
219 | |
220 | /* Init code and data - will be freed after init */ |
221 | . = ALIGN(PAGE_SIZE); |
222 | .init.begin : AT(ADDR(.init.begin) - LOAD_OFFSET) { |
223 | __init_begin = .; /* paired with __init_end */ |
224 | } |
225 | |
226 | #if defined(CONFIG_X86_64) && defined(CONFIG_SMP) |
227 | /* |
228 | * percpu offsets are zero-based on SMP. PERCPU_VADDR() changes the |
229 | * output PHDR, so the next output section - .init.text - should |
230 | * start another segment - init. |
231 | */ |
232 | PERCPU_VADDR(INTERNODE_CACHE_BYTES, 0, :percpu) |
233 | ASSERT(SIZEOF(.data..percpu) < CONFIG_PHYSICAL_START, |
234 | "per-CPU data too large - increase CONFIG_PHYSICAL_START" ) |
235 | #endif |
236 | |
237 | INIT_TEXT_SECTION(PAGE_SIZE) |
238 | #ifdef CONFIG_X86_64 |
239 | :init |
240 | #endif |
241 | |
242 | /* |
243 | * Section for code used exclusively before alternatives are run. All |
	 * references to such code must be patched out by alternatives, normally
	 * by using the X86_FEATURE_ALWAYS CPU feature bit.
246 | * |
247 | * See static_cpu_has() for an example. |
248 | */ |
249 | .altinstr_aux : AT(ADDR(.altinstr_aux) - LOAD_OFFSET) { |
250 | *(.altinstr_aux) |
251 | } |
252 | |
253 | INIT_DATA_SECTION(16) |
254 | |
255 | .x86_cpu_dev.init : AT(ADDR(.x86_cpu_dev.init) - LOAD_OFFSET) { |
256 | __x86_cpu_dev_start = .; |
257 | *(.x86_cpu_dev.init) |
258 | __x86_cpu_dev_end = .; |
259 | } |
260 | |
261 | #ifdef CONFIG_X86_INTEL_MID |
262 | .x86_intel_mid_dev.init : AT(ADDR(.x86_intel_mid_dev.init) - \ |
263 | LOAD_OFFSET) { |
264 | __x86_intel_mid_dev_start = .; |
265 | *(.x86_intel_mid_dev.init) |
266 | __x86_intel_mid_dev_end = .; |
267 | } |
268 | #endif |
269 | |
270 | /* |
	 * Start address and size of operations which during runtime
	 * can be patched with virtualization-friendly instructions or
	 * bare-metal native ones. Think page-table operations.
	 * Details in paravirt_types.h.
275 | */ |
276 | . = ALIGN(8); |
277 | .parainstructions : AT(ADDR(.parainstructions) - LOAD_OFFSET) { |
278 | __parainstructions = .; |
279 | *(.parainstructions) |
280 | __parainstructions_end = .; |
281 | } |
282 | |
283 | #ifdef CONFIG_RETPOLINE |
284 | /* |
285 | * List of instructions that call/jmp/jcc to retpoline thunks |
286 | * __x86_indirect_thunk_*(). These instructions can be patched along |
287 | * with alternatives, after which the section can be freed. |
288 | */ |
289 | . = ALIGN(8); |
290 | .retpoline_sites : AT(ADDR(.retpoline_sites) - LOAD_OFFSET) { |
291 | __retpoline_sites = .; |
292 | *(.retpoline_sites) |
293 | __retpoline_sites_end = .; |
294 | } |
295 | |
296 | . = ALIGN(8); |
297 | .return_sites : AT(ADDR(.return_sites) - LOAD_OFFSET) { |
298 | __return_sites = .; |
299 | *(.return_sites) |
300 | __return_sites_end = .; |
301 | } |
302 | |
303 | . = ALIGN(8); |
304 | .call_sites : AT(ADDR(.call_sites) - LOAD_OFFSET) { |
305 | __call_sites = .; |
306 | *(.call_sites) |
307 | __call_sites_end = .; |
308 | } |
309 | #endif |
310 | |
311 | #ifdef CONFIG_X86_KERNEL_IBT |
312 | . = ALIGN(8); |
313 | .ibt_endbr_seal : AT(ADDR(.ibt_endbr_seal) - LOAD_OFFSET) { |
314 | __ibt_endbr_seal = .; |
315 | *(.ibt_endbr_seal) |
316 | __ibt_endbr_seal_end = .; |
317 | } |
318 | #endif |
319 | |
320 | #ifdef CONFIG_FINEIBT |
321 | . = ALIGN(8); |
322 | .cfi_sites : AT(ADDR(.cfi_sites) - LOAD_OFFSET) { |
323 | __cfi_sites = .; |
324 | *(.cfi_sites) |
325 | __cfi_sites_end = .; |
326 | } |
327 | #endif |
328 | |
329 | /* |
	 * struct alt_instr entries. From the header (alternative.h):
331 | * "Alternative instructions for different CPU types or capabilities" |
332 | * Think locking instructions on spinlocks. |
333 | */ |
334 | . = ALIGN(8); |
335 | .altinstructions : AT(ADDR(.altinstructions) - LOAD_OFFSET) { |
336 | __alt_instructions = .; |
337 | *(.altinstructions) |
338 | __alt_instructions_end = .; |
339 | } |
340 | |
341 | /* |
	 * And here are the replacement instructions. The linker sticks
	 * them in as binary blobs. The .altinstructions section has enough
	 * data to get their address and length so the kernel can be
	 * patched safely.
345 | */ |
346 | .altinstr_replacement : AT(ADDR(.altinstr_replacement) - LOAD_OFFSET) { |
347 | *(.altinstr_replacement) |
348 | } |
349 | |
350 | . = ALIGN(8); |
351 | .apicdrivers : AT(ADDR(.apicdrivers) - LOAD_OFFSET) { |
352 | __apicdrivers = .; |
353 | *(.apicdrivers); |
354 | __apicdrivers_end = .; |
355 | } |
356 | |
357 | . = ALIGN(8); |
358 | /* |
359 | * .exit.text is discarded at runtime, not link time, to deal with |
360 | * references from .altinstructions |
361 | */ |
362 | .exit.text : AT(ADDR(.exit.text) - LOAD_OFFSET) { |
363 | EXIT_TEXT |
364 | } |
365 | |
366 | .exit.data : AT(ADDR(.exit.data) - LOAD_OFFSET) { |
367 | EXIT_DATA |
368 | } |
369 | |
370 | #if !defined(CONFIG_X86_64) || !defined(CONFIG_SMP) |
371 | PERCPU_SECTION(INTERNODE_CACHE_BYTES) |
372 | #endif |
373 | |
374 | . = ALIGN(PAGE_SIZE); |
375 | |
376 | /* freed after init ends here */ |
377 | .init.end : AT(ADDR(.init.end) - LOAD_OFFSET) { |
378 | __init_end = .; |
379 | } |
380 | |
381 | /* |
382 | * smp_locks might be freed after init |
383 | * start/end must be page aligned |
384 | */ |
385 | . = ALIGN(PAGE_SIZE); |
386 | .smp_locks : AT(ADDR(.smp_locks) - LOAD_OFFSET) { |
387 | __smp_locks = .; |
388 | *(.smp_locks) |
389 | . = ALIGN(PAGE_SIZE); |
390 | __smp_locks_end = .; |
391 | } |
392 | |
393 | #ifdef CONFIG_X86_64 |
394 | .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) { |
395 | NOSAVE_DATA |
396 | } |
397 | #endif |
398 | |
399 | /* BSS */ |
400 | . = ALIGN(PAGE_SIZE); |
401 | .bss : AT(ADDR(.bss) - LOAD_OFFSET) { |
402 | __bss_start = .; |
403 | *(.bss..page_aligned) |
404 | . = ALIGN(PAGE_SIZE); |
405 | *(BSS_MAIN) |
406 | BSS_DECRYPTED |
407 | . = ALIGN(PAGE_SIZE); |
408 | __bss_stop = .; |
409 | } |
410 | |
411 | /* |
412 | * The memory occupied from _text to here, __end_of_kernel_reserve, is |
413 | * automatically reserved in setup_arch(). Anything after here must be |
414 | * explicitly reserved using memblock_reserve() or it will be discarded |
415 | * and treated as available memory. |
416 | */ |
417 | __end_of_kernel_reserve = .; |
418 | |
419 | . = ALIGN(PAGE_SIZE); |
420 | .brk : AT(ADDR(.brk) - LOAD_OFFSET) { |
421 | __brk_base = .; |
422 | . += 64 * 1024; /* 64k alignment slop space */ |
423 | *(.bss..brk) /* areas brk users have reserved */ |
424 | __brk_limit = .; |
425 | } |
426 | |
427 | . = ALIGN(PAGE_SIZE); /* keep VO_INIT_SIZE page aligned */ |
428 | _end = .; |
429 | |
430 | #ifdef CONFIG_AMD_MEM_ENCRYPT |
431 | /* |
432 | * Early scratch/workarea section: Lives outside of the kernel proper |
433 | * (_text - _end). |
434 | * |
435 | * Resides after _end because even though the .brk section is after |
436 | * __end_of_kernel_reserve, the .brk section is later reserved as a |
437 | * part of the kernel. Since it is located after __end_of_kernel_reserve |
438 | * it will be discarded and become part of the available memory. As |
439 | * such, it can only be used by very early boot code and must not be |
440 | * needed afterwards. |
441 | * |
442 | * Currently used by SME for performing in-place encryption of the |
443 | * kernel during boot. Resides on a 2MB boundary to simplify the |
444 | * pagetable setup used for SME in-place encryption. |
445 | */ |
446 | . = ALIGN(HPAGE_SIZE); |
447 | .init.scratch : AT(ADDR(.init.scratch) - LOAD_OFFSET) { |
448 | __init_scratch_begin = .; |
449 | *(.init.scratch) |
450 | . = ALIGN(HPAGE_SIZE); |
451 | __init_scratch_end = .; |
452 | } |
453 | #endif |
454 | |
455 | STABS_DEBUG |
456 | DWARF_DEBUG |
457 | ELF_DETAILS |
458 | |
459 | DISCARDS |
460 | |
461 | /* |
	 * Make sure that the .got.plt section is either completely empty
	 * or contains only the three reserved lazy dispatch slots
	 * (_DYNAMIC, link map, resolver): 3 * 8 = 0x18 bytes on 64-bit,
	 * 3 * 4 = 0xc bytes on 32-bit, matching the ASSERT below.
464 | */ |
465 | .got.plt (INFO) : { *(.got.plt) } |
466 | ASSERT(SIZEOF(.got.plt) == 0 || |
467 | #ifdef CONFIG_X86_64 |
468 | SIZEOF(.got.plt) == 0x18, |
469 | #else |
470 | SIZEOF(.got.plt) == 0xc, |
471 | #endif |
472 | "Unexpected GOT/PLT entries detected!" ) |
473 | |
474 | /* |
	 * Sections that should stay zero-sized; it is safer to check
	 * this explicitly than to discard them blindly.
477 | */ |
478 | .got : { |
479 | *(.got) *(.igot.*) |
480 | } |
481 | ASSERT(SIZEOF(.got) == 0, "Unexpected GOT entries detected!" ) |
482 | |
483 | .plt : { |
484 | *(.plt) *(.plt.*) *(.iplt) |
485 | } |
486 | ASSERT(SIZEOF(.plt) == 0, "Unexpected run-time procedure linkages detected!" ) |
487 | |
488 | .rel.dyn : { |
489 | *(.rel.*) *(.rel_*) |
490 | } |
491 | ASSERT(SIZEOF(.rel.dyn) == 0, "Unexpected run-time relocations (.rel) detected!" ) |
492 | |
493 | .rela.dyn : { |
494 | *(.rela.*) *(.rela_*) |
495 | } |
496 | ASSERT(SIZEOF(.rela.dyn) == 0, "Unexpected run-time relocations (.rela) detected!" ) |
497 | } |
498 | |
499 | /* |
500 | * The ASSERT() sink to . is intentional, for binutils 2.14 compatibility: |
501 | */ |
502 | . = ASSERT((_end - LOAD_OFFSET <= KERNEL_IMAGE_SIZE), |
503 | "kernel image bigger than KERNEL_IMAGE_SIZE" ); |
504 | |
505 | #ifdef CONFIG_X86_64 |
506 | /* |
507 | * Per-cpu symbols which need to be offset from __per_cpu_load |
508 | * for the boot processor. |
509 | */ |
510 | #define INIT_PER_CPU(x) init_per_cpu__##x = ABSOLUTE(x) + __per_cpu_load |
511 | INIT_PER_CPU(gdt_page); |
512 | INIT_PER_CPU(fixed_percpu_data); |
513 | INIT_PER_CPU(irq_stack_backing_store); |
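
/*
 * Sketch of why this works: with SMP, .data..percpu is linked at VA 0,
 * so a symbol such as gdt_page is a small zero-based offset; adding
 * __per_cpu_load yields the boot CPU's initial copy, which early boot
 * asm (e.g. head_64.S) can reference before the per-CPU areas are set
 * up.
 */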
514 | |
515 | #ifdef CONFIG_SMP |
516 | . = ASSERT((fixed_percpu_data == 0), |
517 | "fixed_percpu_data is not at start of per-cpu area" ); |
518 | #endif |
519 | |
520 | #ifdef CONFIG_CPU_UNRET_ENTRY |
521 | . = ASSERT((retbleed_return_thunk & 0x3f) == 0, "retbleed_return_thunk not cacheline-aligned" ); |
522 | #endif |
523 | |
524 | #ifdef CONFIG_CPU_SRSO |
525 | . = ASSERT((srso_safe_ret & 0x3f) == 0, "srso_safe_ret not cacheline-aligned" ); |
526 | /* |
527 | * GNU ld cannot do XOR until 2.41. |
528 | * https://sourceware.org/git/?p=binutils-gdb.git;a=commit;h=f6f78318fca803c4907fb8d7f6ded8295f1947b1 |
529 | * |
530 | * LLVM lld cannot do XOR until lld-17. |
531 | * https://github.com/llvm/llvm-project/commit/fae96104d4378166cbe5c875ef8ed808a356f3fb |
532 | * |
533 | * Instead do: (A | B) - (A & B) in order to compute the XOR |
534 | * of the two function addresses: |
535 | */ |
536 | . = ASSERT(((ABSOLUTE(srso_alias_untrain_ret) | srso_alias_safe_ret) - |
537 | (ABSOLUTE(srso_alias_untrain_ret) & srso_alias_safe_ret)) == ((1 << 2) | (1 << 8) | (1 << 14) | (1 << 20)), |
538 | "SRSO function pair won't alias" ); |
539 | #endif |
540 | |
541 | #endif /* CONFIG_X86_64 */ |
542 | |