/* SPDX-License-Identifier: GPL-2.0 */
/*
 * ld script for the x86 kernel
 *
 * Historic 32-bit version written by Martin Mares <mj@atrey.karlin.mff.cuni.cz>
 *
 * Modernisation, unification and other changes and fixes:
 *   Copyright (C) 2007-2009  Sam Ravnborg <sam@ravnborg.org>
 *
 * Don't define absolute symbols until and unless you know that the symbol
 * value should remain constant even if the kernel image is relocated at
 * run time. Absolute symbols are not relocated. If a symbol's value must
 * change when the kernel is relocated, make the symbol section-relative
 * and put it inside the section definition.
 */
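/*
 * For illustration only (not part of the script): kernel C code consumes
 * the section-relative symbols defined below by taking their addresses,
 * so the values follow the image wherever it is loaded. A minimal sketch,
 * assuming the usual extern-array convention for linker symbols:
 *
 *	extern char _text[], _end[];
 *
 *	static inline unsigned long kernel_image_bytes(void)
 *	{
 *		return (unsigned long)(_end - _text);
 *	}
 *
 * An ABSOLUTE() symbol, by contrast, keeps its link-time value and is not
 * adjusted when the kernel is relocated.
 */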

#define LOAD_OFFSET __START_KERNEL_map

#define RUNTIME_DISCARD_EXIT
#define EMITS_PT_NOTE
#define RO_EXCEPTION_TABLE_ALIGN	16

#include <asm-generic/vmlinux.lds.h>
#include <asm/asm-offsets.h>
#include <asm/thread_info.h>
#include <asm/page_types.h>
#include <asm/orc_lookup.h>
#include <asm/cache.h>
#include <asm/boot.h>
#include <asm/kexec.h>

#undef i386	/* in case the preprocessor is a 32-bit one */

OUTPUT_FORMAT(CONFIG_OUTPUT_FORMAT)

#ifdef CONFIG_X86_32
OUTPUT_ARCH(i386)
ENTRY(phys_startup_32)
#else
OUTPUT_ARCH(i386:x86-64)
ENTRY(phys_startup_64)
#endif

jiffies = jiffies_64;
const_current_task = current_task;
const_cpu_current_top_of_stack = cpu_current_top_of_stack;

#if defined(CONFIG_X86_64)
/*
 * On 64-bit, align RODATA to 2MB so that we retain large page mappings
 * across the boundaries between the kernel text, rodata and data sections.
 *
 * However, the kernel identity mappings use different RWX permissions for
 * the pages mapping the text and for the (freed) padding pages at the end
 * of the text section, so the identity mappings get split into smaller
 * pages anyway. Because the 64-bit kernel text mappings and the identity
 * mappings are separate, we can enable the protection checks while still
 * retaining 2MB large page mappings for the kernel text.
 */
#define X86_ALIGN_RODATA_BEGIN	. = ALIGN(HPAGE_SIZE);

#define X86_ALIGN_RODATA_END					\
		. = ALIGN(HPAGE_SIZE);				\
		__end_rodata_hpage_align = .;			\
		__end_rodata_aligned = .;

#define ALIGN_ENTRY_TEXT_BEGIN	. = ALIGN(PMD_SIZE);
#define ALIGN_ENTRY_TEXT_END	. = ALIGN(PMD_SIZE);

/*
 * This section contains data which will be mapped as decrypted. Memory
 * encryption operates on a page basis. Make this section PMD-aligned
 * to avoid splitting the pages while mapping the section early.
 *
 * Note: We use a separate section so that only this section gets
 * decrypted, to avoid exposing more than we wish.
 */
#define BSS_DECRYPTED						\
	. = ALIGN(PMD_SIZE);					\
	__start_bss_decrypted = .;				\
	__pi___start_bss_decrypted = .;				\
	*(.bss..decrypted);					\
	. = ALIGN(PAGE_SIZE);					\
	__start_bss_decrypted_unused = .;			\
	. = ALIGN(PMD_SIZE);					\
	__end_bss_decrypted = .;				\
	__pi___end_bss_decrypted = .;				\

#else

#define X86_ALIGN_RODATA_BEGIN
#define X86_ALIGN_RODATA_END					\
		. = ALIGN(PAGE_SIZE);				\
		__end_rodata_aligned = .;

#define ALIGN_ENTRY_TEXT_BEGIN
#define ALIGN_ENTRY_TEXT_END
#define BSS_DECRYPTED

#endif
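/*
 * For illustration only: C code places a variable into the decrypted BSS
 * collected by BSS_DECRYPTED above via a section attribute. A minimal
 * sketch, assuming a helper along the lines of the kernel's
 * __bss_decrypted annotation ("shared_page" is a hypothetical variable):
 *
 *	#define __bss_decrypted \
 *		__attribute__((__section__(".bss..decrypted")))
 *
 *	// Data shared with the hypervisor must stay decrypted under SME/SEV.
 *	static unsigned long shared_page[512] __bss_decrypted
 *		__attribute__((aligned(4096)));
 */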

#if defined(CONFIG_X86_64) && defined(CONFIG_KEXEC_CORE)
#define KEXEC_RELOCATE_KERNEL					\
	. = ALIGN(0x100);					\
	__relocate_kernel_start = .;				\
	*(.text..relocate_kernel);				\
	*(.data..relocate_kernel);				\
	__relocate_kernel_end = .;

ASSERT(__relocate_kernel_end - __relocate_kernel_start <= KEXEC_CONTROL_CODE_MAX_SIZE,
       "relocate_kernel code too large!")
#else
#define KEXEC_RELOCATE_KERNEL
#endif

PHDRS {
	text PT_LOAD FLAGS(5);	/* R_E */
	data PT_LOAD FLAGS(6);	/* RW_ */
	note PT_NOTE FLAGS(0);	/* ___ */
}
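/*
 * For reference: FLAGS() takes the ELF program header permission bits
 * (PF_X = 1, PF_W = 2, PF_R = 4), so 5 means read+execute and 6 means
 * read+write. A quick sanity check in C, using the <elf.h> definitions:
 *
 *	#include <elf.h>
 *
 *	_Static_assert((PF_R | PF_X) == 5, "text segment flags");
 *	_Static_assert((PF_R | PF_W) == 6, "data segment flags");
 */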

SECTIONS
{
	. = __START_KERNEL;
#ifdef CONFIG_X86_32
	phys_startup_32 = ABSOLUTE(startup_32 - LOAD_OFFSET);
#else
	phys_startup_64 = ABSOLUTE(startup_64 - LOAD_OFFSET);
#endif

	/* Text and read-only data */
	.text : AT(ADDR(.text) - LOAD_OFFSET) {
		_text = .;
		__pi__text = .;
		_stext = .;
		ALIGN_ENTRY_TEXT_BEGIN
		*(.text..__x86.rethunk_untrain)
		ENTRY_TEXT

#ifdef CONFIG_MITIGATION_SRSO
		/*
		 * See the comment above srso_alias_untrain_ret()'s
		 * definition.
		 */
		. = srso_alias_untrain_ret | (1 << 2) | (1 << 8) | (1 << 14) | (1 << 20);
		*(.text..__x86.rethunk_safe)
#endif
		ALIGN_ENTRY_TEXT_END

		TEXT_TEXT
		SCHED_TEXT
		LOCK_TEXT
		KPROBES_TEXT
		SOFTIRQENTRY_TEXT
#ifdef CONFIG_MITIGATION_RETPOLINE
		*(.text..__x86.indirect_thunk)
		*(.text..__x86.return_thunk)
#endif
		STATIC_CALL_TEXT
		*(.gnu.warning)

	} :text = 0xcccccccc
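	/*
	 * The 0xcc fill byte above is the x86 int3 opcode, so any padding
	 * the linker inserts into the text segment traps if it is ever
	 * executed.
	 */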

	/* bootstrapping code */
	.head.text : AT(ADDR(.head.text) - LOAD_OFFSET) {
		HEAD_TEXT
	} :text = 0xcccccccc

	/* End of text section, which should occupy a whole number of pages */
	_etext = .;
	. = ALIGN(PAGE_SIZE);

	X86_ALIGN_RODATA_BEGIN
	RO_DATA(PAGE_SIZE)
	X86_ALIGN_RODATA_END

	/* Data */
	.data : AT(ADDR(.data) - LOAD_OFFSET) {
		/* Start of data section */
		_sdata = .;

		/* init_task */
		INIT_TASK_DATA(THREAD_SIZE)

		/* equivalent to task_pt_regs(&init_task) */
		__top_init_kernel_stack = __end_init_stack - TOP_OF_KERNEL_STACK_PADDING - PTREGS_SIZE;

#ifdef CONFIG_X86_32
		/* 32 bit has nosave before _edata */
		NOSAVE_DATA
#endif

		PAGE_ALIGNED_DATA(PAGE_SIZE)

		CACHE_HOT_DATA(L1_CACHE_BYTES)

		CACHELINE_ALIGNED_DATA(L1_CACHE_BYTES)

		DATA_DATA
		CONSTRUCTORS
		KEXEC_RELOCATE_KERNEL

		/* rarely changed data like cpu maps */
		READ_MOSTLY_DATA(INTERNODE_CACHE_BYTES)

		/* End of data section */
		_edata = .;
	} :data

	BUG_TABLE

	ORC_UNWIND_TABLE

	/* Init code and data - will be freed after init */
	. = ALIGN(PAGE_SIZE);
	.init.begin : AT(ADDR(.init.begin) - LOAD_OFFSET) {
		__init_begin = .; /* paired with __init_end */
	}

	INIT_TEXT_SECTION(PAGE_SIZE)

	/*
	 * Section for code used exclusively before alternatives are run. All
	 * references to such code must be patched out by alternatives, normally
	 * by using the X86_FEATURE_ALWAYS CPU feature bit.
	 *
	 * See static_cpu_has() for an example.
	 */
	.altinstr_aux : AT(ADDR(.altinstr_aux) - LOAD_OFFSET) {
		*(.altinstr_aux)
	}

	INIT_DATA_SECTION(16)

	.x86_cpu_dev.init : AT(ADDR(.x86_cpu_dev.init) - LOAD_OFFSET) {
		__x86_cpu_dev_start = .;
		*(.x86_cpu_dev.init)
		__x86_cpu_dev_end = .;
	}

#ifdef CONFIG_X86_INTEL_MID
	.x86_intel_mid_dev.init : AT(ADDR(.x86_intel_mid_dev.init) - \
								LOAD_OFFSET) {
		__x86_intel_mid_dev_start = .;
		*(.x86_intel_mid_dev.init)
		__x86_intel_mid_dev_end = .;
	}
#endif

#ifdef CONFIG_MITIGATION_RETPOLINE
	/*
	 * List of instructions that call/jmp/jcc to retpoline thunks
	 * __x86_indirect_thunk_*(). These instructions can be patched along
	 * with alternatives, after which the section can be freed.
	 */
	. = ALIGN(8);
	.retpoline_sites : AT(ADDR(.retpoline_sites) - LOAD_OFFSET) {
		__retpoline_sites = .;
		*(.retpoline_sites)
		__retpoline_sites_end = .;
	}

	. = ALIGN(8);
	.return_sites : AT(ADDR(.return_sites) - LOAD_OFFSET) {
		__return_sites = .;
		*(.return_sites)
		__return_sites_end = .;
	}

	. = ALIGN(8);
	.call_sites : AT(ADDR(.call_sites) - LOAD_OFFSET) {
		__call_sites = .;
		*(.call_sites)
		__call_sites_end = .;
	}
#endif

#ifdef CONFIG_X86_KERNEL_IBT
	. = ALIGN(8);
	.ibt_endbr_seal : AT(ADDR(.ibt_endbr_seal) - LOAD_OFFSET) {
		__ibt_endbr_seal = .;
		*(.ibt_endbr_seal)
		__ibt_endbr_seal_end = .;
	}
#endif

#ifdef CONFIG_FINEIBT
	. = ALIGN(8);
	.cfi_sites : AT(ADDR(.cfi_sites) - LOAD_OFFSET) {
		__cfi_sites = .;
		*(.cfi_sites)
		__cfi_sites_end = .;
	}
#endif

	/*
	 * struct alt_instr entries. From the header (alternative.h):
	 * "Alternative instructions for different CPU types or capabilities"
	 * Think locking instructions on spinlocks.
	 */
	. = ALIGN(8);
	.altinstructions : AT(ADDR(.altinstructions) - LOAD_OFFSET) {
		__alt_instructions = .;
		*(.altinstructions)
		__alt_instructions_end = .;
	}
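	/*
	 * For illustration only: the records collected here are roughly of
	 * the following shape (relative offsets to the original and the
	 * replacement instruction, plus the CPU feature that selects the
	 * replacement). Field names and widths below are a sketch; see
	 * arch/x86/include/asm/alternative.h for the real struct alt_instr.
	 *
	 *	struct alt_instr_sketch {
	 *		s32 instr_offset;	// original instruction
	 *		s32 repl_offset;	// replacement in .altinstr_replacement
	 *		u16 cpuid;		// feature bit gating the patch
	 *		u8  instrlen;		// length of the original
	 *		u8  replacementlen;	// length of the replacement
	 *	};
	 */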

	/*
	 * And here are the replacement instructions. The linker sticks them
	 * in as binary blobs. The .altinstructions section carries enough
	 * data (the address and length of both) to patch the kernel safely.
	 */
	.altinstr_replacement : AT(ADDR(.altinstr_replacement) - LOAD_OFFSET) {
		*(.altinstr_replacement)
	}

	. = ALIGN(8);
	.apicdrivers : AT(ADDR(.apicdrivers) - LOAD_OFFSET) {
		__apicdrivers = .;
		*(.apicdrivers);
		__apicdrivers_end = .;
	}
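	/*
	 * For illustration only: start/end symbol pairs like the one above
	 * are consumed from C as a linker-built array. A minimal sketch,
	 * assuming the usual extern-array declarations (try_driver() is a
	 * hypothetical helper):
	 *
	 *	extern struct apic *__apicdrivers[], *__apicdrivers_end[];
	 *
	 *	static void probe_apic_drivers(void)
	 *	{
	 *		struct apic **drv;
	 *
	 *		for (drv = __apicdrivers; drv < __apicdrivers_end; drv++)
	 *			try_driver(*drv);
	 *	}
	 *
	 * The same pattern applies to .x86_cpu_dev.init, .retpoline_sites and
	 * the other bracketed sections in this script.
	 */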

	. = ALIGN(8);
	/*
	 * .exit.text is discarded at runtime, not link time, to deal with
	 * references from .altinstructions.
	 */
	.exit.text : AT(ADDR(.exit.text) - LOAD_OFFSET) {
		EXIT_TEXT
	}

	.exit.data : AT(ADDR(.exit.data) - LOAD_OFFSET) {
		EXIT_DATA
	}

	PERCPU_SECTION(L1_CACHE_BYTES)
	ASSERT(__per_cpu_hot_end - __per_cpu_hot_start <= 64, "percpu cache hot data too large")

	RUNTIME_CONST_VARIABLES
	RUNTIME_CONST(ptr, USER_PTR_MAX)

	. = ALIGN(PAGE_SIZE);

	/* freed after init ends here */
	.init.end : AT(ADDR(.init.end) - LOAD_OFFSET) {
		__init_end = .;
	}

	/*
	 * smp_locks might be freed after init
	 * start/end must be page aligned
	 */
	. = ALIGN(PAGE_SIZE);
	.smp_locks : AT(ADDR(.smp_locks) - LOAD_OFFSET) {
		__smp_locks = .;
		*(.smp_locks)
		. = ALIGN(PAGE_SIZE);
		__smp_locks_end = .;
	}

#ifdef CONFIG_X86_64
	.data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) {
		NOSAVE_DATA
	}
#endif

	/* BSS */
	. = ALIGN(PAGE_SIZE);
	.bss : AT(ADDR(.bss) - LOAD_OFFSET) {
		__bss_start = .;
		*(.bss..page_aligned)
		. = ALIGN(PAGE_SIZE);
		*(BSS_MAIN)
		BSS_DECRYPTED
		. = ALIGN(PAGE_SIZE);
		__bss_stop = .;
	}

	/*
	 * The memory occupied from _text to here, __end_of_kernel_reserve, is
	 * automatically reserved in setup_arch(). Anything after here must be
	 * explicitly reserved using memblock_reserve() or it will be discarded
	 * and treated as available memory.
	 */
	__end_of_kernel_reserve = .;
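	/*
	 * For illustration only, assumed from the comment above: the early
	 * reservation done in setup_arch() looks roughly like this (see
	 * arch/x86/kernel/setup.c for the real code):
	 *
	 *	memblock_reserve(__pa_symbol(_text),
	 *			 (unsigned long)__end_of_kernel_reserve -
	 *			 (unsigned long)_text);
	 */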

	. = ALIGN(PAGE_SIZE);
	.brk : AT(ADDR(.brk) - LOAD_OFFSET) {
		__brk_base = .;
		. += 64 * 1024;		/* 64k alignment slop space */
		*(.bss..brk)		/* areas brk users have reserved */
		__brk_limit = .;
	}
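	/*
	 * For illustration only: early boot code claims space in .bss..brk
	 * with a section attribute. A minimal sketch, modelled on the
	 * kernel's RESERVE_BRK() helper (see arch/x86/include/asm/setup.h
	 * for the real definition; the user below is hypothetical):
	 *
	 *	#define RESERVE_BRK(name, size)				\
	 *		__section(".bss..brk") __aligned(1) __used	\
	 *		static char __brk_##name[size]
	 *
	 *	RESERVE_BRK(early_pgtable_alloc, 6 * PAGE_SIZE);
	 */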

	. = ALIGN(PAGE_SIZE);		/* keep VO_INIT_SIZE page aligned */
	_end = .;
	__pi__end = .;

#ifdef CONFIG_AMD_MEM_ENCRYPT
	/*
	 * Early scratch/workarea section: lives outside of the kernel proper
	 * (_text - _end).
	 *
	 * Resides after _end because even though the .brk section is after
	 * __end_of_kernel_reserve, the .brk section is later reserved as a
	 * part of the kernel. Since this scratch area is located after
	 * __end_of_kernel_reserve, it will be discarded and become part of
	 * the available memory. As such, it can only be used by very early
	 * boot code and must not be needed afterwards.
	 *
	 * Currently used by SME for performing in-place encryption of the
	 * kernel during boot. Resides on a 2MB boundary to simplify the
	 * pagetable setup used for SME in-place encryption.
	 */
	. = ALIGN(HPAGE_SIZE);
	.init.scratch : AT(ADDR(.init.scratch) - LOAD_OFFSET) {
		__init_scratch_begin = .;
		*(.init.scratch)
		. = ALIGN(HPAGE_SIZE);
		__init_scratch_end = .;
	}
#endif

	STABS_DEBUG
	DWARF_DEBUG
#ifdef CONFIG_PROPELLER_CLANG
	.llvm_bb_addr_map : { *(.llvm_bb_addr_map) }
#endif

	ELF_DETAILS

	DISCARDS

	/*
	 * Make sure that the .got.plt is either completely empty or it
	 * contains only the three reserved lazy dispatch entries (8 bytes
	 * each on 64-bit, 4 bytes each on 32-bit, hence the 0x18 and 0xc
	 * sizes accepted below).
	 */
	.got.plt (INFO) : { *(.got.plt) }
	ASSERT(SIZEOF(.got.plt) == 0 ||
#ifdef CONFIG_X86_64
	       SIZEOF(.got.plt) == 0x18,
#else
	       SIZEOF(.got.plt) == 0xc,
#endif
	       "Unexpected GOT/PLT entries detected!")

	/*
	 * Sections that should stay zero sized, which is safer to
	 * explicitly check instead of blindly discarding.
	 */
	.got : {
		*(.got) *(.igot.*)
	}
	ASSERT(SIZEOF(.got) == 0, "Unexpected GOT entries detected!")

	.plt : {
		*(.plt) *(.plt.*) *(.iplt)
	}
	ASSERT(SIZEOF(.plt) == 0, "Unexpected run-time procedure linkages detected!")

	.rel.dyn : {
		*(.rel.*) *(.rel_*)
	}
	ASSERT(SIZEOF(.rel.dyn) == 0, "Unexpected run-time relocations (.rel) detected!")

	.rela.dyn : {
		*(.rela.*) *(.rela_*)
	}
	ASSERT(SIZEOF(.rela.dyn) == 0, "Unexpected run-time relocations (.rela) detected!")
}

/*
 * COMPILE_TEST kernels can be large - CONFIG_KASAN, for example, can cause
 * this. Let's assume that nobody will be running a COMPILE_TEST kernel and
 * let's assert that fuller build coverage is more valuable than being able to
 * run a COMPILE_TEST kernel.
 */
#ifndef CONFIG_COMPILE_TEST
/*
 * Assigning the ASSERT() to '.' is intentional, for binutils 2.14
 * compatibility:
 */
. = ASSERT((_end - LOAD_OFFSET <= KERNEL_IMAGE_SIZE),
	   "kernel image bigger than KERNEL_IMAGE_SIZE");
#endif

/* needed for Clang - see arch/x86/entry/entry.S */
PROVIDE(__ref_stack_chk_guard = __stack_chk_guard);

#ifdef CONFIG_X86_64

#ifdef CONFIG_MITIGATION_UNRET_ENTRY
. = ASSERT((retbleed_return_thunk & 0x3f) == 0, "retbleed_return_thunk not cacheline-aligned");
#endif

#ifdef CONFIG_MITIGATION_SRSO
. = ASSERT((srso_safe_ret & 0x3f) == 0, "srso_safe_ret not cacheline-aligned");
/*
 * GNU ld cannot do XOR until 2.41.
 * https://sourceware.org/git/?p=binutils-gdb.git;a=commit;h=f6f78318fca803c4907fb8d7f6ded8295f1947b1
 *
 * LLVM lld cannot do XOR until lld-17.
 * https://github.com/llvm/llvm-project/commit/fae96104d4378166cbe5c875ef8ed808a356f3fb
 *
 * Instead do: (A | B) - (A & B) in order to compute the XOR
 * of the two function addresses:
 */
. = ASSERT(((ABSOLUTE(srso_alias_untrain_ret) | srso_alias_safe_ret) -
		(ABSOLUTE(srso_alias_untrain_ret) & srso_alias_safe_ret)) == ((1 << 2) | (1 << 8) | (1 << 14) | (1 << 20)),
	   "SRSO function pair won't alias");
#endif
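
/*
 * For illustration only: the (A | B) - (A & B) workaround above relies on
 * the identity (A | B) == (A ^ B) + (A & B), so (A | B) - (A & B) == A ^ B.
 * A quick self-contained check in C:
 *
 *	#include <assert.h>
 *	#include <stdint.h>
 *
 *	int main(void)
 *	{
 *		uint64_t a = 0xffffffff81a01234ULL;
 *		uint64_t b = a ^ ((1u << 2) | (1u << 8) | (1u << 14) | (1u << 20));
 *
 *		assert(((a | b) - (a & b)) == (a ^ b));
 *		return 0;
 *	}
 */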

#if defined(CONFIG_MITIGATION_ITS) && !defined(CONFIG_DEBUG_FORCE_FUNCTION_ALIGN_64B)
. = ASSERT(__x86_indirect_its_thunk_rax & 0x20, "__x86_indirect_its_thunk_rax not in second half of cacheline");
. = ASSERT(((__x86_indirect_its_thunk_rcx - __x86_indirect_its_thunk_rax) % 64) == 0, "Indirect thunks are not cacheline apart");
. = ASSERT(__x86_indirect_its_thunk_array == __x86_indirect_its_thunk_rax, "Gap in ITS thunk array");
#endif

#if defined(CONFIG_MITIGATION_ITS) && !defined(CONFIG_DEBUG_FORCE_FUNCTION_ALIGN_64B)
. = ASSERT(its_return_thunk & 0x20, "its_return_thunk not in second half of cacheline");
#endif

#endif /* CONFIG_X86_64 */

/*
 * The symbols below are referenced using relative relocations in the
 * respective ELF notes. This produces build-time constants that the
 * linker will never mark as relocatable. (Using just ABSOLUTE() is not
 * sufficient for that.)
 */
#ifdef CONFIG_XEN_PV
xen_elfnote_entry_value =
	ABSOLUTE(xen_elfnote_entry) + ABSOLUTE(startup_xen);
#endif
#ifdef CONFIG_PVH
xen_elfnote_phys32_entry_value =
	ABSOLUTE(xen_elfnote_phys32_entry) + ABSOLUTE(pvh_start_xen - LOAD_OFFSET);
#endif
538 | |