// SPDX-License-Identifier: GPL-2.0
/*
 * prepare to run common code
 *
 * Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
 */

#define DISABLE_BRANCH_PROFILING

/* cpu_feature_enabled() cannot be used this early */
#define USE_EARLY_PGTABLE_L5

#include <linux/init.h>
#include <linux/linkage.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/percpu.h>
#include <linux/start_kernel.h>
#include <linux/io.h>
#include <linux/memblock.h>
#include <linux/cc_platform.h>
#include <linux/pgtable.h>

#include <asm/processor.h>
#include <asm/proto.h>
#include <asm/smp.h>
#include <asm/setup.h>
#include <asm/desc.h>
#include <asm/tlbflush.h>
#include <asm/sections.h>
#include <asm/kdebug.h>
#include <asm/e820/api.h>
#include <asm/bios_ebda.h>
#include <asm/bootparam_utils.h>
#include <asm/microcode.h>
#include <asm/kasan.h>
#include <asm/fixmap.h>
#include <asm/realmode.h>
#include <asm/extable.h>
#include <asm/trapnr.h>
#include <asm/sev.h>
#include <asm/tdx.h>
#include <asm/init.h>

/*
 * Manage page tables very early on.
 */
extern pmd_t early_dynamic_pgts[EARLY_DYNAMIC_PAGE_TABLES][PTRS_PER_PMD];
static unsigned int __initdata next_early_pgt;
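/*
 * _PAGE_GLOBAL is presumably left out so the early TLB entries can be
 * flushed once the final page tables take over; _PAGE_NX is left out since
 * EFER.NXE may not be enabled yet when these mappings are first used.
 */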
pmdval_t early_pmd_flags = __PAGE_KERNEL_LARGE & ~(_PAGE_GLOBAL | _PAGE_NX);

#ifdef CONFIG_X86_5LEVEL
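/*
 * The values below describe the 4-level layout (p4d folded); they are
 * rewritten for the 5-level layout in check_la57_support() when LA57 is
 * enabled.
 */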
unsigned int __pgtable_l5_enabled __ro_after_init;
unsigned int pgdir_shift __ro_after_init = 39;
EXPORT_SYMBOL(pgdir_shift);
unsigned int ptrs_per_p4d __ro_after_init = 1;
EXPORT_SYMBOL(ptrs_per_p4d);
#endif

#ifdef CONFIG_DYNAMIC_MEMORY_LAYOUT
unsigned long page_offset_base __ro_after_init = __PAGE_OFFSET_BASE_L4;
EXPORT_SYMBOL(page_offset_base);
unsigned long vmalloc_base __ro_after_init = __VMALLOC_BASE_L4;
EXPORT_SYMBOL(vmalloc_base);
unsigned long vmemmap_base __ro_after_init = __VMEMMAP_BASE_L4;
EXPORT_SYMBOL(vmemmap_base);
#endif

/*
 * GDT used on the boot CPU before switching to virtual addresses.
 */
static struct desc_struct startup_gdt[GDT_ENTRIES] __initdata = {
	[GDT_ENTRY_KERNEL32_CS] = GDT_ENTRY_INIT(0xc09b, 0, 0xfffff),
	[GDT_ENTRY_KERNEL_CS]   = GDT_ENTRY_INIT(0xa09b, 0, 0xfffff),
	[GDT_ENTRY_KERNEL_DS]   = GDT_ENTRY_INIT(0xc093, 0, 0xfffff),
};

/*
 * Address needs to be set at runtime because it references the startup_gdt
 * while the kernel still uses a direct mapping.
 */
static struct desc_ptr startup_gdt_descr __initdata = {
	.size = sizeof(startup_gdt) - 1,
	.address = 0,
};

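/*
 * fixup_pointer() turns the link-time address of a global into the address
 * that is valid under the early identity mapping: e.g. with the kernel
 * linked at _text == 0xffffffff81000000 but loaded at physaddr == 0x1000000,
 * a global linked at 0xffffffff81234000 is reachable at 0x1234000.
 */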
static void __head *fixup_pointer(void *ptr, unsigned long physaddr)
{
	return ptr - (void *)_text + (void *)physaddr;
}

static unsigned long __head *fixup_long(void *ptr, unsigned long physaddr)
{
	return fixup_pointer(ptr, physaddr);
}

#ifdef CONFIG_X86_5LEVEL
static unsigned int __head *fixup_int(void *ptr, unsigned long physaddr)
{
	return fixup_pointer(ptr, physaddr);
}

static bool __head check_la57_support(unsigned long physaddr)
{
	/*
	 * 5-level paging is detected and enabled at kernel decompression
	 * stage. Only check if it has been enabled there.
	 */
	if (!(native_read_cr4() & X86_CR4_LA57))
		return false;

	*fixup_int(&__pgtable_l5_enabled, physaddr) = 1;
	*fixup_int(&pgdir_shift, physaddr) = 48;
	*fixup_int(&ptrs_per_p4d, physaddr) = 512;
	*fixup_long(&page_offset_base, physaddr) = __PAGE_OFFSET_BASE_L5;
	*fixup_long(&vmalloc_base, physaddr) = __VMALLOC_BASE_L5;
	*fixup_long(&vmemmap_base, physaddr) = __VMEMMAP_BASE_L5;

	return true;
}
#else
static bool __head check_la57_support(unsigned long physaddr)
{
	return false;
}
#endif

static unsigned long __head sme_postprocess_startup(struct boot_params *bp, pmdval_t *pmd)
{
	unsigned long vaddr, vaddr_end;
	int i;

	/* Encrypt the kernel and related (if SME is active) */
	sme_encrypt_kernel(bp);

	/*
	 * Clear the memory encryption mask from the .bss..decrypted section.
	 * The bss section will be memset to zero later in the initialization so
	 * there is no need to zero it after changing the memory encryption
	 * attribute.
	 */
	if (sme_get_me_mask()) {
		vaddr = (unsigned long)__start_bss_decrypted;
		vaddr_end = (unsigned long)__end_bss_decrypted;

		for (; vaddr < vaddr_end; vaddr += PMD_SIZE) {
			/*
			 * On SNP, transition the page to shared in the RMP table so that
			 * it is consistent with the page table attribute change.
			 *
			 * __start_bss_decrypted has a virtual address in the high range
			 * mapping (kernel .text). PVALIDATE, by way of
			 * early_snp_set_memory_shared(), requires a valid virtual
			 * address but the kernel is currently running off of the identity
			 * mapping so use __pa() to get a *currently* valid virtual address.
			 */
			early_snp_set_memory_shared(__pa(vaddr), __pa(vaddr), PTRS_PER_PMD);

			i = pmd_index(vaddr);
			pmd[i] -= sme_get_me_mask();
		}
	}

	/*
	 * Return the SME encryption mask (if SME is active) to be used as a
	 * modifier for the initial pgdir entry programmed into CR3.
	 */
	return sme_get_me_mask();
}

/*
 * Code in __startup_64() can be relocated during execution, but the compiler
 * doesn't have to generate PC-relative relocations when accessing globals from
 * that function. Clang actually does not generate them, which leads to
 * boot-time crashes. To work around this problem, every global pointer must
 * be adjusted using fixup_pointer().
 */
unsigned long __head __startup_64(unsigned long physaddr,
				  struct boot_params *bp)
{
	unsigned long load_delta, *p;
	unsigned long pgtable_flags;
	pgdval_t *pgd;
	p4dval_t *p4d;
	pudval_t *pud;
	pmdval_t *pmd, pmd_entry;
	pteval_t *mask_ptr;
	bool la57;
	int i;
	unsigned int *next_pgt_ptr;

	la57 = check_la57_support(physaddr);

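	/*
	 * The sanity checks below spin forever on failure: this early there
	 * is no console and no exception handling, so hanging the CPU is the
	 * only way to flag a fatal error.
	 */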
	/* Is the address too large? */
	if (physaddr >> MAX_PHYSMEM_BITS)
		for (;;);

	/*
	 * Compute the delta between the address I am compiled to run at
	 * and the address I am actually running at.
	 */
	load_delta = physaddr - (unsigned long)(_text - __START_KERNEL_map);

	/* Is the address not 2M aligned? */
	if (load_delta & ~PMD_MASK)
		for (;;);

	/* Include the SME encryption mask in the fixup value */
	load_delta += sme_get_me_mask();

	/* Fixup the physical addresses in the page table */

	pgd = fixup_pointer(early_top_pgt, physaddr);
	p = pgd + pgd_index(__START_KERNEL_map);
	if (la57)
		*p = (unsigned long)level4_kernel_pgt;
	else
		*p = (unsigned long)level3_kernel_pgt;
	*p += _PAGE_TABLE_NOENC - __START_KERNEL_map + load_delta;

	if (la57) {
		p4d = fixup_pointer(level4_kernel_pgt, physaddr);
		p4d[511] += load_delta;
	}

	pud = fixup_pointer(level3_kernel_pgt, physaddr);
	pud[510] += load_delta;
	pud[511] += load_delta;

	pmd = fixup_pointer(level2_fixmap_pgt, physaddr);
	for (i = FIXMAP_PMD_TOP; i > FIXMAP_PMD_TOP - FIXMAP_PMD_NUM; i--)
		pmd[i] += load_delta;

	/*
	 * Set up the identity mapping for the switchover.  These
	 * entries should *NOT* have the global bit set!  This also
	 * creates a bunch of nonsense entries but that is fine --
	 * it avoids problems around wraparound.
	 */

	next_pgt_ptr = fixup_pointer(&next_early_pgt, physaddr);
	pud = fixup_pointer(early_dynamic_pgts[(*next_pgt_ptr)++], physaddr);
	pmd = fixup_pointer(early_dynamic_pgts[(*next_pgt_ptr)++], physaddr);

	pgtable_flags = _KERNPG_TABLE_NOENC + sme_get_me_mask();

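	/*
	 * Two adjacent entries are written at each level below because the
	 * kernel image may straddle a PGD/P4D/PUD boundary; mapping entry
	 * i and i + 1 keeps the whole image covered either way.
	 */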
	if (la57) {
		p4d = fixup_pointer(early_dynamic_pgts[(*next_pgt_ptr)++],
				    physaddr);

		i = (physaddr >> PGDIR_SHIFT) % PTRS_PER_PGD;
		pgd[i + 0] = (pgdval_t)p4d + pgtable_flags;
		pgd[i + 1] = (pgdval_t)p4d + pgtable_flags;

		i = physaddr >> P4D_SHIFT;
		p4d[(i + 0) % PTRS_PER_P4D] = (pgdval_t)pud + pgtable_flags;
		p4d[(i + 1) % PTRS_PER_P4D] = (pgdval_t)pud + pgtable_flags;
	} else {
		i = (physaddr >> PGDIR_SHIFT) % PTRS_PER_PGD;
		pgd[i + 0] = (pgdval_t)pud + pgtable_flags;
		pgd[i + 1] = (pgdval_t)pud + pgtable_flags;
	}

	i = physaddr >> PUD_SHIFT;
	pud[(i + 0) % PTRS_PER_PUD] = (pudval_t)pmd + pgtable_flags;
	pud[(i + 1) % PTRS_PER_PUD] = (pudval_t)pmd + pgtable_flags;

	pmd_entry = __PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL;
	/* Filter out unsupported __PAGE_KERNEL_* bits: */
	mask_ptr = fixup_pointer(&__supported_pte_mask, physaddr);
	pmd_entry &= *mask_ptr;
	pmd_entry += sme_get_me_mask();
	pmd_entry += physaddr;

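	/*
	 * Identity-map the kernel image with 2M pages: one PMD entry per
	 * PMD_SIZE chunk of [_text, _end), each pointing at the physical
	 * load address plus the chunk offset.
	 */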
	for (i = 0; i < DIV_ROUND_UP(_end - _text, PMD_SIZE); i++) {
		int idx = i + (physaddr >> PMD_SHIFT);

		pmd[idx % PTRS_PER_PMD] = pmd_entry + i * PMD_SIZE;
	}

	/*
	 * Fixup the kernel text+data virtual addresses. Note that
	 * we might write invalid pmds, when the kernel is relocated
	 * cleanup_highmap() fixes this up along with the mappings
	 * beyond _end.
	 *
	 * Only the region occupied by the kernel image has so far
	 * been checked against the table of usable memory regions
	 * provided by the firmware, so invalidate pages outside that
	 * region. A page table entry that maps to a reserved area of
	 * memory would allow processor speculation into that area,
	 * and on some hardware (particularly the UV platform) even
	 * speculative access to some reserved areas is caught as an
	 * error, causing the BIOS to halt the system.
	 */

	pmd = fixup_pointer(level2_kernel_pgt, physaddr);

	/* invalidate pages before the kernel image */
	for (i = 0; i < pmd_index((unsigned long)_text); i++)
		pmd[i] &= ~_PAGE_PRESENT;

	/* fixup pages that are part of the kernel image */
	for (; i <= pmd_index((unsigned long)_end); i++)
		if (pmd[i] & _PAGE_PRESENT)
			pmd[i] += load_delta;

	/* invalidate pages after the kernel image */
	for (; i < PTRS_PER_PMD; i++)
		pmd[i] &= ~_PAGE_PRESENT;

	/*
	 * Fixup phys_base - remove the memory encryption mask to obtain
	 * the true physical address.
	 */
	*fixup_long(&phys_base, physaddr) += load_delta - sme_get_me_mask();

	return sme_postprocess_startup(bp, pmd);
}

/* Wipe all early page tables except for the kernel symbol map */
static void __init reset_early_page_tables(void)
{
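	/* Clear entries 0..510; entry 511 holds the kernel high mapping. */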
	memset(early_top_pgt, 0, sizeof(pgd_t) * (PTRS_PER_PGD - 1));
	next_early_pgt = 0;
	write_cr3(__sme_pa_nodebug(early_top_pgt));
}

/* Create a new PMD entry */
bool __init __early_make_pgtable(unsigned long address, pmdval_t pmd)
{
	unsigned long physaddr = address - __PAGE_OFFSET;
	pgdval_t pgd, *pgd_p;
	p4dval_t p4d, *p4d_p;
	pudval_t pud, *pud_p;
	pmdval_t *pmd_p;

	/* Invalid address or early pgt is done? */
	if (physaddr >= MAXMEM || read_cr3_pa() != __pa_nodebug(early_top_pgt))
		return false;

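	/*
	 * If the pool of early dynamic page tables runs out at any level,
	 * everything built from it is discarded via reset_early_page_tables()
	 * and the walk restarts from a clean early_top_pgt.
	 */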
again:
	pgd_p = &early_top_pgt[pgd_index(address)].pgd;
	pgd = *pgd_p;

	/*
	 * The use of __START_KERNEL_map rather than __PAGE_OFFSET here is
	 * critical -- __PAGE_OFFSET would point us back into the dynamic
	 * range and we might end up looping forever...
	 */
	if (!pgtable_l5_enabled())
		p4d_p = pgd_p;
	else if (pgd)
		p4d_p = (p4dval_t *)((pgd & PTE_PFN_MASK) + __START_KERNEL_map - phys_base);
	else {
		if (next_early_pgt >= EARLY_DYNAMIC_PAGE_TABLES) {
			reset_early_page_tables();
			goto again;
		}

		p4d_p = (p4dval_t *)early_dynamic_pgts[next_early_pgt++];
		memset(p4d_p, 0, sizeof(*p4d_p) * PTRS_PER_P4D);
		*pgd_p = (pgdval_t)p4d_p - __START_KERNEL_map + phys_base + _KERNPG_TABLE;
	}
	p4d_p += p4d_index(address);
	p4d = *p4d_p;

	if (p4d)
		pud_p = (pudval_t *)((p4d & PTE_PFN_MASK) + __START_KERNEL_map - phys_base);
	else {
		if (next_early_pgt >= EARLY_DYNAMIC_PAGE_TABLES) {
			reset_early_page_tables();
			goto again;
		}

		pud_p = (pudval_t *)early_dynamic_pgts[next_early_pgt++];
		memset(pud_p, 0, sizeof(*pud_p) * PTRS_PER_PUD);
		*p4d_p = (p4dval_t)pud_p - __START_KERNEL_map + phys_base + _KERNPG_TABLE;
	}
	pud_p += pud_index(address);
	pud = *pud_p;

	if (pud)
		pmd_p = (pmdval_t *)((pud & PTE_PFN_MASK) + __START_KERNEL_map - phys_base);
	else {
		if (next_early_pgt >= EARLY_DYNAMIC_PAGE_TABLES) {
			reset_early_page_tables();
			goto again;
		}

		pmd_p = (pmdval_t *)early_dynamic_pgts[next_early_pgt++];
		memset(pmd_p, 0, sizeof(*pmd_p) * PTRS_PER_PMD);
		*pud_p = (pudval_t)pmd_p - __START_KERNEL_map + phys_base + _KERNPG_TABLE;
	}
	pmd_p[pmd_index(address)] = pmd;

	return true;
}

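/*
 * Build an early 2M mapping for a faulting direct-map address: round the
 * physical address down to a PMD boundary and apply early_pmd_flags.
 */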
static bool __init early_make_pgtable(unsigned long address)
{
	unsigned long physaddr = address - __PAGE_OFFSET;
	pmdval_t pmd;

	pmd = (physaddr & PMD_MASK) + early_pmd_flags;

	return __early_make_pgtable(address, pmd);
}

void __init do_early_exception(struct pt_regs *regs, int trapnr)
{
	if (trapnr == X86_TRAP_PF &&
	    early_make_pgtable(native_read_cr2()))
		return;

	if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT) &&
	    trapnr == X86_TRAP_VC && handle_vc_boot_ghcb(regs))
		return;

	if (trapnr == X86_TRAP_VE && tdx_early_handle_ve(regs))
		return;

	early_fixup_exception(regs, trapnr);
}

/*
 * Don't add a printk in here. printk relies on the PDA which is not
 * initialized yet.
 */
void __init clear_bss(void)
{
	memset(__bss_start, 0,
	       (unsigned long) __bss_stop - (unsigned long) __bss_start);
	memset(__brk_base, 0,
	       (unsigned long) __brk_limit - (unsigned long) __brk_base);
}

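/*
 * The 64-bit command line pointer is split across two boot_params fields:
 * the legacy 32-bit hdr.cmd_line_ptr plus the upper half in ext_cmd_line_ptr.
 */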
static unsigned long get_cmd_line_ptr(void)
{
	unsigned long cmd_line_ptr = boot_params.hdr.cmd_line_ptr;

	cmd_line_ptr |= (u64)boot_params.ext_cmd_line_ptr << 32;

	return cmd_line_ptr;
}

static void __init copy_bootdata(char *real_mode_data)
{
	char *command_line;
	unsigned long cmd_line_ptr;

	/*
	 * If SME is active, this will create decrypted mappings of the
	 * boot data in advance of the copy operations.
	 */
	sme_map_bootdata(real_mode_data);

	memcpy(&boot_params, real_mode_data, sizeof(boot_params));
	sanitize_boot_params(&boot_params);
	cmd_line_ptr = get_cmd_line_ptr();
	if (cmd_line_ptr) {
		command_line = __va(cmd_line_ptr);
		memcpy(boot_command_line, command_line, COMMAND_LINE_SIZE);
	}

	/*
	 * The old boot data is no longer needed and won't be reserved,
	 * freeing up that memory for use by the system. If SME is active,
	 * we need to remove the mappings that were created so that the
	 * memory doesn't remain mapped as decrypted.
	 */
	sme_unmap_bootdata(real_mode_data);
}

asmlinkage __visible void __init __noreturn x86_64_start_kernel(char *real_mode_data)
{
	/*
	 * Build-time sanity checks on the kernel image and module
	 * area mappings. (these are purely build-time and produce no code)
	 */
	BUILD_BUG_ON(MODULES_VADDR < __START_KERNEL_map);
	BUILD_BUG_ON(MODULES_VADDR - __START_KERNEL_map < KERNEL_IMAGE_SIZE);
	BUILD_BUG_ON(MODULES_LEN + KERNEL_IMAGE_SIZE > 2*PUD_SIZE);
	BUILD_BUG_ON((__START_KERNEL_map & ~PMD_MASK) != 0);
	BUILD_BUG_ON((MODULES_VADDR & ~PMD_MASK) != 0);
	BUILD_BUG_ON(!(MODULES_VADDR > __START_KERNEL));
	MAYBE_BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) ==
				(__START_KERNEL & PGDIR_MASK)));
	BUILD_BUG_ON(__fix_to_virt(__end_of_fixed_addresses) <= MODULES_END);

	cr4_init_shadow();

	/* Kill off the identity-map trampoline */
	reset_early_page_tables();

	clear_bss();

	/*
	 * This needs to happen *before* kasan_early_init() because the latter
	 * maps stuff into that page.
	 */
	clear_page(init_top_pgt);

	/*
	 * SME support may update early_pmd_flags to include the memory
	 * encryption mask, so it needs to be called before anything
	 * that may generate a page fault.
	 */
	sme_early_init();

	kasan_early_init();

	/*
	 * Flush global TLB entries which could be left over from the trampoline page
	 * table.
	 *
	 * This needs to happen *after* kasan_early_init() as KASAN-enabled .configs
	 * instrument native_write_cr4() so KASAN must be initialized for that
	 * instrumentation to work.
	 */
	__native_tlb_flush_global(this_cpu_read(cpu_tlbstate.cr4));

	idt_setup_early_handler();

	/* Needed before cc_platform_has() can be used for TDX */
	tdx_early_init();

	copy_bootdata(__va(real_mode_data));

	/*
	 * Load microcode early on BSP.
	 */
	load_ucode_bsp();

	/* Set init_top_pgt kernel high mapping */
	init_top_pgt[511] = early_top_pgt[511];

	x86_64_start_reservations(real_mode_data);
}

void __init __noreturn x86_64_start_reservations(char *real_mode_data)
{
	/* version is always non-zero if it has been copied */
	if (!boot_params.hdr.version)
		copy_bootdata(__va(real_mode_data));

	x86_early_init_platform_quirks();

	switch (boot_params.hdr.hardware_subarch) {
	case X86_SUBARCH_INTEL_MID:
		x86_intel_mid_early_setup();
		break;
	default:
		break;
	}

	start_kernel();
}

/*
 * Data structures and code used for IDT setup in head_64.S. The bringup-IDT is
 * used until the idt_table takes over. On the boot CPU this happens in
 * x86_64_start_kernel(), on secondary CPUs in start_secondary(). In both cases
 * this happens in the functions called from head_64.S.
 *
 * The idt_table can't be used that early because all the code modifying it is
 * in idt.c and can be instrumented by tracing or KASAN, which both don't work
 * during early CPU bringup. Also the idt_table has the runtime vectors
 * configured which require certain CPU state to be setup already (like TSS),
 * which also hasn't happened yet in early CPU bringup.
 */
static gate_desc bringup_idt_table[NUM_EXCEPTION_VECTORS] __page_aligned_data;

static struct desc_ptr bringup_idt_descr = {
	.size		= (NUM_EXCEPTION_VECTORS * sizeof(gate_desc)) - 1,
	.address	= 0, /* Set at runtime */
};

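/*
 * Only the AMD SEV-ES #VC handler needs a bringup-IDT entry, so this helper
 * compiles to a no-op unless CONFIG_AMD_MEM_ENCRYPT is enabled.
 */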
static void set_bringup_idt_handler(gate_desc *idt, int n, void *handler)
{
#ifdef CONFIG_AMD_MEM_ENCRYPT
	struct idt_data data;
	gate_desc desc;

	init_idt_data(&data, n, handler);
	idt_init_desc(&desc, &data);
	native_write_idt_entry(idt, n, &desc);
#endif
}

/* This runs while still in the direct mapping */
static void __head startup_64_load_idt(unsigned long physbase)
{
	struct desc_ptr *desc = fixup_pointer(&bringup_idt_descr, physbase);
	gate_desc *idt = fixup_pointer(bringup_idt_table, physbase);

	if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT)) {
		void *handler;

		/* VMM Communication Exception */
		handler = fixup_pointer(vc_no_ghcb, physbase);
		set_bringup_idt_handler(idt, X86_TRAP_VC, handler);
	}

	desc->address = (unsigned long)idt;
	native_load_idt(desc);
}

/* This is used when running on kernel addresses */
void early_setup_idt(void)
{
	/* VMM Communication Exception */
	if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT)) {
		setup_ghcb();
		set_bringup_idt_handler(bringup_idt_table, X86_TRAP_VC, vc_boot_ghcb);
	}

	bringup_idt_descr.address = (unsigned long)bringup_idt_table;
	native_load_idt(&bringup_idt_descr);
}

/*
 * Setup boot CPU state needed before kernel switches to virtual addresses.
 */
void __head startup_64_setup_env(unsigned long physbase)
{
	/* Load GDT */
	startup_gdt_descr.address = (unsigned long)fixup_pointer(startup_gdt, physbase);
	native_load_gdt(&startup_gdt_descr);

	/* New GDT is live - reload data segment registers */
	asm volatile("movl %%eax, %%ds\n"
		     "movl %%eax, %%ss\n"
		     "movl %%eax, %%es\n" : : "a" (__KERNEL_DS) : "memory");

	startup_64_load_idt(physbase);
}