/* SPDX-License-Identifier: GPL-2.0 */
/*
 * linux/arch/x86/kernel/head_64.S -- start in 32bit and switch to 64bit
 *
 * Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
 * Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
 * Copyright (C) 2000 Karsten Keil <kkeil@suse.de>
 * Copyright (C) 2001,2002 Andi Kleen <ak@suse.de>
 * Copyright (C) 2005 Eric Biederman <ebiederm@xmission.com>
 */

#include <linux/export.h>
#include <linux/linkage.h>
#include <linux/threads.h>
#include <linux/init.h>
#include <linux/pgtable.h>
#include <asm/segment.h>
#include <asm/page.h>
#include <asm/msr.h>
#include <asm/cache.h>
#include <asm/processor-flags.h>
#include <asm/percpu.h>
#include <asm/nops.h>
#include "../entry/calling.h"
#include <asm/nospec-branch.h>
#include <asm/apicdef.h>
#include <asm/fixmap.h>
#include <asm/smp.h>
#include <asm/thread_info.h>

/*
 * We are not able to switch in one step to the final KERNEL ADDRESS SPACE
 * because we need identity-mapped pages.
 */

	__INIT
	.code64
SYM_CODE_START_NOALIGN(startup_64)
	UNWIND_HINT_END_OF_STACK
	/*
	 * At this point the CPU runs in 64-bit mode with CS.L = 1 and
	 * CS.D = 0, and someone has loaded an identity mapped page table
	 * for us. These identity mapped page tables map all of the
	 * kernel pages and possibly all of memory.
	 *
	 * %RSI holds the physical address of the boot_params structure
	 * provided by the bootloader. Preserve it in %R15 so C function calls
	 * will not clobber it.
	 *
	 * We come here either directly from a 64-bit bootloader, or from
	 * arch/x86/boot/compressed/head_64.S.
	 *
	 * We only come here initially at boot; nothing else comes here.
	 *
	 * Since we may have been loaded at an address different from the one
	 * we were compiled to run at, we first fix up the physical addresses
	 * in our page tables and then reload them.
	 */
	mov	%rsi, %r15

	/* Set up the stack for verify_cpu() */
	leaq	__top_init_kernel_stack(%rip), %rsp

	/*
	 * Set up GSBASE.
	 * Note that on SMP the boot CPU uses the init data section until
	 * the per-CPU areas are set up.
	 */
	movl	$MSR_GS_BASE, %ecx
	xorl	%eax, %eax
	xorl	%edx, %edx
	wrmsr
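	/*
	 * Note: WRMSR writes EDX:EAX into the MSR selected by ECX, so the
	 * two XORs above make the write clear MSR_GS_BASE to zero, keeping
	 * any stale firmware GSBASE from leaking into early per-CPU code.
	 */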

	call	__pi_startup_64_setup_gdt_idt

	/* Now switch to __KERNEL_CS so IRET works reliably */
	pushq	$__KERNEL_CS
	leaq	.Lon_kernel_cs(%rip), %rax
	pushq	%rax
	lretq
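	/*
	 * The LRETQ above pops the new RIP (.Lon_kernel_cs) from [RSP] and
	 * the new CS (__KERNEL_CS) from [RSP+8]; a far return is used here
	 * because a plain near jump cannot reload CS.
	 */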

.Lon_kernel_cs:
	ANNOTATE_NOENDBR
	UNWIND_HINT_END_OF_STACK

#ifdef CONFIG_AMD_MEM_ENCRYPT
	/*
	 * Activate SEV/SME memory encryption if supported/enabled. This needs to
	 * be done now, since this also includes setup of the SEV-SNP CPUID table,
	 * which needs to be done before any CPUID instructions are executed in
	 * subsequent code. Pass the boot_params pointer as the first argument.
	 */
	movq	%r15, %rdi
	call	__pi_sme_enable
#endif

	/* Sanitize CPU configuration */
	call	verify_cpu

	/*
	 * Derive the kernel's physical-to-virtual offset from the physical and
	 * virtual addresses of common_startup_64().
	 */
	leaq	common_startup_64(%rip), %rdi
	subq	.Lcommon_startup_64(%rip), %rdi
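	/*
	 * The RIP-relative LEA yields the runtime (physical) address of
	 * common_startup_64, while .Lcommon_startup_64 holds its link-time
	 * virtual address, so %rdi ends up as phys - virt. For example, a
	 * kernel linked at 0xffffffff81000000 but loaded at 16 MiB physical
	 * gives %rdi = 0x1000000 - 0xffffffff81000000 (mod 2^64).
	 */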

	/*
	 * Perform pagetable fixups. Additionally, if SME is active, encrypt
	 * the kernel and retrieve the modifier (SME encryption mask if SME
	 * is active) to be added to the initial pgdir entry that will be
	 * programmed into CR3.
	 */
	movq	%r15, %rsi
	call	__pi___startup_64

	/* Form the CR3 value being sure to include the CR3 modifier */
	leaq	early_top_pgt(%rip), %rcx
	addq	%rcx, %rax
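	/*
	 * %rax now holds phys(early_top_pgt) plus the modifier returned by
	 * __startup_64(), i.e. the page table base combined with the SME
	 * encryption mask when memory encryption is active.
	 */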

#ifdef CONFIG_AMD_MEM_ENCRYPT
	mov	%rax, %rdi

	/*
	 * For SEV guests: Verify that the C-bit is correct. A malicious
	 * hypervisor could lie about the C-bit position to perform a ROP
	 * attack on the guest by writing to the unencrypted stack and
	 * waiting for the next RET instruction.
	 */
	call	sev_verify_cbit
#endif

	/*
	 * Switch to early_top_pgt which still has the identity mappings
	 * present.
	 */
	movq	%rax, %cr3

	/* Branch to the common startup code at its kernel virtual address */
	ANNOTATE_RETPOLINE_SAFE
	jmp	*.Lcommon_startup_64(%rip)
SYM_CODE_END(startup_64)

	__INITRODATA
SYM_DATA_LOCAL(.Lcommon_startup_64, .quad common_startup_64)

	.text
SYM_CODE_START(secondary_startup_64)
	UNWIND_HINT_END_OF_STACK
	ANNOTATE_NOENDBR
	/*
	 * At this point the CPU runs in 64-bit mode with CS.L = 1 and
	 * CS.D = 0, and someone has loaded a mapped page table.
	 *
	 * We come here either from startup_64 (using physical addresses)
	 * or from trampoline.S (using virtual addresses).
	 *
	 * Using virtual addresses from trampoline.S removes the need
	 * to have any identity mapped pages in the kernel page table
	 * after the boot processor executes this code.
	 */

	/* Sanitize CPU configuration */
	call	verify_cpu

	/*
	 * The secondary_startup_64_no_verify entry point is only used by
	 * SEV-ES guests. In those guests the call to verify_cpu() would cause
	 * #VC exceptions which cannot be handled at this stage of secondary
	 * CPU bringup.
	 *
	 * All non-SEV-ES systems, especially Intel systems, need to execute
	 * verify_cpu() above to make sure NX is enabled.
	 */
SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL)
	UNWIND_HINT_END_OF_STACK
	ANNOTATE_NOENDBR

	/* Clear %R15 which holds the boot_params pointer on the boot CPU */
	xorl	%r15d, %r15d

	/* Derive the runtime physical address of init_top_pgt[] */
	movq	phys_base(%rip), %rax
	addq	$(init_top_pgt - __START_KERNEL_map), %rax
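	/*
	 * Kernel symbols satisfy virt = phys - phys_base + __START_KERNEL_map,
	 * so the two instructions above compute
	 * phys(init_top_pgt) = (init_top_pgt - __START_KERNEL_map) + phys_base.
	 */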

	/*
	 * Retrieve the modifier (SME encryption mask if SME is active) to be
	 * added to the initial pgdir entry that will be programmed into CR3.
	 */
#ifdef CONFIG_AMD_MEM_ENCRYPT
	addq	sme_me_mask(%rip), %rax
#endif
	/*
	 * Switch to the init_top_pgt here, away from the trampoline_pgd and
	 * unmap the identity mapped ranges.
	 */
	movq	%rax, %cr3

SYM_INNER_LABEL(common_startup_64, SYM_L_LOCAL)
	UNWIND_HINT_END_OF_STACK
	ANNOTATE_NOENDBR

	/*
	 * Create a mask of CR4 bits to preserve. Omit PGE in order to flush
	 * global 1:1 translations from the TLBs.
	 *
	 * From the SDM:
	 * "If CR4.PGE is changing from 0 to 1, there were no global TLB
	 *  entries before the execution; if CR4.PGE is changing from 1 to 0,
	 *  there will be no global TLB entries after the execution."
	 */
	movl	$(X86_CR4_PAE | X86_CR4_LA57), %edx
#ifdef CONFIG_X86_MCE
	/*
	 * Preserve CR4.MCE if the kernel will enable #MC support.
	 * Clearing MCE may fault in some environments (that also force #MC
	 * support). Any machine check that occurs before #MC support is fully
	 * configured will crash the system regardless of the CR4.MCE value set
	 * here.
	 */
	orl	$X86_CR4_MCE, %edx
#endif
	movq	%cr4, %rcx
	andl	%edx, %ecx

	/* Even if ignored in long mode, set PSE uniformly on all logical CPUs. */
	btsl	$X86_CR4_PSE_BIT, %ecx
	movq	%rcx, %cr4

	/*
	 * Set CR4.PGE to re-enable global translations.
	 */
	btsl	$X86_CR4_PGE_BIT, %ecx
	movq	%rcx, %cr4
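	/*
	 * Net effect of the two CR4 writes above: the first has PGE clear,
	 * which (per the SDM quote above) flushes any global TLB entries
	 * left over from the 1:1 mapping; the second re-enables PGE for
	 * the kernel's own global mappings.
	 */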

#ifdef CONFIG_SMP
	/*
	 * For parallel boot, the APIC ID is read from the APIC, and then
	 * used to look up the CPU number. For booting a single CPU, the
	 * CPU number is encoded in smpboot_control.
	 *
	 * Bit 31	STARTUP_READ_APICID (Read APICID from APIC)
	 * Bit 0-23	CPU# if STARTUP_xx flags are not set
	 */
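	/*
	 * For example, smpboot_control == 5 brings up CPU 5 directly,
	 * while a value with STARTUP_READ_APICID set makes each AP derive
	 * its CPU number from its own APIC ID via the lookup below.
	 */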
	movl	smpboot_control(%rip), %ecx
	testl	$STARTUP_READ_APICID, %ecx
	jnz	.Lread_apicid
	/*
	 * No control bit set, single CPU bringup. CPU number is provided
	 * in bit 0-23. This is also the boot CPU case (CPU number 0).
	 */
	andl	$(~STARTUP_PARALLEL_MASK), %ecx
	jmp	.Lsetup_cpu

.Lread_apicid:
	/* Check whether X2APIC mode is already enabled */
	mov	$MSR_IA32_APICBASE, %ecx
	rdmsr
	testl	$X2APIC_ENABLE, %eax
	jnz	.Lread_apicid_msr

#ifdef CONFIG_X86_X2APIC
	/*
	 * If the system is in X2APIC mode then the MMIO base might not be
	 * mapped, causing the MMIO read below to fault. Faults cannot be
	 * handled at that point.
	 */
	cmpl	$0, x2apic_mode(%rip)
	jz	.Lread_apicid_mmio

	/* Force the AP into X2APIC mode. */
	orl	$X2APIC_ENABLE, %eax
	wrmsr
	jmp	.Lread_apicid_msr
#endif

.Lread_apicid_mmio:
	/* Read the APIC ID from the fix-mapped MMIO space. */
	movq	apic_mmio_base(%rip), %rcx
	addq	$APIC_ID, %rcx
	movl	(%rcx), %eax
	shr	$24, %eax
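	/* In xAPIC mode the APIC ID sits in bits 31:24 of the ID register. */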
	jmp	.Llookup_AP

.Lread_apicid_msr:
	mov	$APIC_X2APIC_ID_MSR, %ecx
	rdmsr

.Llookup_AP:
	/* EAX contains the APIC ID of the current CPU */
	xorl	%ecx, %ecx
	leaq	cpuid_to_apicid(%rip), %rbx

.Lfind_cpunr:
	cmpl	(%rbx,%rcx,4), %eax
	jz	.Lsetup_cpu
	inc	%ecx
#ifdef CONFIG_FORCE_NR_CPUS
	cmpl	$NR_CPUS, %ecx
#else
	cmpl	nr_cpu_ids(%rip), %ecx
#endif
	jb	.Lfind_cpunr
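	/*
	 * The loop above is, roughly, in C:
	 *
	 *	for (cpu = 0; cpu < nr_cpu_ids; cpu++)
	 *		if (cpuid_to_apicid[cpu] == apicid)
	 *			goto setup_cpu;		/- CPU# left in %ecx -/
	 */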

	/* APIC ID not found in the table. Drop the trampoline lock and bail. */
	movq	trampoline_lock(%rip), %rax
	movl	$0, (%rax)

1:	cli
	hlt
	jmp	1b

.Lsetup_cpu:
	/* Get the per cpu offset for the given CPU# which is in ECX */
	movq	__per_cpu_offset(,%rcx,8), %rdx
#else
	xorl	%edx, %edx			/* zero-extended to clear all of RDX */
#endif /* CONFIG_SMP */

	/*
	 * Set up a boot-time stack. Any secondary CPU will have lost its
	 * stack by now because the CR3 switch above unmaps the real-mode
	 * stack.
	 *
	 * RDX contains the per-cpu offset
	 */
	movq	current_task(%rdx), %rax
	movq	TASK_threadsp(%rax), %rsp

	/*
	 * Now that this CPU is running on its own stack, drop the realmode
	 * protection. For the boot CPU the pointer is NULL!
	 */
	movq	trampoline_lock(%rip), %rax
	testq	%rax, %rax
	jz	.Lsetup_gdt
	movl	$0, (%rax)

.Lsetup_gdt:
	/*
	 * We must switch to a new descriptor in kernel space for the GDT
	 * because soon the kernel will no longer have access to the
	 * userspace addresses we are currently running at. We have to do
	 * this here because in 32-bit mode we could not load a 64-bit
	 * linear address.
	 */
	subq	$16, %rsp
	movw	$(GDT_SIZE-1), (%rsp)
	leaq	gdt_page(%rdx), %rax
	movq	%rax, 2(%rsp)
	lgdt	(%rsp)
	addq	$16, %rsp
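	/*
	 * The LGDT above consumed a 10-byte descriptor built on the stack:
	 * a 16-bit limit (GDT_SIZE - 1) at offset 0 and the 64-bit linear
	 * base (this CPU's gdt_page) at offset 2; reserving a full 16
	 * bytes presumably just keeps the stack pointer nicely aligned.
	 */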

	/* set up data segments */
	xorl	%eax,%eax
	movl	%eax,%ds
	movl	%eax,%ss
	movl	%eax,%es

	/*
	 * We don't really need to load %fs or %gs, but load them anyway
	 * to kill any stale realmode selectors. This allows execution
	 * under VT hardware.
	 */
	movl	%eax,%fs
	movl	%eax,%gs

	/*
	 * Set up GSBASE.
	 * Note that, on SMP, the boot CPU uses the init data section until
	 * the per-CPU areas are set up.
	 */
	movl	$MSR_GS_BASE,%ecx
	movl	%edx, %eax
	shrq	$32, %rdx
	wrmsr
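	/*
	 * WRMSR takes the value in EDX:EAX, so the per-CPU offset that was
	 * in RDX is split first: EAX gets the low 32 bits, EDX the high 32,
	 * and MSR_GS_BASE ends up holding the full 64-bit offset.
	 */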

	/* Set up and load the IDT */
	call	early_setup_idt

	/* Check if NX is implemented */
	movl	$0x80000001, %eax
	cpuid
	movl	%edx,%edi

	/* Set up EFER (Extended Feature Enable Register) */
	movl	$MSR_EFER, %ecx
	rdmsr
	/*
	 * Preserve current value of EFER for comparison and to skip
	 * EFER writes if no change was made (for TDX guest)
	 */
	movl	%eax, %edx
	btsl	$_EFER_SCE, %eax	/* Enable System Call */
	btl	$20,%edi		/* No Execute supported? */
	jnc	1f
	btsl	$_EFER_NX, %eax
	btsq	$_PAGE_BIT_NX,early_pmd_flags(%rip)
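	/* CPUID leaf 0x80000001 reports NX in EDX bit 20, tested above. */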

	/* Avoid writing EFER if no change was made (for TDX guest) */
1:	cmpl	%edx, %eax
	je	1f
	xor	%edx, %edx
	wrmsr				/* Make changes effective */
1:
	/* Set up CR0 */
	movl	$CR0_STATE, %eax
	/* Make changes effective */
	movq	%rax, %cr0

	/* Zero EFLAGS after setting %rsp */
	pushq	$0
	popfq

	/* Pass the boot_params pointer as first argument */
	movq	%r15, %rdi

.Ljump_to_C_code:
	xorl	%ebp, %ebp	# clear frame pointer
	ANNOTATE_RETPOLINE_SAFE
	callq	*initial_code(%rip)
	ud2
SYM_CODE_END(secondary_startup_64)

#include "verify_cpu.S"
#include "sev_verify_cbit.S"

#if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_AMD_MEM_ENCRYPT)
/*
 * Entry point for soft restart of a CPU. Invoked from xxx_play_dead() for
 * restarting the boot CPU or for restarting SEV guest CPUs after CPU hot
 * unplug. Everything is set up already except the stack.
 */
SYM_CODE_START(soft_restart_cpu)
	ANNOTATE_NOENDBR
	UNWIND_HINT_END_OF_STACK

	/* Find the idle task stack */
	movq	PER_CPU_VAR(current_task), %rcx
	movq	TASK_threadsp(%rcx), %rsp

	jmp	.Ljump_to_C_code
SYM_CODE_END(soft_restart_cpu)
#endif

#ifdef CONFIG_AMD_MEM_ENCRYPT
/*
 * VC Exception handler used during early boot when running on kernel
 * addresses, but before the switch to the idt_table can be made.
 * The early_idt_handler_array can't be used here because it calls into a lot
 * of __init code and this handler is also used during CPU offlining/onlining.
 * Therefore this handler ends up in the .text section so that it stays around
 * when .init.text is freed.
 */
SYM_CODE_START_NOALIGN(vc_boot_ghcb)
	UNWIND_HINT_IRET_REGS offset=8
	ENDBR

	/* Build pt_regs */
	PUSH_AND_CLEAR_REGS

	/* Call C handler */
	movq	%rsp, %rdi
	movq	ORIG_RAX(%rsp), %rsi
	movq	initial_vc_handler(%rip), %rax
	ANNOTATE_RETPOLINE_SAFE
	call	*%rax

	/* Unwind pt_regs */
	POP_REGS

	/* Remove Error Code */
	addq	$8, %rsp

	iretq
SYM_CODE_END(vc_boot_ghcb)
#endif

	/* Both SMP bootup and ACPI suspend change these variables */
	__REFDATA
	.balign	8
SYM_DATA(initial_code, .quad x86_64_start_kernel)
#ifdef CONFIG_AMD_MEM_ENCRYPT
SYM_DATA(initial_vc_handler, .quad handle_vc_boot_ghcb)
#endif

SYM_DATA(trampoline_lock, .quad 0);
	__FINITDATA

	__INIT
SYM_CODE_START(early_idt_handler_array)
	i = 0
	.rept NUM_EXCEPTION_VECTORS
	.if ((EXCEPTION_ERRCODE_MASK >> i) & 1) == 0
		UNWIND_HINT_IRET_REGS
		ENDBR
		pushq $0	# Dummy error code, to make stack frame uniform
	.else
		UNWIND_HINT_IRET_REGS offset=8
		ENDBR
	.endif
	pushq $i		# 72(%rsp) Vector number
	jmp early_idt_handler_common
	UNWIND_HINT_IRET_REGS
	i = i + 1
	.fill early_idt_handler_array + i*EARLY_IDT_HANDLER_SIZE - ., 1, 0xcc
	.endr
SYM_CODE_END(early_idt_handler_array)
	ANNOTATE_NOENDBR // early_idt_handler_array[NUM_EXCEPTION_VECTORS]
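/*
 * Each stub above occupies exactly EARLY_IDT_HANDLER_SIZE bytes: vectors
 * without a hardware error code push a dummy 0 so the stack layout is
 * uniform, and the .fill pads every stub to the fixed stride so the IDT
 * setup code can find stub N at
 * early_idt_handler_array + N * EARLY_IDT_HANDLER_SIZE.
 */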

SYM_CODE_START_LOCAL(early_idt_handler_common)
	UNWIND_HINT_IRET_REGS offset=16
	/*
	 * The stack is the hardware frame, an error code or zero, and the
	 * vector number.
	 */
	cld

	incl	early_recursion_flag(%rip)

	/* The vector number is currently in the pt_regs->di slot. */
	pushq	%rsi				/* pt_regs->si */
	movq	8(%rsp), %rsi			/* RSI = vector number */
	movq	%rdi, 8(%rsp)			/* pt_regs->di = RDI */
	pushq	%rdx				/* pt_regs->dx */
	pushq	%rcx				/* pt_regs->cx */
	pushq	%rax				/* pt_regs->ax */
	pushq	%r8				/* pt_regs->r8 */
	pushq	%r9				/* pt_regs->r9 */
	pushq	%r10				/* pt_regs->r10 */
	pushq	%r11				/* pt_regs->r11 */
	pushq	%rbx				/* pt_regs->bx */
	pushq	%rbp				/* pt_regs->bp */
	pushq	%r12				/* pt_regs->r12 */
	pushq	%r13				/* pt_regs->r13 */
	pushq	%r14				/* pt_regs->r14 */
	pushq	%r15				/* pt_regs->r15 */
	UNWIND_HINT_REGS

	movq	%rsp,%rdi		/* RDI = pt_regs; RSI is already trapnr */
	call	do_early_exception
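	/* In C terms: do_early_exception(struct pt_regs *regs, int trapnr). */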

	decl	early_recursion_flag(%rip)
	jmp	restore_regs_and_return_to_kernel
SYM_CODE_END(early_idt_handler_common)

#ifdef CONFIG_AMD_MEM_ENCRYPT
/*
 * VC Exception handler used during very early boot. The
 * early_idt_handler_array can't be used because it returns via the
 * paravirtualized INTERRUPT_RETURN and pv-ops don't work that early.
 *
 * XXX it does, fix this.
 *
 * This handler will end up in the .init.text section and not be
 * available to boot secondary CPUs.
 */
SYM_CODE_START_NOALIGN(vc_no_ghcb)
	UNWIND_HINT_IRET_REGS offset=8
	ENDBR

	/* Build pt_regs */
	PUSH_AND_CLEAR_REGS

	/* Call C handler */
	movq	%rsp, %rdi
	movq	ORIG_RAX(%rsp), %rsi
	call	__pi_do_vc_no_ghcb

	/* Unwind pt_regs */
	POP_REGS

	/* Remove Error Code */
	addq	$8, %rsp

	/* Pure iret required here - don't use INTERRUPT_RETURN */
	iretq
SYM_CODE_END(vc_no_ghcb)
SYM_PIC_ALIAS(vc_no_ghcb);
#endif

#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION
/*
 * Each PGD needs to be 8k long and 8k aligned. We do not
 * ever go out to userspace with these, so we do not
 * strictly *need* the second page, but this allows us to
 * have a single set_pgd() implementation that does not
 * need to worry about whether it has 4k or 8k to work
 * with.
 *
 * This ensures PGDs are 8k long:
 */
#define PTI_USER_PGD_FILL	512
/* This ensures they are 8k-aligned: */
#define SYM_DATA_START_PTI_ALIGNED(name) \
	SYM_START(name, SYM_L_GLOBAL, .balign 2 * PAGE_SIZE)
#else
#define SYM_DATA_START_PTI_ALIGNED(name) \
	SYM_DATA_START_PAGE_ALIGNED(name)
#define PTI_USER_PGD_FILL	0
#endif
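/*
 * Sizing sketch: PTI_USER_PGD_FILL is 512 extra 8-byte entries, i.e. one
 * additional 4k page, and .balign 2 * PAGE_SIZE makes the kernel/user PGD
 * pair 8k-aligned, so (as the PTI entry code relies on) the two halves
 * differ only in a single address bit.
 */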

	__INITDATA
	.balign 4

SYM_DATA_START_PTI_ALIGNED(early_top_pgt)
	.fill	511,8,0
	.quad	level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC
	.fill	PTI_USER_PGD_FILL,8,0
SYM_DATA_END(early_top_pgt)
SYM_PIC_ALIAS(early_top_pgt)
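/*
 * The PGD index of __START_KERNEL_map is (__START_KERNEL_map >> 39) & 511
 * = 511, hence 511 empty entries followed by one kernel entry in the last
 * slot. The .quad computes the link-time physical address of
 * level3_kernel_pgt, with _PAGE_TABLE_NOENC supplying the flag bits.
 */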

SYM_DATA_START_PAGE_ALIGNED(early_dynamic_pgts)
	.fill	512*EARLY_DYNAMIC_PAGE_TABLES,8,0
SYM_DATA_END(early_dynamic_pgts)
SYM_PIC_ALIAS(early_dynamic_pgts);

SYM_DATA(early_recursion_flag, .long 0)

	.data

#if defined(CONFIG_XEN_PV) || defined(CONFIG_PVH)
SYM_DATA_START_PTI_ALIGNED(init_top_pgt)
	.quad	level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
	.org	init_top_pgt + L4_PAGE_OFFSET*8, 0
	.quad	level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
	.org	init_top_pgt + L4_START_KERNEL*8, 0
	/* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
	.quad	level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC
	.fill	PTI_USER_PGD_FILL,8,0
SYM_DATA_END(init_top_pgt)

SYM_DATA_START_PAGE_ALIGNED(level3_ident_pgt)
	.quad	level2_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
	.fill	511, 8, 0
SYM_DATA_END(level3_ident_pgt)
SYM_DATA_START_PAGE_ALIGNED(level2_ident_pgt)
	/*
	 * Since I easily can, map the first 1G.
	 * Don't set NX because code runs from these pages.
	 *
	 * Note: This sets _PAGE_GLOBAL regardless of whether the CPU
	 * supports it or whether it is enabled; the CPU should simply
	 * ignore the bit in that case.
	 */
	PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD)
SYM_DATA_END(level2_ident_pgt)
#else
SYM_DATA_START_PTI_ALIGNED(init_top_pgt)
	.fill	512,8,0
	.fill	PTI_USER_PGD_FILL,8,0
SYM_DATA_END(init_top_pgt)
#endif

SYM_DATA_START_PAGE_ALIGNED(level4_kernel_pgt)
	.fill	511,8,0
	.quad	level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC
SYM_DATA_END(level4_kernel_pgt)
SYM_PIC_ALIAS(level4_kernel_pgt)

SYM_DATA_START_PAGE_ALIGNED(level3_kernel_pgt)
	.fill	L3_START_KERNEL,8,0
	/* (2^48-(2*1024*1024*1024)-((2^39)*511))/(2^30) = 510 */
	.quad	level2_kernel_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
	.quad	level2_fixmap_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC
SYM_DATA_END(level3_kernel_pgt)
SYM_PIC_ALIAS(level3_kernel_pgt)

SYM_DATA_START_PAGE_ALIGNED(level2_kernel_pgt)
	/*
	 * Kernel high mapping.
	 *
	 * The kernel code+data+bss must be located below KERNEL_IMAGE_SIZE in
	 * virtual address space, which is 1 GiB if RANDOMIZE_BASE is enabled,
	 * 512 MiB otherwise.
	 *
	 * (NOTE: after that starts the module area, see MODULES_VADDR.)
	 *
	 * This table is eventually used by the kernel during normal runtime.
	 * Care must be taken to clear out undesired bits later, like _PAGE_RW
	 * or _PAGE_GLOBAL in some cases.
	 */
	PMDS(0, __PAGE_KERNEL_LARGE_EXEC, KERNEL_IMAGE_SIZE/PMD_SIZE)
SYM_DATA_END(level2_kernel_pgt)
SYM_PIC_ALIAS(level2_kernel_pgt)

SYM_DATA_START_PAGE_ALIGNED(level2_fixmap_pgt)
	.fill	(512 - 4 - FIXMAP_PMD_NUM),8,0
	pgtno = 0
	.rept (FIXMAP_PMD_NUM)
	.quad level1_fixmap_pgt + (pgtno << PAGE_SHIFT) - __START_KERNEL_map \
		+ _PAGE_TABLE_NOENC;
	pgtno = pgtno + 1
	.endr
	/* 6 MB reserved space + a 2MB hole */
	.fill	4,8,0
SYM_DATA_END(level2_fixmap_pgt)
SYM_PIC_ALIAS(level2_fixmap_pgt)

SYM_DATA_START_PAGE_ALIGNED(level1_fixmap_pgt)
	.rept (FIXMAP_PMD_NUM)
	.fill	512,8,0
	.endr
SYM_DATA_END(level1_fixmap_pgt)

	.data
	.align 16

SYM_DATA(smpboot_control, .long 0)

	.align 16
/* This must match the first entry in level2_kernel_pgt */
SYM_DATA(phys_base, .quad 0x0)
SYM_PIC_ALIAS(phys_base);
EXPORT_SYMBOL(phys_base)

#include "../xen/xen-head.S"

	__PAGE_ALIGNED_BSS
SYM_DATA_START_PAGE_ALIGNED(empty_zero_page)
	.skip PAGE_SIZE
SYM_DATA_END(empty_zero_page)
EXPORT_SYMBOL(empty_zero_page)