| 1 | // SPDX-License-Identifier: GPL-2.0-only |
| 2 | /* |
| 3 | * AMD Memory Encryption Support |
| 4 | * |
| 5 | * Copyright (C) 2019 SUSE |
| 6 | * |
| 7 | * Author: Joerg Roedel <jroedel@suse.de> |
| 8 | */ |
| 9 | |
| 10 | #define pr_fmt(fmt) "SEV: " fmt |
| 11 | |
| 12 | #include <linux/bug.h> |
| 13 | #include <linux/kernel.h> |
| 14 | |
| 15 | #include <asm/cpu_entry_area.h> |
| 16 | #include <asm/msr.h> |
| 17 | #include <asm/ptrace.h> |
| 18 | #include <asm/sev.h> |
| 19 | #include <asm/sev-internal.h> |
| 20 | |
| 21 | static __always_inline bool on_vc_stack(struct pt_regs *regs) |
| 22 | { |
| 23 | unsigned long sp = regs->sp; |
| 24 | |
| 25 | /* User-mode RSP is not trusted */ |
| 26 | if (user_mode(regs)) |
| 27 | return false; |
| 28 | |
| 29 | /* SYSCALL gap still has user-mode RSP */ |
| 30 | if (ip_within_syscall_gap(regs)) |
| 31 | return false; |
| 32 | |
| 33 | return ((sp >= __this_cpu_ist_bottom_va(VC)) && (sp < __this_cpu_ist_top_va(VC))); |
| 34 | } |
| 35 | |
| 36 | /* |
| 37 | * This function handles the case when an NMI is raised in the #VC |
| 38 | * exception handler entry code, before the #VC handler has switched off |
| 39 | * its IST stack. In this case, the IST entry for #VC must be adjusted, |
| 40 | * so that any nested #VC exception will not overwrite the stack |
| 41 | * contents of the interrupted #VC handler. |
| 42 | * |
 * The IST entry is adjusted unconditionally so that it can also be
| 44 | * unconditionally adjusted back in __sev_es_ist_exit(). Otherwise a |
| 45 | * nested sev_es_ist_exit() call may adjust back the IST entry too |
| 46 | * early. |
| 47 | * |
| 48 | * The __sev_es_ist_enter() and __sev_es_ist_exit() functions always run |
| 49 | * on the NMI IST stack, as they are only called from NMI handling code |
| 50 | * right now. |
| 51 | */ |
| 52 | void noinstr __sev_es_ist_enter(struct pt_regs *regs) |
| 53 | { |
| 54 | unsigned long old_ist, new_ist; |
| 55 | |
| 56 | /* Read old IST entry */ |
| 57 | new_ist = old_ist = __this_cpu_read(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC]); |
| 58 | |
| 59 | /* |
| 60 | * If NMI happened while on the #VC IST stack, set the new IST |
| 61 | * value below regs->sp, so that the interrupted stack frame is |
| 62 | * not overwritten by subsequent #VC exceptions. |
| 63 | */ |
| 64 | if (on_vc_stack(regs)) |
| 65 | new_ist = regs->sp; |
| 66 | |
| 67 | /* |
| 68 | * Reserve additional 8 bytes and store old IST value so this |
| 69 | * adjustment can be unrolled in __sev_es_ist_exit(). |
| 70 | */ |
| 71 | new_ist -= sizeof(old_ist); |
| 72 | *(unsigned long *)new_ist = old_ist; |
| 73 | |
| 74 | /* Set new IST entry */ |
| 75 | this_cpu_write(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC], new_ist); |
| 76 | } |
| 77 | |
| 78 | void noinstr __sev_es_ist_exit(void) |
| 79 | { |
| 80 | unsigned long ist; |
| 81 | |
| 82 | /* Read IST entry */ |
| 83 | ist = __this_cpu_read(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC]); |
| 84 | |
| 85 | if (WARN_ON(ist == __this_cpu_ist_top_va(VC))) |
| 86 | return; |
| 87 | |
| 88 | /* Read back old IST entry and write it to the TSS */ |
| 89 | this_cpu_write(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC], *(unsigned long *)ist); |
| 90 | } |
| 91 | |
| 92 | void noinstr __sev_es_nmi_complete(void) |
| 93 | { |
| 94 | struct ghcb_state state; |
| 95 | struct ghcb *ghcb; |
| 96 | |
| 97 | ghcb = __sev_get_ghcb(state: &state); |
| 98 | |
| 99 | vc_ghcb_invalidate(ghcb); |
| 100 | ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_NMI_COMPLETE); |
| 101 | ghcb_set_sw_exit_info_1(ghcb, value: 0); |
| 102 | ghcb_set_sw_exit_info_2(ghcb, value: 0); |
| 103 | |
| 104 | sev_es_wr_ghcb_msr(__pa_nodebug(ghcb)); |
| 105 | VMGEXIT(); |
| 106 | |
| 107 | __sev_put_ghcb(state: &state); |
| 108 | } |
| 109 | |
| 110 | /* |
| 111 | * Nothing shall interrupt this code path while holding the per-CPU |
| 112 | * GHCB. The backup GHCB is only for NMIs interrupting this path. |
| 113 | * |
| 114 | * Callers must disable local interrupts around it. |
| 115 | */ |
| 116 | noinstr struct ghcb *__sev_get_ghcb(struct ghcb_state *state) |
| 117 | { |
| 118 | struct sev_es_runtime_data *data; |
| 119 | struct ghcb *ghcb; |
| 120 | |
| 121 | WARN_ON(!irqs_disabled()); |
| 122 | |
| 123 | data = this_cpu_read(runtime_data); |
| 124 | ghcb = &data->ghcb_page; |
| 125 | |
| 126 | if (unlikely(data->ghcb_active)) { |
| 127 | /* GHCB is already in use - save its contents */ |
| 128 | |
| 129 | if (unlikely(data->backup_ghcb_active)) { |
| 130 | /* |
| 131 | * Backup-GHCB is also already in use. There is no way |
| 132 | * to continue here so just kill the machine. To make |
| 133 | * panic() work, mark GHCBs inactive so that messages |
| 134 | * can be printed out. |
| 135 | */ |
| 136 | data->ghcb_active = false; |
| 137 | data->backup_ghcb_active = false; |
| 138 | |
| 139 | instrumentation_begin(); |
| 140 | panic(fmt: "Unable to handle #VC exception! GHCB and Backup GHCB are already in use" ); |
| 141 | instrumentation_end(); |
| 142 | } |
| 143 | |
| 144 | /* Mark backup_ghcb active before writing to it */ |
| 145 | data->backup_ghcb_active = true; |
| 146 | |
| 147 | state->ghcb = &data->backup_ghcb; |
| 148 | |
| 149 | /* Backup GHCB content */ |
| 150 | *state->ghcb = *ghcb; |
| 151 | } else { |
| 152 | state->ghcb = NULL; |
| 153 | data->ghcb_active = true; |
| 154 | } |
| 155 | |
| 156 | return ghcb; |
| 157 | } |
| 158 | |
| 159 | noinstr void __sev_put_ghcb(struct ghcb_state *state) |
| 160 | { |
| 161 | struct sev_es_runtime_data *data; |
| 162 | struct ghcb *ghcb; |
| 163 | |
| 164 | WARN_ON(!irqs_disabled()); |
| 165 | |
| 166 | data = this_cpu_read(runtime_data); |
| 167 | ghcb = &data->ghcb_page; |
| 168 | |
| 169 | if (state->ghcb) { |
| 170 | /* Restore GHCB from Backup */ |
| 171 | *ghcb = *state->ghcb; |
| 172 | data->backup_ghcb_active = false; |
| 173 | state->ghcb = NULL; |
| 174 | } else { |
| 175 | /* |
| 176 | * Invalidate the GHCB so a VMGEXIT instruction issued |
| 177 | * from userspace won't appear to be valid. |
| 178 | */ |
| 179 | vc_ghcb_invalidate(ghcb); |
| 180 | data->ghcb_active = false; |
| 181 | } |
| 182 | } |
| 183 | |