// SPDX-License-Identifier: GPL-2.0
/*
 * AMD Encrypted Register State Support
 *
 * Author: Joerg Roedel <jroedel@suse.de>
 *
 * This file is not compiled stand-alone. It contains code shared
 * between the pre-decompression boot code and the running Linux kernel
 * and is included directly into both code-bases.
 */

#include <asm/setup_data.h>

#ifndef __BOOT_COMPRESSED
#define error(v)	pr_err(v)
#define has_cpuflag(f)	boot_cpu_has(f)
#else
#undef WARN
#define WARN(condition, format...) (!!(condition))
#undef vc_forward_exception
#define vc_forward_exception(c)	panic("SNP: Hypervisor requested exception\n")
#endif

/*
 * SVSM related information:
 * During boot, the page tables are set up as identity mapped and later
 * changed to use kernel virtual addresses. Maintain separate virtual and
 * physical addresses for the CAA to allow SVSM functions to be used during
 * early boot, both with identity mapped virtual addresses and proper kernel
 * virtual addresses.
 */
struct svsm_ca *boot_svsm_caa __ro_after_init;
u64 boot_svsm_caa_pa __ro_after_init;

/*
 * Since feature negotiation related variables are set early in the boot
 * process they must reside in the .data section so as not to be zeroed
 * out when the .bss section is later cleared.
 *
 * GHCB protocol version negotiated with the hypervisor.
 */
static u16 ghcb_version __ro_after_init;

/* Copy of the SNP firmware's CPUID page. */
static struct snp_cpuid_table cpuid_table_copy __ro_after_init;

/*
 * These will be initialized based on the CPUID table so that non-present
 * all-zero leaves (for sparse tables) can be differentiated from
 * invalid/out-of-range leaves. This is needed since all-zero leaves
 * still need to be post-processed.
 */
static u32 cpuid_std_range_max __ro_after_init;
static u32 cpuid_hyp_range_max __ro_after_init;
static u32 cpuid_ext_range_max __ro_after_init;

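/*
 * Verify that the CPU provides the features this code relies on. RDRAND is
 * required as a source of randomness that does not depend on the (untrusted)
 * hypervisor.
 */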
bool __init sev_es_check_cpu_features(void)
{
	if (!has_cpuflag(X86_FEATURE_RDRAND)) {
		error("RDRAND instruction not supported - no trusted source of randomness available\n");
		return false;
	}

	return true;
}

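/*
 * Request guest termination via the GHCB MSR protocol: encode the reason set
 * and reason code into a GHCB_MSR_TERM_REQ value, hand it to the hypervisor
 * via VMGEXIT and halt forever in case the request is not honored.
 */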
void __head __noreturn
sev_es_terminate(unsigned int set, unsigned int reason)
{
	u64 val = GHCB_MSR_TERM_REQ;

	/* Tell the hypervisor what went wrong. */
	val |= GHCB_SEV_TERM_REASON(set, reason);

	/* Request Guest Termination from Hypervisor */
	sev_es_wr_ghcb_msr(val);
	VMGEXIT();

	while (true)
		asm volatile("hlt\n" : : : "memory");
}

/*
 * The hypervisor features are available from GHCB version 2 onward.
 */
u64 get_hv_features(void)
{
	u64 val;

	if (ghcb_version < 2)
		return 0;

	sev_es_wr_ghcb_msr(GHCB_MSR_HV_FT_REQ);
	VMGEXIT();

	val = sev_es_rd_ghcb_msr();
	if (GHCB_RESP_CODE(val) != GHCB_MSR_HV_FT_RESP)
		return 0;

	return GHCB_MSR_HV_FT_RESP_VAL(val);
}

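/*
 * Register the GHCB GPA with the hypervisor using the MSR protocol. The
 * hypervisor must acknowledge the registration with the same PFN, otherwise
 * the guest cannot continue and is terminated.
 */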
void snp_register_ghcb_early(unsigned long paddr)
{
	unsigned long pfn = paddr >> PAGE_SHIFT;
	u64 val;

	sev_es_wr_ghcb_msr(GHCB_MSR_REG_GPA_REQ_VAL(pfn));
	VMGEXIT();

	val = sev_es_rd_ghcb_msr();

	/* If the response GPA is not ours then abort the guest */
	if ((GHCB_RESP_CODE(val) != GHCB_MSR_REG_GPA_RESP) ||
	    (GHCB_MSR_REG_GPA_RESP_VAL(val) != pfn))
		sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_REGISTER);
}

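/*
 * Negotiate the GHCB protocol version: query the hypervisor's supported
 * range via the SEV information request and settle on the highest version
 * supported by both sides.
 */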
bool sev_es_negotiate_protocol(void)
{
	u64 val;

	/* Do the GHCB protocol version negotiation */
	sev_es_wr_ghcb_msr(GHCB_MSR_SEV_INFO_REQ);
	VMGEXIT();
	val = sev_es_rd_ghcb_msr();

	if (GHCB_MSR_INFO(val) != GHCB_MSR_SEV_INFO_RESP)
		return false;

	if (GHCB_MSR_PROTO_MAX(val) < GHCB_PROTOCOL_MIN ||
	    GHCB_MSR_PROTO_MIN(val) > GHCB_PROTOCOL_MAX)
		return false;

	ghcb_version = min_t(size_t, GHCB_MSR_PROTO_MAX(val), GHCB_PROTOCOL_MAX);

	return true;
}

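/*
 * Inspect the sw_exit_info fields written by the hypervisor after a VMGEXIT.
 * A non-zero lower 32 bits of sw_exit_info_1 indicates a problem: a value of
 * 1 carries exception information in sw_exit_info_2, which is only honored
 * for a valid #GP or #UD exception injection; anything else is treated as a
 * VMM error.
 */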
static enum es_result verify_exception_info(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
{
	u32 ret;

	ret = ghcb->save.sw_exit_info_1 & GENMASK_ULL(31, 0);
	if (!ret)
		return ES_OK;

	if (ret == 1) {
		u64 info = ghcb->save.sw_exit_info_2;
		unsigned long v = info & SVM_EVTINJ_VEC_MASK;

		/* Check if exception information from hypervisor is sane. */
		if ((info & SVM_EVTINJ_VALID) &&
		    ((v == X86_TRAP_GP) || (v == X86_TRAP_UD)) &&
		    ((info & SVM_EVTINJ_TYPE_MASK) == SVM_EVTINJ_TYPE_EXEPT)) {
			ctxt->fi.vector = v;

			if (info & SVM_EVTINJ_VALID_ERR)
				ctxt->fi.error_code = info >> 32;

			return ES_EXCEPTION;
		}
	}

	return ES_VMM_ERROR;
}

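/* Map the SVSM result code returned in RAX to an errno value. */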
static inline int svsm_process_result_codes(struct svsm_call *call)
{
	switch (call->rax_out) {
	case SVSM_SUCCESS:
		return 0;
	case SVSM_ERR_INCOMPLETE:
	case SVSM_ERR_BUSY:
		return -EAGAIN;
	default:
		return -EINVAL;
	}
}

/*
 * Issue a VMGEXIT to call the SVSM:
 *   - Load the SVSM register state (RAX, RCX, RDX, R8 and R9)
 *   - Set the CA call pending field to 1
 *   - Issue VMGEXIT
 *   - Save the SVSM return register state (RAX, RCX, RDX, R8 and R9)
 *   - Perform atomic exchange of the CA call pending field
 *
 * - See the "Secure VM Service Module for SEV-SNP Guests" specification for
 *   details on the calling convention.
 *   - The calling convention loosely follows the Microsoft X64 calling
 *     convention by putting arguments in RCX, RDX, R8 and R9.
 *   - RAX specifies the SVSM protocol/callid as input and the return code
 *     as output.
 */
static __always_inline void svsm_issue_call(struct svsm_call *call, u8 *pending)
{
	register unsigned long rax asm("rax") = call->rax;
	register unsigned long rcx asm("rcx") = call->rcx;
	register unsigned long rdx asm("rdx") = call->rdx;
	register unsigned long r8  asm("r8")  = call->r8;
	register unsigned long r9  asm("r9")  = call->r9;

	call->caa->call_pending = 1;

	asm volatile("rep; vmmcall\n\t"
		     : "+r" (rax), "+r" (rcx), "+r" (rdx), "+r" (r8), "+r" (r9)
		     : : "memory");

	*pending = xchg(&call->caa->call_pending, *pending);

	call->rax_out = rax;
	call->rcx_out = rcx;
	call->rdx_out = rdx;
	call->r8_out = r8;
	call->r9_out = r9;
}

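/*
 * Call the SVSM through the GHCB MSR protocol. The current GHCB MSR value is
 * saved up front and restored afterwards so the caller's MSR state is
 * preserved across the VMPL0 request.
 */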
static int svsm_perform_msr_protocol(struct svsm_call *call)
{
	u8 pending = 0;
	u64 val, resp;

	/*
	 * When using the MSR protocol, be sure to save and restore
	 * the current MSR value.
	 */
	val = sev_es_rd_ghcb_msr();

	sev_es_wr_ghcb_msr(GHCB_MSR_VMPL_REQ_LEVEL(0));

	svsm_issue_call(call, &pending);

	resp = sev_es_rd_ghcb_msr();

	sev_es_wr_ghcb_msr(val);

	if (pending)
		return -EINVAL;

	if (GHCB_RESP_CODE(resp) != GHCB_MSR_VMPL_RESP)
		return -EINVAL;

	if (GHCB_MSR_VMPL_RESP_VAL(resp))
		return -EINVAL;

	return svsm_process_result_codes(call);
}

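/*
 * Call the SVSM using the full GHCB page via the SNP_RUN_VMPL exit code. An
 * exception reported by the hypervisor is forwarded; in the boot stub that
 * means a panic via the vc_forward_exception() stand-in defined above.
 */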
static int svsm_perform_ghcb_protocol(struct ghcb *ghcb, struct svsm_call *call)
{
	struct es_em_ctxt ctxt;
	u8 pending = 0;

	vc_ghcb_invalidate(ghcb);

	/*
	 * Fill in protocol and format specifiers. This can be called very early
	 * in the boot, so use rip-relative references as needed.
	 */
	ghcb->protocol_version = ghcb_version;
	ghcb->ghcb_usage       = GHCB_DEFAULT_USAGE;

	ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_SNP_RUN_VMPL);
	ghcb_set_sw_exit_info_1(ghcb, 0);
	ghcb_set_sw_exit_info_2(ghcb, 0);

	sev_es_wr_ghcb_msr(__pa(ghcb));

	svsm_issue_call(call, &pending);

	if (pending)
		return -EINVAL;

	switch (verify_exception_info(ghcb, &ctxt)) {
	case ES_OK:
		break;
	case ES_EXCEPTION:
		vc_forward_exception(&ctxt);
		fallthrough;
	default:
		return -EINVAL;
	}

	return svsm_process_result_codes(call);
}

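/*
 * Perform a VMGEXIT using the full GHCB page: program the exit code and exit
 * info fields, point the GHCB MSR at the page's physical address, exit to
 * the hypervisor and validate the response.
 */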
enum es_result sev_es_ghcb_hv_call(struct ghcb *ghcb,
				   struct es_em_ctxt *ctxt,
				   u64 exit_code, u64 exit_info_1,
				   u64 exit_info_2)
{
	/* Fill in protocol and format specifiers */
	ghcb->protocol_version = ghcb_version;
	ghcb->ghcb_usage       = GHCB_DEFAULT_USAGE;

	ghcb_set_sw_exit_code(ghcb, exit_code);
	ghcb_set_sw_exit_info_1(ghcb, exit_info_1);
	ghcb_set_sw_exit_info_2(ghcb, exit_info_2);

	sev_es_wr_ghcb_msr(__pa(ghcb));
	VMGEXIT();

	return verify_exception_info(ghcb, ctxt);
}

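/*
 * Fetch a single CPUID register via the GHCB MSR protocol. Each request
 * returns one register; the value is delivered in the upper 32 bits of the
 * response MSR.
 */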
static int __sev_cpuid_hv(u32 fn, int reg_idx, u32 *reg)
{
	u64 val;

	sev_es_wr_ghcb_msr(GHCB_CPUID_REQ(fn, reg_idx));
	VMGEXIT();
	val = sev_es_rd_ghcb_msr();
	if (GHCB_RESP_CODE(val) != GHCB_MSR_CPUID_RESP)
		return -EIO;

	*reg = (val >> 32);

	return 0;
}

static int __sev_cpuid_hv_msr(struct cpuid_leaf *leaf)
{
	int ret;

	/*
	 * MSR protocol does not support fetching non-zero subfunctions, but is
	 * sufficient to handle current early-boot cases. Should that change,
	 * make sure to report an error rather than ignoring the index and
	 * grabbing random values. If this issue arises in the future, handling
	 * can be added here to use GHCB-page protocol for cases that occur late
	 * enough in boot that GHCB page is available.
	 */
	if (cpuid_function_is_indexed(leaf->fn) && leaf->subfn)
		return -EINVAL;

	ret =         __sev_cpuid_hv(leaf->fn, GHCB_CPUID_REQ_EAX, &leaf->eax);
	ret = ret ? : __sev_cpuid_hv(leaf->fn, GHCB_CPUID_REQ_EBX, &leaf->ebx);
	ret = ret ? : __sev_cpuid_hv(leaf->fn, GHCB_CPUID_REQ_ECX, &leaf->ecx);
	ret = ret ? : __sev_cpuid_hv(leaf->fn, GHCB_CPUID_REQ_EDX, &leaf->edx);

	return ret;
}

static int __sev_cpuid_hv_ghcb(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_leaf *leaf)
{
	u32 cr4 = native_read_cr4();
	int ret;

	ghcb_set_rax(ghcb, leaf->fn);
	ghcb_set_rcx(ghcb, leaf->subfn);

	if (cr4 & X86_CR4_OSXSAVE)
		/* Safe to read xcr0 */
		ghcb_set_xcr0(ghcb, xgetbv(XCR_XFEATURE_ENABLED_MASK));
	else
		/* xgetbv will cause #UD - use reset value for xcr0 */
		ghcb_set_xcr0(ghcb, 1);

	ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_CPUID, 0, 0);
	if (ret != ES_OK)
		return ret;

	if (!(ghcb_rax_is_valid(ghcb) &&
	      ghcb_rbx_is_valid(ghcb) &&
	      ghcb_rcx_is_valid(ghcb) &&
	      ghcb_rdx_is_valid(ghcb)))
		return ES_VMM_ERROR;

	leaf->eax = ghcb->save.rax;
	leaf->ebx = ghcb->save.rbx;
	leaf->ecx = ghcb->save.rcx;
	leaf->edx = ghcb->save.rdx;

	return ES_OK;
}

static int sev_cpuid_hv(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_leaf *leaf)
{
	return ghcb ? __sev_cpuid_hv_ghcb(ghcb, ctxt, leaf)
		    : __sev_cpuid_hv_msr(leaf);
}

/*
 * This may be called early while still running on the initial identity
 * mapping. Use RIP-relative addressing to obtain the correct address
 * while running with the initial identity mapping as well as the
 * switch-over to kernel virtual addresses later.
 */
const struct snp_cpuid_table *snp_cpuid_get_table(void)
{
	return rip_rel_ptr(&cpuid_table_copy);
}

/*
 * The SNP Firmware ABI, Revision 0.9, Section 7.1, details the use of
 * XCR0_IN and XSS_IN to encode multiple versions of 0xD subfunctions 0
 * and 1 based on the corresponding features enabled by a particular
 * combination of XCR0 and XSS registers so that a guest can look up the
 * version corresponding to the features currently enabled in its XCR0/XSS
 * registers. The only value that differs between these versions/table
 * entries is the enabled XSAVE area size advertised via EBX.
 *
 * While hypervisors may choose to make use of this support, it is more
 * robust/secure for a guest to simply find the entry corresponding to the
 * base/legacy XSAVE area size (XCR0=1 or XCR0=3), and then calculate the
 * XSAVE area size using subfunctions 2 through 64, as documented in APM
 * Volume 3, Rev 3.31, Appendix E.3.8, which is what is done here.
 *
 * Since the base/legacy XSAVE area size is documented as 0x240, use that
 * value directly rather than relying on the base size in the CPUID table.
 *
 * Return: XSAVE area size on success, 0 otherwise.
 */
static u32 __head snp_cpuid_calc_xsave_size(u64 xfeatures_en, bool compacted)
{
	const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table();
	u64 xfeatures_found = 0;
	u32 xsave_size = 0x240;
	int i;

	for (i = 0; i < cpuid_table->count; i++) {
		const struct snp_cpuid_fn *e = &cpuid_table->fn[i];

		if (!(e->eax_in == 0xD && e->ecx_in > 1 && e->ecx_in < 64))
			continue;
		if (!(xfeatures_en & (BIT_ULL(e->ecx_in))))
			continue;
		if (xfeatures_found & (BIT_ULL(e->ecx_in)))
			continue;

		xfeatures_found |= (BIT_ULL(e->ecx_in));

		if (compacted)
			xsave_size += e->eax;
		else
			xsave_size = max(xsave_size, e->eax + e->ebx);
	}

	/*
	 * Either the guest set unsupported XCR0/XSS bits, or the corresponding
	 * entries in the CPUID table were not present. This is not a valid
	 * state to be in.
	 */
	if (xfeatures_found != (xfeatures_en & GENMASK_ULL(63, 2)))
		return 0;

	return xsave_size;
}

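/*
 * Look up the given leaf/subfunction in the SNP CPUID table and copy the
 * register values into the leaf on a match.
 */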
static bool __head
snp_cpuid_get_validated_func(struct cpuid_leaf *leaf)
{
	const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table();
	int i;

	for (i = 0; i < cpuid_table->count; i++) {
		const struct snp_cpuid_fn *e = &cpuid_table->fn[i];

		if (e->eax_in != leaf->fn)
			continue;

		if (cpuid_function_is_indexed(leaf->fn) && e->ecx_in != leaf->subfn)
			continue;

		/*
		 * For 0xD subfunctions 0 and 1, only use the entry corresponding
		 * to the base/legacy XSAVE area size (XCR0=1 or XCR0=3, XSS=0).
		 * See the comments above snp_cpuid_calc_xsave_size() for more
		 * details.
		 */
		if (e->eax_in == 0xD && (e->ecx_in == 0 || e->ecx_in == 1))
			if (!(e->xcr0_in == 1 || e->xcr0_in == 3) || e->xss_in)
				continue;

		leaf->eax = e->eax;
		leaf->ebx = e->ebx;
		leaf->ecx = e->ecx;
		leaf->edx = e->edx;

		return true;
	}

	return false;
}

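/* A failed hypervisor CPUID request at this stage is unrecoverable. */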
static void snp_cpuid_hv(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_leaf *leaf)
{
	if (sev_cpuid_hv(ghcb, ctxt, leaf))
		sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_CPUID_HV);
}

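/*
 * Fix up CPUID table values that depend on runtime state: APIC IDs and the
 * APIC enabled bit come from the hypervisor, OSXSAVE/OSPKE reflect CR4, and
 * the XSAVE area size for leaf 0xD is recalculated from the current
 * XCR0/XSS contents.
 */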
static int __head
snp_cpuid_postprocess(struct ghcb *ghcb, struct es_em_ctxt *ctxt,
		      struct cpuid_leaf *leaf)
{
	struct cpuid_leaf leaf_hv = *leaf;

	switch (leaf->fn) {
	case 0x1:
		snp_cpuid_hv(ghcb, ctxt, &leaf_hv);

		/* initial APIC ID */
		leaf->ebx = (leaf_hv.ebx & GENMASK(31, 24)) | (leaf->ebx & GENMASK(23, 0));
		/* APIC enabled bit */
		leaf->edx = (leaf_hv.edx & BIT(9)) | (leaf->edx & ~BIT(9));

		/* OSXSAVE enabled bit */
		if (native_read_cr4() & X86_CR4_OSXSAVE)
			leaf->ecx |= BIT(27);
		break;
	case 0x7:
		/* OSPKE enabled bit */
		leaf->ecx &= ~BIT(4);
		if (native_read_cr4() & X86_CR4_PKE)
			leaf->ecx |= BIT(4);
		break;
	case 0xB:
		leaf_hv.subfn = 0;
		snp_cpuid_hv(ghcb, ctxt, &leaf_hv);

		/* extended APIC ID */
		leaf->edx = leaf_hv.edx;
		break;
	case 0xD: {
		bool compacted = false;
		u64 xcr0 = 1, xss = 0;
		u32 xsave_size;

		if (leaf->subfn != 0 && leaf->subfn != 1)
			return 0;

		if (native_read_cr4() & X86_CR4_OSXSAVE)
			xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
		if (leaf->subfn == 1) {
			/* Get XSS value if XSAVES is enabled. */
			if (leaf->eax & BIT(3)) {
				unsigned long lo, hi;

				asm volatile("rdmsr" : "=a" (lo), "=d" (hi)
						     : "c" (MSR_IA32_XSS));
				xss = (hi << 32) | lo;
			}

			/*
			 * The PPR and APM aren't clear on what size should be
			 * encoded in 0xD:0x1:EBX when compaction is not enabled
			 * by either XSAVEC (feature bit 1) or XSAVES (feature
			 * bit 3) since SNP-capable hardware has these feature
			 * bits fixed as 1. KVM sets it to 0 in this case, but
			 * to avoid this becoming an issue it's safer to simply
			 * treat this as unsupported for SNP guests.
			 */
			if (!(leaf->eax & (BIT(1) | BIT(3))))
				return -EINVAL;

			compacted = true;
		}

		xsave_size = snp_cpuid_calc_xsave_size(xcr0 | xss, compacted);
		if (!xsave_size)
			return -EINVAL;

		leaf->ebx = xsave_size;
		break;
	}
	case 0x8000001E:
		snp_cpuid_hv(ghcb, ctxt, &leaf_hv);

		/* extended APIC ID */
		leaf->eax = leaf_hv.eax;
		/* compute ID */
		leaf->ebx = (leaf->ebx & GENMASK(31, 8)) | (leaf_hv.ebx & GENMASK(7, 0));
		/* node ID */
		leaf->ecx = (leaf->ecx & GENMASK(31, 8)) | (leaf_hv.ecx & GENMASK(7, 0));
		break;
	default:
		/* No fix-ups needed, use values as-is. */
		break;
	}

	return 0;
}

/*
 * Returns -EOPNOTSUPP if feature not enabled. Any other non-zero return value
 * should be treated as fatal by caller.
 */
int __head
snp_cpuid(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_leaf *leaf)
{
	const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table();

	if (!cpuid_table->count)
		return -EOPNOTSUPP;

	if (!snp_cpuid_get_validated_func(leaf)) {
		/*
		 * Some hypervisors will avoid keeping track of CPUID entries
		 * where all values are zero, since they can be handled the
		 * same as out-of-range values (all-zero). This is useful here
		 * as well as it allows virtually all guest configurations to
		 * work using a single SNP CPUID table.
		 *
		 * To allow for this, there is a need to distinguish between
		 * out-of-range entries and in-range zero entries, since the
		 * CPUID table entries are only a template that may need to be
		 * augmented with additional values for things like
		 * CPU-specific information during post-processing. So if it's
		 * not in the table, set the values to zero. Then, if they are
		 * within a valid CPUID range, proceed with post-processing
		 * using zeros as the initial values. Otherwise, skip
		 * post-processing and just return zeros immediately.
		 */
		leaf->eax = leaf->ebx = leaf->ecx = leaf->edx = 0;

		/* Skip post-processing for out-of-range zero leafs. */
		if (!(leaf->fn <= cpuid_std_range_max ||
		      (leaf->fn >= 0x40000000 && leaf->fn <= cpuid_hyp_range_max) ||
		      (leaf->fn >= 0x80000000 && leaf->fn <= cpuid_ext_range_max)))
			return 0;
	}

	return snp_cpuid_postprocess(ghcb, ctxt, leaf);
}

/*
 * Boot VC Handler - This is the first VC handler during boot, there is no GHCB
 * page yet, so it only supports the MSR based communication with the
 * hypervisor and only the CPUID exit-code.
 */
void __head do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code)
{
	unsigned int subfn = lower_bits(regs->cx, 32);
	unsigned int fn = lower_bits(regs->ax, 32);
	u16 opcode = *(unsigned short *)regs->ip;
	struct cpuid_leaf leaf;
	int ret;

	/* Only CPUID is supported via MSR protocol */
	if (exit_code != SVM_EXIT_CPUID)
		goto fail;

	/* Is it really a CPUID insn? */
	if (opcode != 0xa20f)
		goto fail;

	leaf.fn = fn;
	leaf.subfn = subfn;

	ret = snp_cpuid(NULL, NULL, &leaf);
	if (!ret)
		goto cpuid_done;

	if (ret != -EOPNOTSUPP)
		goto fail;

	if (__sev_cpuid_hv_msr(&leaf))
		goto fail;

cpuid_done:
	regs->ax = leaf.eax;
	regs->bx = leaf.ebx;
	regs->cx = leaf.ecx;
	regs->dx = leaf.edx;

	/*
	 * This is a VC handler and the #VC is only raised when SEV-ES is
	 * active, which means SEV must be active too. Do sanity checks on the
	 * CPUID results to make sure the hypervisor does not trick the kernel
	 * into the no-sev path. This could map sensitive data unencrypted and
	 * make it accessible to the hypervisor.
	 *
	 * In particular, check for:
	 *	- Availability of CPUID leaf 0x8000001f
	 *	- SEV CPUID bit.
	 *
	 * The hypervisor might still report the wrong C-bit position, but this
	 * can't be checked here.
	 */

	if (fn == 0x80000000 && (regs->ax < 0x8000001f))
		/* SEV leaf check */
		goto fail;
	else if ((fn == 0x8000001f && !(regs->ax & BIT(1))))
		/* SEV bit */
		goto fail;

	/* Skip over the CPUID two-byte opcode */
	regs->ip += 2;

	return;

fail:
	/* Terminate the guest */
	sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ);
}

struct cc_setup_data {
	struct setup_data header;
	u32 cc_blob_address;
};

/*
 * Search for a Confidential Computing blob passed in as a setup_data entry
 * via the Linux Boot Protocol.
 */
static __head
struct cc_blob_sev_info *find_cc_blob_setup_data(struct boot_params *bp)
{
	struct cc_setup_data *sd = NULL;
	struct setup_data *hdr;

	hdr = (struct setup_data *)bp->hdr.setup_data;

	while (hdr) {
		if (hdr->type == SETUP_CC_BLOB) {
			sd = (struct cc_setup_data *)hdr;
			return (struct cc_blob_sev_info *)(unsigned long)sd->cc_blob_address;
		}
		hdr = (struct setup_data *)hdr->next;
	}

	return NULL;
}

/*
 * Initialize the kernel's copy of the SNP CPUID table, and set up the
 * pointer that will be used to access it.
 *
 * Maintaining a direct mapping of the SNP CPUID table used by firmware would
 * be possible as an alternative, but the approach is brittle since the
 * mapping needs to be updated in sync with all the changes to virtual memory
 * layout and related mapping facilities throughout the boot process.
 */
static void __head setup_cpuid_table(const struct cc_blob_sev_info *cc_info)
{
	const struct snp_cpuid_table *cpuid_table_fw, *cpuid_table;
	int i;

	if (!cc_info || !cc_info->cpuid_phys || cc_info->cpuid_len < PAGE_SIZE)
		sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_CPUID);

	cpuid_table_fw = (const struct snp_cpuid_table *)cc_info->cpuid_phys;
	if (!cpuid_table_fw->count || cpuid_table_fw->count > SNP_CPUID_COUNT_MAX)
		sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_CPUID);

	cpuid_table = snp_cpuid_get_table();
	memcpy((void *)cpuid_table, cpuid_table_fw, sizeof(*cpuid_table));

	/* Initialize CPUID ranges for range-checking. */
	for (i = 0; i < cpuid_table->count; i++) {
		const struct snp_cpuid_fn *fn = &cpuid_table->fn[i];

		if (fn->eax_in == 0x0)
			cpuid_std_range_max = fn->eax;
		else if (fn->eax_in == 0x40000000)
			cpuid_hyp_range_max = fn->eax;
		else if (fn->eax_in == 0x80000000)
			cpuid_ext_range_max = fn->eax;
	}
}

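/*
 * Validate or invalidate a single 4K page through the SVSM: build a one-entry
 * pvalidate request in the calling area's svsm_buffer and hand its physical
 * address to the SVSM core protocol.
 */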
static void __head svsm_pval_4k_page(unsigned long paddr, bool validate)
{
	struct svsm_pvalidate_call *pc;
	struct svsm_call call = {};
	unsigned long flags;
	u64 pc_pa;
	int ret;

	/*
	 * This can be called very early in the boot, use native functions in
	 * order to avoid paravirt issues.
	 */
	flags = native_local_irq_save();

	call.caa = svsm_get_caa();

	pc = (struct svsm_pvalidate_call *)call.caa->svsm_buffer;
	pc_pa = svsm_get_caa_pa() + offsetof(struct svsm_ca, svsm_buffer);

	pc->num_entries = 1;
	pc->cur_index   = 0;
	pc->entry[0].page_size = RMP_PG_SIZE_4K;
	pc->entry[0].action    = validate;
	pc->entry[0].ignore_cf = 0;
	pc->entry[0].pfn       = paddr >> PAGE_SHIFT;

	/* Protocol 0, Call ID 1 */
	call.rax = SVSM_CORE_CALL(SVSM_CORE_PVALIDATE);
	call.rcx = pc_pa;

	ret = svsm_perform_call_protocol(&call);
	if (ret)
		sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PVALIDATE);

	native_local_irq_restore(flags);
}

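/*
 * Validate or invalidate a 4K page, either via the SVSM when not running at
 * VMPL0, or directly with the PVALIDATE instruction otherwise. Failure is
 * fatal and terminates the guest.
 */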
static void __head pvalidate_4k_page(unsigned long vaddr, unsigned long paddr,
				     bool validate)
{
	int ret;

	if (snp_vmpl) {
		svsm_pval_4k_page(paddr, validate);
	} else {
		ret = pvalidate(vaddr, RMP_PG_SIZE_4K, validate);
		if (ret)
			sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PVALIDATE);
	}
}

/*
 * Maintain the GPA of the SVSM Calling Area (CA) in order to utilize the SVSM
 * services needed when not running in VMPL0.
 */
static bool __head svsm_setup_ca(const struct cc_blob_sev_info *cc_info)
{
	struct snp_secrets_page *secrets_page;
	struct snp_cpuid_table *cpuid_table;
	unsigned int i;
	u64 caa;

	BUILD_BUG_ON(sizeof(*secrets_page) != PAGE_SIZE);

	/*
	 * Check if running at VMPL0.
	 *
	 * Use RMPADJUST (see the rmpadjust() function for a description of what
	 * the instruction does) to update the VMPL1 permissions of a page. If
	 * the guest is running at VMPL0, this will succeed and implies there is
	 * no SVSM. If the guest is running at any other VMPL, this will fail.
	 * Linux SNP guests only ever run at a single VMPL level so permission mask
	 * changes of a lesser-privileged VMPL are a don't-care.
	 *
	 * Use a rip-relative reference to obtain the proper address, since this
	 * routine is running identity mapped when called, both by the decompressor
	 * code and the early kernel code.
	 */
	if (!rmpadjust((unsigned long)rip_rel_ptr(&boot_ghcb_page), RMP_PG_SIZE_4K, 1))
		return false;

	/*
	 * Not running at VMPL0, ensure everything has been properly supplied
	 * for running under an SVSM.
	 */
	if (!cc_info || !cc_info->secrets_phys || cc_info->secrets_len != PAGE_SIZE)
		sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SECRETS_PAGE);

	secrets_page = (struct snp_secrets_page *)cc_info->secrets_phys;
	if (!secrets_page->svsm_size)
		sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_NO_SVSM);

	if (!secrets_page->svsm_guest_vmpl)
		sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SVSM_VMPL0);

	snp_vmpl = secrets_page->svsm_guest_vmpl;

	caa = secrets_page->svsm_caa;

	/*
	 * An open-coded PAGE_ALIGNED() in order to avoid including
	 * kernel-proper headers into the decompressor.
	 */
	if (caa & (PAGE_SIZE - 1))
		sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SVSM_CAA);

	/*
	 * The CA is identity mapped when this routine is called, both by the
	 * decompressor code and the early kernel code.
	 */
	boot_svsm_caa = (struct svsm_ca *)caa;
	boot_svsm_caa_pa = caa;

	/* Advertise the SVSM presence via CPUID. */
	cpuid_table = (struct snp_cpuid_table *)snp_cpuid_get_table();
	for (i = 0; i < cpuid_table->count; i++) {
		struct snp_cpuid_fn *fn = &cpuid_table->fn[i];

		if (fn->eax_in == 0x8000001f)
			fn->eax |= BIT(28);
	}

	return true;
}