// SPDX-License-Identifier: GPL-2.0
/*
 * AMD Encrypted Register State Support
 *
 * Author: Joerg Roedel <jroedel@suse.de>
 *
 * This file is not compiled stand-alone. It contains code shared
 * between the pre-decompression boot code and the running Linux kernel
 * and is included directly into both code-bases.
 */

#include <asm/setup_data.h>

#ifndef __BOOT_COMPRESSED
#define error(v)	pr_err(v)
#define has_cpuflag(f)	boot_cpu_has(f)
#else
#undef WARN
#define WARN(condition, format...) (!!(condition))
#undef vc_forward_exception
#define vc_forward_exception(c)	panic("SNP: Hypervisor requested exception\n")
#endif

/*
 * SVSM related information:
 * During boot, the page tables are set up as identity mapped and later
 * changed to use kernel virtual addresses. Maintain separate virtual and
 * physical addresses for the CAA to allow SVSM functions to be used during
 * early boot, both with identity mapped virtual addresses and proper kernel
 * virtual addresses.
 */
struct svsm_ca *boot_svsm_caa __ro_after_init;
u64 boot_svsm_caa_pa __ro_after_init;

/*
 * Since feature negotiation related variables are set early in the boot
 * process they must reside in the .data section so as not to be zeroed
 * out when the .bss section is later cleared.
 *
 * GHCB protocol version negotiated with the hypervisor.
 */
static u16 ghcb_version __ro_after_init;

/* Copy of the SNP firmware's CPUID page. */
static struct snp_cpuid_table cpuid_table_copy __ro_after_init;

/*
 * These will be initialized based on the CPUID table so that non-present
 * all-zero leaves (for sparse tables) can be differentiated from
 * invalid/out-of-range leaves. This is needed since all-zero leaves
 * still need to be post-processed.
 */
static u32 cpuid_std_range_max __ro_after_init;
static u32 cpuid_hyp_range_max __ro_after_init;
static u32 cpuid_ext_range_max __ro_after_init;

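/*
 * Verify that the CPU provides the features this code relies on. RDRAND is
 * required as a source of randomness that does not depend on the (untrusted)
 * hypervisor.
 */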
bool __init sev_es_check_cpu_features(void)
{
	if (!has_cpuflag(X86_FEATURE_RDRAND)) {
		error("RDRAND instruction not supported - no trusted source of randomness available\n");
		return false;
	}

	return true;
}

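/*
 * Request guest termination via the GHCB MSR protocol: encode the reason set
 * and reason code into a GHCB_MSR_TERM_REQ value, hand it to the hypervisor
 * via VMGEXIT and halt forever in case the request is not honored.
 */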
void __head __noreturn
sev_es_terminate(unsigned int set, unsigned int reason)
{
	u64 val = GHCB_MSR_TERM_REQ;

	/* Tell the hypervisor what went wrong. */
	val |= GHCB_SEV_TERM_REASON(set, reason);

	/* Request Guest Termination from Hypervisor */
	sev_es_wr_ghcb_msr(val);
	VMGEXIT();

	while (true)
		asm volatile("hlt\n" : : : "memory");
}

/*
 * The hypervisor features are available from GHCB version 2 onward.
 */
u64 get_hv_features(void)
{
	u64 val;

	if (ghcb_version < 2)
		return 0;

	sev_es_wr_ghcb_msr(GHCB_MSR_HV_FT_REQ);
	VMGEXIT();

	val = sev_es_rd_ghcb_msr();
	if (GHCB_RESP_CODE(val) != GHCB_MSR_HV_FT_RESP)
		return 0;

	return GHCB_MSR_HV_FT_RESP_VAL(val);
}

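/*
 * Register the GHCB GPA with the hypervisor using the MSR protocol. The
 * hypervisor must acknowledge the registration with the same PFN, otherwise
 * the guest cannot continue and is terminated.
 */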
void snp_register_ghcb_early(unsigned long paddr)
{
	unsigned long pfn = paddr >> PAGE_SHIFT;
	u64 val;

	sev_es_wr_ghcb_msr(GHCB_MSR_REG_GPA_REQ_VAL(pfn));
	VMGEXIT();

	val = sev_es_rd_ghcb_msr();

	/* If the response GPA is not ours then abort the guest */
	if ((GHCB_RESP_CODE(val) != GHCB_MSR_REG_GPA_RESP) ||
	    (GHCB_MSR_REG_GPA_RESP_VAL(val) != pfn))
		sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_REGISTER);
}

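/*
 * Negotiate the GHCB protocol version: query the hypervisor's supported
 * range via the SEV information request and settle on the highest version
 * supported by both sides.
 */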
bool sev_es_negotiate_protocol(void)
{
	u64 val;

	/* Do the GHCB protocol version negotiation */
	sev_es_wr_ghcb_msr(GHCB_MSR_SEV_INFO_REQ);
	VMGEXIT();
	val = sev_es_rd_ghcb_msr();

	if (GHCB_MSR_INFO(val) != GHCB_MSR_SEV_INFO_RESP)
		return false;

	if (GHCB_MSR_PROTO_MAX(val) < GHCB_PROTOCOL_MIN ||
	    GHCB_MSR_PROTO_MIN(val) > GHCB_PROTOCOL_MAX)
		return false;

	ghcb_version = min_t(size_t, GHCB_MSR_PROTO_MAX(val), GHCB_PROTOCOL_MAX);

	return true;
}

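/*
 * Inspect the sw_exit_info fields written by the hypervisor after a VMGEXIT.
 * A non-zero lower 32 bits of sw_exit_info_1 indicates a problem: a value of
 * 1 carries exception information in sw_exit_info_2, which is only honored
 * for a valid #GP or #UD exception injection; anything else is treated as a
 * VMM error.
 */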
static enum es_result verify_exception_info(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
{
	u32 ret;

	ret = ghcb->save.sw_exit_info_1 & GENMASK_ULL(31, 0);
	if (!ret)
		return ES_OK;

	if (ret == 1) {
		u64 info = ghcb->save.sw_exit_info_2;
		unsigned long v = info & SVM_EVTINJ_VEC_MASK;

		/* Check if exception information from hypervisor is sane. */
		if ((info & SVM_EVTINJ_VALID) &&
		    ((v == X86_TRAP_GP) || (v == X86_TRAP_UD)) &&
		    ((info & SVM_EVTINJ_TYPE_MASK) == SVM_EVTINJ_TYPE_EXEPT)) {
			ctxt->fi.vector = v;

			if (info & SVM_EVTINJ_VALID_ERR)
				ctxt->fi.error_code = info >> 32;

			return ES_EXCEPTION;
		}
	}

	return ES_VMM_ERROR;
}

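/* Map the SVSM result code returned in RAX to an errno value. */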
static inline int svsm_process_result_codes(struct svsm_call *call)
{
	switch (call->rax_out) {
	case SVSM_SUCCESS:
		return 0;
	case SVSM_ERR_INCOMPLETE:
	case SVSM_ERR_BUSY:
		return -EAGAIN;
	default:
		return -EINVAL;
	}
}

/*
 * Issue a VMGEXIT to call the SVSM:
 *   - Load the SVSM register state (RAX, RCX, RDX, R8 and R9)
 *   - Set the CA call pending field to 1
 *   - Issue VMGEXIT
 *   - Save the SVSM return register state (RAX, RCX, RDX, R8 and R9)
 *   - Perform atomic exchange of the CA call pending field
 *
 * - See the "Secure VM Service Module for SEV-SNP Guests" specification for
 *   details on the calling convention.
 *   - The calling convention loosely follows the Microsoft X64 calling
 *     convention by putting arguments in RCX, RDX, R8 and R9.
 *   - RAX specifies the SVSM protocol/callid as input and the return code
 *     as output.
 */
static __always_inline void svsm_issue_call(struct svsm_call *call, u8 *pending)
{
	register unsigned long rax asm("rax") = call->rax;
	register unsigned long rcx asm("rcx") = call->rcx;
	register unsigned long rdx asm("rdx") = call->rdx;
	register unsigned long r8  asm("r8")  = call->r8;
	register unsigned long r9  asm("r9")  = call->r9;

	call->caa->call_pending = 1;

	asm volatile("rep; vmmcall\n\t"
		     : "+r" (rax), "+r" (rcx), "+r" (rdx), "+r" (r8), "+r" (r9)
		     : : "memory");

	*pending = xchg(&call->caa->call_pending, *pending);

	call->rax_out = rax;
	call->rcx_out = rcx;
	call->rdx_out = rdx;
	call->r8_out = r8;
	call->r9_out = r9;
}

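/*
 * Call the SVSM through the GHCB MSR protocol. The current GHCB MSR value is
 * saved up front and restored afterwards so the caller's MSR state is
 * preserved across the VMPL0 request.
 */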
static int svsm_perform_msr_protocol(struct svsm_call *call)
{
	u8 pending = 0;
	u64 val, resp;

	/*
	 * When using the MSR protocol, be sure to save and restore
	 * the current MSR value.
	 */
	val = sev_es_rd_ghcb_msr();

	sev_es_wr_ghcb_msr(GHCB_MSR_VMPL_REQ_LEVEL(0));

	svsm_issue_call(call, &pending);

	resp = sev_es_rd_ghcb_msr();

	sev_es_wr_ghcb_msr(val);

	if (pending)
		return -EINVAL;

	if (GHCB_RESP_CODE(resp) != GHCB_MSR_VMPL_RESP)
		return -EINVAL;

	if (GHCB_MSR_VMPL_RESP_VAL(resp))
		return -EINVAL;

	return svsm_process_result_codes(call);
}

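/*
 * Call the SVSM using the full GHCB page via the SNP_RUN_VMPL exit code. An
 * exception reported by the hypervisor is forwarded; in the boot stub that
 * means a panic via the vc_forward_exception() stand-in defined above.
 */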
static int svsm_perform_ghcb_protocol(struct ghcb *ghcb, struct svsm_call *call)
{
	struct es_em_ctxt ctxt;
	u8 pending = 0;

	vc_ghcb_invalidate(ghcb);

	/*
	 * Fill in protocol and format specifiers. This can be called very early
	 * in the boot, so use rip-relative references as needed.
	 */
	ghcb->protocol_version = ghcb_version;
	ghcb->ghcb_usage       = GHCB_DEFAULT_USAGE;

	ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_SNP_RUN_VMPL);
	ghcb_set_sw_exit_info_1(ghcb, 0);
	ghcb_set_sw_exit_info_2(ghcb, 0);

	sev_es_wr_ghcb_msr(__pa(ghcb));

	svsm_issue_call(call, &pending);

	if (pending)
		return -EINVAL;

	switch (verify_exception_info(ghcb, &ctxt)) {
	case ES_OK:
		break;
	case ES_EXCEPTION:
		vc_forward_exception(&ctxt);
		fallthrough;
	default:
		return -EINVAL;
	}

	return svsm_process_result_codes(call);
}

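/*
 * Perform a VMGEXIT using the full GHCB page: program the exit code and exit
 * info fields, point the GHCB MSR at the page's physical address, exit to
 * the hypervisor and validate the response.
 */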
enum es_result sev_es_ghcb_hv_call(struct ghcb *ghcb,
				   struct es_em_ctxt *ctxt,
				   u64 exit_code, u64 exit_info_1,
				   u64 exit_info_2)
{
	/* Fill in protocol and format specifiers */
	ghcb->protocol_version = ghcb_version;
	ghcb->ghcb_usage       = GHCB_DEFAULT_USAGE;

	ghcb_set_sw_exit_code(ghcb, exit_code);
	ghcb_set_sw_exit_info_1(ghcb, exit_info_1);
	ghcb_set_sw_exit_info_2(ghcb, exit_info_2);

	sev_es_wr_ghcb_msr(__pa(ghcb));
	VMGEXIT();

	return verify_exception_info(ghcb, ctxt);
}

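/*
 * Fetch a single CPUID register via the GHCB MSR protocol. Each request
 * returns one register; the value is delivered in the upper 32 bits of the
 * response MSR.
 */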
static int __sev_cpuid_hv(u32 fn, int reg_idx, u32 *reg)
{
	u64 val;

	sev_es_wr_ghcb_msr(GHCB_CPUID_REQ(fn, reg_idx));
	VMGEXIT();
	val = sev_es_rd_ghcb_msr();
	if (GHCB_RESP_CODE(val) != GHCB_MSR_CPUID_RESP)
		return -EIO;

	*reg = (val >> 32);

	return 0;
}

static int __sev_cpuid_hv_msr(struct cpuid_leaf *leaf)
{
	int ret;

	/*
	 * MSR protocol does not support fetching non-zero subfunctions, but is
	 * sufficient to handle current early-boot cases. Should that change,
	 * make sure to report an error rather than ignoring the index and
	 * grabbing random values. If this issue arises in the future, handling
	 * can be added here to use GHCB-page protocol for cases that occur late
	 * enough in boot that GHCB page is available.
	 */
	if (cpuid_function_is_indexed(leaf->fn) && leaf->subfn)
		return -EINVAL;

	ret =         __sev_cpuid_hv(leaf->fn, GHCB_CPUID_REQ_EAX, &leaf->eax);
	ret = ret ? : __sev_cpuid_hv(leaf->fn, GHCB_CPUID_REQ_EBX, &leaf->ebx);
	ret = ret ? : __sev_cpuid_hv(leaf->fn, GHCB_CPUID_REQ_ECX, &leaf->ecx);
	ret = ret ? : __sev_cpuid_hv(leaf->fn, GHCB_CPUID_REQ_EDX, &leaf->edx);

	return ret;
}

static int __sev_cpuid_hv_ghcb(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_leaf *leaf)
{
	u32 cr4 = native_read_cr4();
	int ret;

	ghcb_set_rax(ghcb, leaf->fn);
	ghcb_set_rcx(ghcb, leaf->subfn);

	if (cr4 & X86_CR4_OSXSAVE)
		/* Safe to read xcr0 */
		ghcb_set_xcr0(ghcb, xgetbv(XCR_XFEATURE_ENABLED_MASK));
	else
		/* xgetbv will cause #UD - use reset value for xcr0 */
		ghcb_set_xcr0(ghcb, 1);

	ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_CPUID, 0, 0);
	if (ret != ES_OK)
		return ret;

	if (!(ghcb_rax_is_valid(ghcb) &&
	      ghcb_rbx_is_valid(ghcb) &&
	      ghcb_rcx_is_valid(ghcb) &&
	      ghcb_rdx_is_valid(ghcb)))
		return ES_VMM_ERROR;

	leaf->eax = ghcb->save.rax;
	leaf->ebx = ghcb->save.rbx;
	leaf->ecx = ghcb->save.rcx;
	leaf->edx = ghcb->save.rdx;

	return ES_OK;
}

static int sev_cpuid_hv(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_leaf *leaf)
{
	return ghcb ? __sev_cpuid_hv_ghcb(ghcb, ctxt, leaf)
		    : __sev_cpuid_hv_msr(leaf);
}

/*
 * This may be called early while still running on the initial identity
 * mapping. Use RIP-relative addressing to obtain the correct address
 * while running with the initial identity mapping as well as the
 * switch-over to kernel virtual addresses later.
 */
const struct snp_cpuid_table *snp_cpuid_get_table(void)
{
	return rip_rel_ptr(&cpuid_table_copy);
}

/*
 * The SNP Firmware ABI, Revision 0.9, Section 7.1, details the use of
 * XCR0_IN and XSS_IN to encode multiple versions of 0xD subfunctions 0
 * and 1 based on the corresponding features enabled by a particular
 * combination of XCR0 and XSS registers so that a guest can look up the
 * version corresponding to the features currently enabled in its XCR0/XSS
 * registers. The only value that differs between these versions/table
 * entries is the enabled XSAVE area size advertised via EBX.
 *
 * While hypervisors may choose to make use of this support, it is more
 * robust/secure for a guest to simply find the entry corresponding to the
 * base/legacy XSAVE area size (XCR0=1 or XCR0=3), and then calculate the
 * XSAVE area size using subfunctions 2 through 64, as documented in APM
 * Volume 3, Rev 3.31, Appendix E.3.8, which is what is done here.
 *
 * Since the base/legacy XSAVE area size is documented as 0x240, use that
 * value directly rather than relying on the base size in the CPUID table.
 *
 * Return: XSAVE area size on success, 0 otherwise.
 */
static u32 __head snp_cpuid_calc_xsave_size(u64 xfeatures_en, bool compacted)
{
	const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table();
	u64 xfeatures_found = 0;
	u32 xsave_size = 0x240;
	int i;

	for (i = 0; i < cpuid_table->count; i++) {
		const struct snp_cpuid_fn *e = &cpuid_table->fn[i];

		if (!(e->eax_in == 0xD && e->ecx_in > 1 && e->ecx_in < 64))
			continue;
		if (!(xfeatures_en & (BIT_ULL(e->ecx_in))))
			continue;
		if (xfeatures_found & (BIT_ULL(e->ecx_in)))
			continue;

		xfeatures_found |= (BIT_ULL(e->ecx_in));

		if (compacted)
			xsave_size += e->eax;
		else
			xsave_size = max(xsave_size, e->eax + e->ebx);
	}

	/*
	 * Either the guest set unsupported XCR0/XSS bits, or the corresponding
	 * entries in the CPUID table were not present. This is not a valid
	 * state to be in.
	 */
	if (xfeatures_found != (xfeatures_en & GENMASK_ULL(63, 2)))
		return 0;

	return xsave_size;
}

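/*
 * Look up the given leaf/subfunction in the SNP CPUID table and copy the
 * register values into the leaf on a match.
 */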
static bool __head
snp_cpuid_get_validated_func(struct cpuid_leaf *leaf)
{
	const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table();
	int i;

	for (i = 0; i < cpuid_table->count; i++) {
		const struct snp_cpuid_fn *e = &cpuid_table->fn[i];

		if (e->eax_in != leaf->fn)
			continue;

		if (cpuid_function_is_indexed(leaf->fn) && e->ecx_in != leaf->subfn)
			continue;

		/*
		 * For 0xD subfunctions 0 and 1, only use the entry corresponding
		 * to the base/legacy XSAVE area size (XCR0=1 or XCR0=3, XSS=0).
		 * See the comments above snp_cpuid_calc_xsave_size() for more
		 * details.
		 */
		if (e->eax_in == 0xD && (e->ecx_in == 0 || e->ecx_in == 1))
			if (!(e->xcr0_in == 1 || e->xcr0_in == 3) || e->xss_in)
				continue;

		leaf->eax = e->eax;
		leaf->ebx = e->ebx;
		leaf->ecx = e->ecx;
		leaf->edx = e->edx;

		return true;
	}

	return false;
}

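/* A failed hypervisor CPUID request at this stage is unrecoverable. */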
static void snp_cpuid_hv(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_leaf *leaf)
{
	if (sev_cpuid_hv(ghcb, ctxt, leaf))
		sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_CPUID_HV);
}

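/*
 * Fix up CPUID table values that depend on runtime state: APIC IDs and the
 * APIC enabled bit come from the hypervisor, OSXSAVE/OSPKE reflect CR4, and
 * the XSAVE area size for leaf 0xD is recalculated from the current
 * XCR0/XSS contents.
 */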
static int __head
snp_cpuid_postprocess(struct ghcb *ghcb, struct es_em_ctxt *ctxt,
		      struct cpuid_leaf *leaf)
{
	struct cpuid_leaf leaf_hv = *leaf;

	switch (leaf->fn) {
	case 0x1:
		snp_cpuid_hv(ghcb, ctxt, &leaf_hv);

		/* initial APIC ID */
		leaf->ebx = (leaf_hv.ebx & GENMASK(31, 24)) | (leaf->ebx & GENMASK(23, 0));
		/* APIC enabled bit */
		leaf->edx = (leaf_hv.edx & BIT(9)) | (leaf->edx & ~BIT(9));

		/* OSXSAVE enabled bit */
		if (native_read_cr4() & X86_CR4_OSXSAVE)
			leaf->ecx |= BIT(27);
		break;
	case 0x7:
		/* OSPKE enabled bit */
		leaf->ecx &= ~BIT(4);
		if (native_read_cr4() & X86_CR4_PKE)
			leaf->ecx |= BIT(4);
		break;
	case 0xB:
		leaf_hv.subfn = 0;
		snp_cpuid_hv(ghcb, ctxt, &leaf_hv);

		/* extended APIC ID */
		leaf->edx = leaf_hv.edx;
		break;
	case 0xD: {
		bool compacted = false;
		u64 xcr0 = 1, xss = 0;
		u32 xsave_size;

		if (leaf->subfn != 0 && leaf->subfn != 1)
			return 0;

		if (native_read_cr4() & X86_CR4_OSXSAVE)
			xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
		if (leaf->subfn == 1) {
			/* Get XSS value if XSAVES is enabled. */
			if (leaf->eax & BIT(3)) {
				unsigned long lo, hi;

				asm volatile("rdmsr" : "=a" (lo), "=d" (hi)
						     : "c" (MSR_IA32_XSS));
				xss = (hi << 32) | lo;
			}

			/*
			 * The PPR and APM aren't clear on what size should be
			 * encoded in 0xD:0x1:EBX when compaction is not enabled
			 * by either XSAVEC (feature bit 1) or XSAVES (feature
			 * bit 3) since SNP-capable hardware has these feature
			 * bits fixed as 1. KVM sets it to 0 in this case, but
			 * to avoid this becoming an issue it's safer to simply
			 * treat this as unsupported for SNP guests.
			 */
			if (!(leaf->eax & (BIT(1) | BIT(3))))
				return -EINVAL;

			compacted = true;
		}

		xsave_size = snp_cpuid_calc_xsave_size(xcr0 | xss, compacted);
		if (!xsave_size)
			return -EINVAL;

		leaf->ebx = xsave_size;
		break;
	}
	case 0x8000001E:
		snp_cpuid_hv(ghcb, ctxt, &leaf_hv);

		/* extended APIC ID */
		leaf->eax = leaf_hv.eax;
		/* compute ID */
		leaf->ebx = (leaf->ebx & GENMASK(31, 8)) | (leaf_hv.ebx & GENMASK(7, 0));
		/* node ID */
		leaf->ecx = (leaf->ecx & GENMASK(31, 8)) | (leaf_hv.ecx & GENMASK(7, 0));
		break;
	default:
		/* No fix-ups needed, use values as-is. */
		break;
	}

	return 0;
}

/*
 * Returns -EOPNOTSUPP if feature not enabled. Any other non-zero return value
 * should be treated as fatal by caller.
 */
int __head
snp_cpuid(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_leaf *leaf)
{
	const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table();

	if (!cpuid_table->count)
		return -EOPNOTSUPP;

	if (!snp_cpuid_get_validated_func(leaf)) {
		/*
		 * Some hypervisors will avoid keeping track of CPUID entries
		 * where all values are zero, since they can be handled the
		 * same as out-of-range values (all-zero). This is useful here
		 * as well as it allows virtually all guest configurations to
		 * work using a single SNP CPUID table.
		 *
		 * To allow for this, there is a need to distinguish between
		 * out-of-range entries and in-range zero entries, since the
		 * CPUID table entries are only a template that may need to be
		 * augmented with additional values for things like
		 * CPU-specific information during post-processing. So if it's
		 * not in the table, set the values to zero. Then, if they are
		 * within a valid CPUID range, proceed with post-processing
		 * using zeros as the initial values. Otherwise, skip
		 * post-processing and just return zeros immediately.
		 */
		leaf->eax = leaf->ebx = leaf->ecx = leaf->edx = 0;

		/* Skip post-processing for out-of-range zero leafs. */
		if (!(leaf->fn <= cpuid_std_range_max ||
		      (leaf->fn >= 0x40000000 && leaf->fn <= cpuid_hyp_range_max) ||
		      (leaf->fn >= 0x80000000 && leaf->fn <= cpuid_ext_range_max)))
			return 0;
	}

	return snp_cpuid_postprocess(ghcb, ctxt, leaf);
}

/*
 * Boot VC Handler - This is the first VC handler during boot, there is no GHCB
 * page yet, so it only supports the MSR based communication with the
 * hypervisor and only the CPUID exit-code.
 */
void __head do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code)
{
	unsigned int subfn = lower_bits(regs->cx, 32);
	unsigned int fn = lower_bits(regs->ax, 32);
	u16 opcode = *(unsigned short *)regs->ip;
	struct cpuid_leaf leaf;
	int ret;

	/* Only CPUID is supported via MSR protocol */
	if (exit_code != SVM_EXIT_CPUID)
		goto fail;

	/* Is it really a CPUID insn? */
	if (opcode != 0xa20f)
		goto fail;

	leaf.fn = fn;
	leaf.subfn = subfn;

	ret = snp_cpuid(NULL, NULL, &leaf);
	if (!ret)
		goto cpuid_done;

	if (ret != -EOPNOTSUPP)
		goto fail;

	if (__sev_cpuid_hv_msr(&leaf))
		goto fail;

cpuid_done:
	regs->ax = leaf.eax;
	regs->bx = leaf.ebx;
	regs->cx = leaf.ecx;
	regs->dx = leaf.edx;

	/*
	 * This is a VC handler and the #VC is only raised when SEV-ES is
	 * active, which means SEV must be active too. Do sanity checks on the
	 * CPUID results to make sure the hypervisor does not trick the kernel
	 * into the no-sev path. This could map sensitive data unencrypted and
	 * make it accessible to the hypervisor.
	 *
	 * In particular, check for:
	 *	- Availability of CPUID leaf 0x8000001f
	 *	- SEV CPUID bit.
	 *
	 * The hypervisor might still report the wrong C-bit position, but this
	 * can't be checked here.
	 */

	if (fn == 0x80000000 && (regs->ax < 0x8000001f))
		/* SEV leaf check */
		goto fail;
	else if ((fn == 0x8000001f && !(regs->ax & BIT(1))))
		/* SEV bit */
		goto fail;

	/* Skip over the CPUID two-byte opcode */
	regs->ip += 2;

	return;

fail:
	/* Terminate the guest */
	sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ);
}

struct cc_setup_data {
	struct setup_data header;
	u32 cc_blob_address;
};

/*
 * Search for a Confidential Computing blob passed in as a setup_data entry
 * via the Linux Boot Protocol.
 */
static __head
struct cc_blob_sev_info *find_cc_blob_setup_data(struct boot_params *bp)
{
	struct cc_setup_data *sd = NULL;
	struct setup_data *hdr;

	hdr = (struct setup_data *)bp->hdr.setup_data;

	while (hdr) {
		if (hdr->type == SETUP_CC_BLOB) {
			sd = (struct cc_setup_data *)hdr;
			return (struct cc_blob_sev_info *)(unsigned long)sd->cc_blob_address;
		}
		hdr = (struct setup_data *)hdr->next;
	}

	return NULL;
}

/*
 * Initialize the kernel's copy of the SNP CPUID table, and set up the
 * pointer that will be used to access it.
 *
 * Maintaining a direct mapping of the SNP CPUID table used by firmware would
 * be possible as an alternative, but the approach is brittle since the
 * mapping needs to be updated in sync with all the changes to virtual memory
 * layout and related mapping facilities throughout the boot process.
 */
static void __head setup_cpuid_table(const struct cc_blob_sev_info *cc_info)
{
	const struct snp_cpuid_table *cpuid_table_fw, *cpuid_table;
	int i;

	if (!cc_info || !cc_info->cpuid_phys || cc_info->cpuid_len < PAGE_SIZE)
		sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_CPUID);

	cpuid_table_fw = (const struct snp_cpuid_table *)cc_info->cpuid_phys;
	if (!cpuid_table_fw->count || cpuid_table_fw->count > SNP_CPUID_COUNT_MAX)
		sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_CPUID);

	cpuid_table = snp_cpuid_get_table();
	memcpy((void *)cpuid_table, cpuid_table_fw, sizeof(*cpuid_table));

	/* Initialize CPUID ranges for range-checking. */
	for (i = 0; i < cpuid_table->count; i++) {
		const struct snp_cpuid_fn *fn = &cpuid_table->fn[i];

		if (fn->eax_in == 0x0)
			cpuid_std_range_max = fn->eax;
		else if (fn->eax_in == 0x40000000)
			cpuid_hyp_range_max = fn->eax;
		else if (fn->eax_in == 0x80000000)
			cpuid_ext_range_max = fn->eax;
	}
}

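/*
 * Validate or invalidate a single 4K page through the SVSM: build a one-entry
 * pvalidate request in the calling area's svsm_buffer and hand its physical
 * address to the SVSM core protocol.
 */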
static void __head svsm_pval_4k_page(unsigned long paddr, bool validate)
{
	struct svsm_pvalidate_call *pc;
	struct svsm_call call = {};
	unsigned long flags;
	u64 pc_pa;
	int ret;

	/*
	 * This can be called very early in the boot, use native functions in
	 * order to avoid paravirt issues.
	 */
	flags = native_local_irq_save();

	call.caa = svsm_get_caa();

	pc = (struct svsm_pvalidate_call *)call.caa->svsm_buffer;
	pc_pa = svsm_get_caa_pa() + offsetof(struct svsm_ca, svsm_buffer);

	pc->num_entries = 1;
	pc->cur_index   = 0;
	pc->entry[0].page_size = RMP_PG_SIZE_4K;
	pc->entry[0].action    = validate;
	pc->entry[0].ignore_cf = 0;
	pc->entry[0].pfn       = paddr >> PAGE_SHIFT;

	/* Protocol 0, Call ID 1 */
	call.rax = SVSM_CORE_CALL(SVSM_CORE_PVALIDATE);
	call.rcx = pc_pa;

	ret = svsm_perform_call_protocol(&call);
	if (ret)
		sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PVALIDATE);

	native_local_irq_restore(flags);
}

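/*
 * Validate or invalidate a 4K page, either via the SVSM when not running at
 * VMPL0, or directly with the PVALIDATE instruction otherwise. Failure is
 * fatal and terminates the guest.
 */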
static void __head pvalidate_4k_page(unsigned long vaddr, unsigned long paddr,
				     bool validate)
{
	int ret;

	if (snp_vmpl) {
		svsm_pval_4k_page(paddr, validate);
	} else {
		ret = pvalidate(vaddr, RMP_PG_SIZE_4K, validate);
		if (ret)
			sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PVALIDATE);
	}
}

/*
 * Maintain the GPA of the SVSM Calling Area (CA) in order to utilize the SVSM
 * services needed when not running in VMPL0.
 */
static bool __head svsm_setup_ca(const struct cc_blob_sev_info *cc_info)
{
	struct snp_secrets_page *secrets_page;
	struct snp_cpuid_table *cpuid_table;
	unsigned int i;
	u64 caa;

	BUILD_BUG_ON(sizeof(*secrets_page) != PAGE_SIZE);

	/*
	 * Check if running at VMPL0.
	 *
	 * Use RMPADJUST (see the rmpadjust() function for a description of what
	 * the instruction does) to update the VMPL1 permissions of a page. If
	 * the guest is running at VMPL0, this will succeed and implies there is
	 * no SVSM. If the guest is running at any other VMPL, this will fail.
	 * Linux SNP guests only ever run at a single VMPL level so permission mask
	 * changes of a lesser-privileged VMPL are a don't-care.
	 *
	 * Use a rip-relative reference to obtain the proper address, since this
	 * routine is running identity mapped when called, both by the decompressor
	 * code and the early kernel code.
	 */
	if (!rmpadjust((unsigned long)rip_rel_ptr(&boot_ghcb_page), RMP_PG_SIZE_4K, 1))
		return false;

	/*
	 * Not running at VMPL0, ensure everything has been properly supplied
	 * for running under an SVSM.
	 */
	if (!cc_info || !cc_info->secrets_phys || cc_info->secrets_len != PAGE_SIZE)
		sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SECRETS_PAGE);

	secrets_page = (struct snp_secrets_page *)cc_info->secrets_phys;
	if (!secrets_page->svsm_size)
		sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_NO_SVSM);

	if (!secrets_page->svsm_guest_vmpl)
		sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SVSM_VMPL0);

	snp_vmpl = secrets_page->svsm_guest_vmpl;

	caa = secrets_page->svsm_caa;

	/*
	 * An open-coded PAGE_ALIGNED() in order to avoid including
	 * kernel-proper headers into the decompressor.
	 */
	if (caa & (PAGE_SIZE - 1))
		sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SVSM_CAA);

	/*
	 * The CA is identity mapped when this routine is called, both by the
	 * decompressor code and the early kernel code.
	 */
	boot_svsm_caa = (struct svsm_ca *)caa;
	boot_svsm_caa_pa = caa;

	/* Advertise the SVSM presence via CPUID. */
	cpuid_table = (struct snp_cpuid_table *)snp_cpuid_get_table();
	for (i = 0; i < cpuid_table->count; i++) {
		struct snp_cpuid_fn *fn = &cpuid_table->fn[i];

		if (fn->eax_in == 0x8000001f)
			fn->eax |= BIT(28);
	}

	return true;
}