// SPDX-License-Identifier: GPL-2.0-only
/*
 * AMD Memory Encryption Support
 *
 * Copyright (C) 2016 Advanced Micro Devices, Inc.
 *
 * Author: Tom Lendacky <thomas.lendacky@amd.com>
 */

#define DISABLE_BRANCH_PROFILING

#include <linux/linkage.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/dma-direct.h>
#include <linux/swiotlb.h>
#include <linux/mem_encrypt.h>
#include <linux/device.h>
#include <linux/kernel.h>
#include <linux/bitops.h>
#include <linux/dma-mapping.h>
#include <linux/cc_platform.h>

#include <asm/tlbflush.h>
#include <asm/fixmap.h>
#include <asm/setup.h>
#include <asm/mem_encrypt.h>
#include <asm/bootparam.h>
#include <asm/set_memory.h>
#include <asm/cacheflush.h>
#include <asm/processor-flags.h>
#include <asm/msr.h>
#include <asm/cmdline.h>
#include <asm/sev.h>

#include "mm_internal.h"

/*
 * Since SME related variables are set early in the boot process they must
 * reside in the .data section so as not to be zeroed out when the .bss
 * section is later cleared.
 */
u64 sme_me_mask __section(".data") = 0;
u64 sev_status __section(".data") = 0;
u64 sev_check_data __section(".data") = 0;
EXPORT_SYMBOL(sme_me_mask);

/* Buffer used for early in-place encryption by BSP, no locking needed */
static char sme_early_buffer[PAGE_SIZE] __initdata __aligned(PAGE_SIZE);

/*
 * SNP-specific routine which needs to additionally change the page state from
 * private to shared before copying the data from the source to destination and
 * restore it after the copy.
 */
static inline void __init snp_memcpy(void *dst, void *src, size_t sz,
				     unsigned long paddr, bool decrypt)
{
	unsigned long npages = PAGE_ALIGN(sz) >> PAGE_SHIFT;

	if (decrypt) {
		/*
		 * @paddr needs to be accessed decrypted, mark the page shared in
		 * the RMP table before copying it.
		 */
		early_snp_set_memory_shared((unsigned long)__va(paddr), paddr, npages);

		memcpy(dst, src, sz);

		/* Restore the page state after the memcpy. */
		early_snp_set_memory_private((unsigned long)__va(paddr), paddr, npages);
	} else {
		/*
		 * @paddr needs to be accessed encrypted, no need for the page state
		 * change.
		 */
		memcpy(dst, src, sz);
	}
}

/*
 * This routine does not change the underlying encryption setting of the
 * page(s) that map this memory. It assumes that eventually the memory is
 * meant to be accessed as either encrypted or decrypted but the contents
 * are currently not in the desired state.
 *
 * This routine follows the steps outlined in the AMD64 Architecture
 * Programmer's Manual Volume 2, Section 7.10.8 Encrypt-in-Place.
 */
static void __init __sme_early_enc_dec(resource_size_t paddr,
				       unsigned long size, bool enc)
{
	void *src, *dst;
	size_t len;

	if (!sme_me_mask)
		return;

	wbinvd();

	/*
	 * There are a limited number of early mapping slots, so map (at most)
	 * one page at a time.
	 */
	while (size) {
		len = min_t(size_t, sizeof(sme_early_buffer), size);

		/*
		 * Create mappings for the current and desired format of
		 * the memory. Use a write-protected mapping for the source.
		 */
		src = enc ? early_memremap_decrypted_wp(paddr, len) :
			    early_memremap_encrypted_wp(paddr, len);

		dst = enc ? early_memremap_encrypted(paddr, len) :
			    early_memremap_decrypted(paddr, len);

		/*
		 * If a mapping can't be obtained to perform the operation,
		 * then eventual access of that area in the desired mode
		 * will cause a crash.
		 */
		BUG_ON(!src || !dst);

		/*
		 * Use a temporary buffer, of cache-line multiple size, to
		 * avoid data corruption as documented in the APM.
		 */
		if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) {
			snp_memcpy(sme_early_buffer, src, len, paddr, enc);
			snp_memcpy(dst, sme_early_buffer, len, paddr, !enc);
		} else {
			memcpy(sme_early_buffer, src, len);
			memcpy(dst, sme_early_buffer, len);
		}

		early_memunmap(dst, len);
		early_memunmap(src, len);

		paddr += len;
		size -= len;
	}
}

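/*
 * Encrypt or decrypt, in place, a range of physical memory during early
 * boot. Thin wrappers around __sme_early_enc_dec().
 */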
void __init sme_early_encrypt(resource_size_t paddr, unsigned long size)
{
	__sme_early_enc_dec(paddr, size, true);
}

void __init sme_early_decrypt(resource_size_t paddr, unsigned long size)
{
	__sme_early_enc_dec(paddr, size, false);
}

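/*
 * Create (map == true) or remove (map == false) early 2M page table
 * mappings for @vaddr/@size using early_pmd_flags with the encryption
 * mask cleared, so boot data placed unencrypted by the boot loader can
 * be accessed correctly.
 */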
static void __init __sme_early_map_unmap_mem(void *vaddr, unsigned long size,
					     bool map)
{
	unsigned long paddr = (unsigned long)vaddr - __PAGE_OFFSET;
	pmdval_t pmd_flags, pmd;

	/* Use early_pmd_flags but remove the encryption mask */
	pmd_flags = __sme_clr(early_pmd_flags);

	do {
		pmd = map ? (paddr & PMD_MASK) + pmd_flags : 0;
		__early_make_pgtable((unsigned long)vaddr, pmd);

		vaddr += PMD_SIZE;
		paddr += PMD_SIZE;
		size = (size <= PMD_SIZE) ? 0 : size - PMD_SIZE;
	} while (size);

	flush_tlb_local();
}

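/*
 * Remove the early, decrypted mappings of the boot data (boot_params and
 * kernel command line) once they are no longer needed. Only relevant when
 * the kernel itself runs memory-encrypted (SME).
 */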
void __init sme_unmap_bootdata(char *real_mode_data)
{
	struct boot_params *boot_data;
	unsigned long cmdline_paddr;

	if (!cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT))
		return;

	/* Get the command line address before unmapping the real_mode_data */
	boot_data = (struct boot_params *)real_mode_data;
	cmdline_paddr = boot_data->hdr.cmd_line_ptr | ((u64)boot_data->ext_cmd_line_ptr << 32);

	__sme_early_map_unmap_mem(real_mode_data, sizeof(boot_params), false);

	if (!cmdline_paddr)
		return;

	__sme_early_map_unmap_mem(__va(cmdline_paddr), COMMAND_LINE_SIZE, false);
}

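/*
 * Map the boot data (boot_params and kernel command line) decrypted so it
 * can be read even though the boot loader placed it in memory unencrypted.
 * Only relevant when the kernel itself runs memory-encrypted (SME).
 */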
void __init sme_map_bootdata(char *real_mode_data)
{
	struct boot_params *boot_data;
	unsigned long cmdline_paddr;

	if (!cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT))
		return;

	__sme_early_map_unmap_mem(real_mode_data, sizeof(boot_params), true);

	/* Get the command line address after mapping the real_mode_data */
	boot_data = (struct boot_params *)real_mode_data;
	cmdline_paddr = boot_data->hdr.cmd_line_ptr | ((u64)boot_data->ext_cmd_line_ptr << 32);

	if (!cmdline_paddr)
		return;

	__sme_early_map_unmap_mem(__va(cmdline_paddr), COMMAND_LINE_SIZE, true);
}

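/*
 * Return the PFN of the page table entry at the given page table level and,
 * if @ret_prot is not NULL, its protection bits. Returns 0 (and warns once)
 * for an unexpected level.
 */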
static unsigned long pg_level_to_pfn(int level, pte_t *kpte, pgprot_t *ret_prot)
{
	unsigned long pfn = 0;
	pgprot_t prot;

	switch (level) {
	case PG_LEVEL_4K:
		pfn = pte_pfn(*kpte);
		prot = pte_pgprot(*kpte);
		break;
	case PG_LEVEL_2M:
		pfn = pmd_pfn(*(pmd_t *)kpte);
		prot = pmd_pgprot(*(pmd_t *)kpte);
		break;
	case PG_LEVEL_1G:
		pfn = pud_pfn(*(pud_t *)kpte);
		prot = pud_pgprot(*(pud_t *)kpte);
		break;
	default:
		WARN_ONCE(1, "Invalid level for kpte\n");
		return 0;
	}

	if (ret_prot)
		*ret_prot = prot;

	return pfn;
}

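/*
 * Changing the encryption attribute alters the physical address in the page
 * tables (the C-bit), so a TLB flush is always required.
 */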
static bool amd_enc_tlb_flush_required(bool enc)
{
	return true;
}

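/*
 * Cache flushes can be skipped when the hardware keeps caches coherent
 * across C-bit changes (X86_FEATURE_SME_COHERENT).
 */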
static bool amd_enc_cache_flush_required(void)
{
	return !cpu_feature_enabled(X86_FEATURE_SME_COHERENT);
}

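/*
 * Notify the hypervisor, via the paravirt notify_page_enc_status_changed()
 * hook, about the encryption status change of every page backing the given
 * virtual range. Compiled out when CONFIG_PARAVIRT is not enabled.
 */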
static void enc_dec_hypercall(unsigned long vaddr, unsigned long size, bool enc)
{
#ifdef CONFIG_PARAVIRT
	unsigned long vaddr_end = vaddr + size;

	while (vaddr < vaddr_end) {
		int psize, pmask, level;
		unsigned long pfn;
		pte_t *kpte;

		kpte = lookup_address(vaddr, &level);
		if (!kpte || pte_none(*kpte)) {
			WARN_ONCE(1, "kpte lookup for vaddr\n");
			return;
		}

		pfn = pg_level_to_pfn(level, kpte, NULL);
		if (!pfn)
			continue;

		psize = page_level_size(level);
		pmask = page_level_mask(level);

		notify_page_enc_status_changed(pfn, psize >> PAGE_SHIFT, enc);

		vaddr = (vaddr & pmask) + psize;
	}
#endif
}

static bool amd_enc_status_change_prepare(unsigned long vaddr, int npages, bool enc)
{
	/*
	 * To maintain the security guarantees of SEV-SNP guests, make sure
	 * to invalidate the memory before the encryption attribute is cleared.
	 */
	if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP) && !enc)
		snp_set_memory_shared(vaddr, npages);

	return true;
}

/* Return true unconditionally: return value doesn't matter for the SEV side */
static bool amd_enc_status_change_finish(unsigned long vaddr, int npages, bool enc)
{
	/*
	 * After memory is mapped encrypted in the page table, validate it
	 * so that it is consistent with the page table updates.
	 */
	if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP) && enc)
		snp_set_memory_private(vaddr, npages);

	if (!cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT))
		enc_dec_hypercall(vaddr, npages << PAGE_SHIFT, enc);

	return true;
}

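/*
 * Set or clear the encryption attribute of a single page table entry,
 * encrypting or decrypting the mapped contents in place so they remain
 * valid under the new mapping. For SNP guests the page state in the RMP
 * table is updated as well.
 */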
static void __init __set_clr_pte_enc(pte_t *kpte, int level, bool enc)
{
	pgprot_t old_prot, new_prot;
	unsigned long pfn, pa, size;
	pte_t new_pte;

	pfn = pg_level_to_pfn(level, kpte, &old_prot);
	if (!pfn)
		return;

	new_prot = old_prot;
	if (enc)
		pgprot_val(new_prot) |= _PAGE_ENC;
	else
		pgprot_val(new_prot) &= ~_PAGE_ENC;

	/* If prot is same then do nothing. */
	if (pgprot_val(old_prot) == pgprot_val(new_prot))
		return;

	pa = pfn << PAGE_SHIFT;
	size = page_level_size(level);

	/*
	 * We are going to perform in-place en-/decryption and change the
	 * physical page attribute from C=1 to C=0 or vice versa. Flush the
	 * caches to ensure that data gets accessed with the correct C-bit.
	 */
	clflush_cache_range(__va(pa), size);

	/* Encrypt/decrypt the contents in-place */
	if (enc) {
		sme_early_encrypt(pa, size);
	} else {
		sme_early_decrypt(pa, size);

		/*
		 * On SNP, the page state change in the RMP table must happen
		 * before the page table updates.
		 */
		early_snp_set_memory_shared((unsigned long)__va(pa), pa, 1);
	}

	/* Change the page encryption mask. */
	new_pte = pfn_pte(pfn, new_prot);
	set_pte_atomic(kpte, new_pte);

	/*
	 * If the page is set encrypted in the page table, then update the RMP
	 * table to add this page as private.
	 */
	if (enc)
		early_snp_set_memory_private((unsigned long)__va(pa), pa, 1);
}

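/*
 * Walk the page tables for the given virtual range and set or clear the
 * encryption attribute of each mapping, splitting large pages when only
 * part of them is affected. Used during early boot, before the regular
 * set_memory_*() interfaces are usable.
 */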
static int __init early_set_memory_enc_dec(unsigned long vaddr,
					   unsigned long size, bool enc)
{
	unsigned long vaddr_end, vaddr_next, start;
	unsigned long psize, pmask;
	int split_page_size_mask;
	int level, ret;
	pte_t *kpte;

	start = vaddr;
	vaddr_next = vaddr;
	vaddr_end = vaddr + size;

	for (; vaddr < vaddr_end; vaddr = vaddr_next) {
		kpte = lookup_address(vaddr, &level);
		if (!kpte || pte_none(*kpte)) {
			ret = 1;
			goto out;
		}

		if (level == PG_LEVEL_4K) {
			__set_clr_pte_enc(kpte, level, enc);
			vaddr_next = (vaddr & PAGE_MASK) + PAGE_SIZE;
			continue;
		}

		psize = page_level_size(level);
		pmask = page_level_mask(level);

		/*
		 * Check whether we can change the large page in one go.
		 * We request a split when the address is not aligned and
		 * the number of pages to set/clear the encryption bit is
		 * smaller than the number of pages in the large page.
		 */
		if (vaddr == (vaddr & pmask) &&
		    ((vaddr_end - vaddr) >= psize)) {
			__set_clr_pte_enc(kpte, level, enc);
			vaddr_next = (vaddr & pmask) + psize;
			continue;
		}

		/*
		 * The virtual address is part of a larger page, create the next
		 * level page table mapping (4K or 2M). If it is part of a 2M
		 * page then we request a split of the large page into 4K
		 * chunks. A 1GB large page is split into 2M pages.
		 */
		if (level == PG_LEVEL_2M)
			split_page_size_mask = 0;
		else
			split_page_size_mask = 1 << PG_LEVEL_2M;

		/*
		 * kernel_physical_mapping_change() does not flush the TLBs, so
		 * a TLB flush is required after we exit from the for loop.
		 */
		kernel_physical_mapping_change(__pa(vaddr & pmask),
					       __pa((vaddr_end & pmask) + psize),
					       split_page_size_mask);
	}

	ret = 0;

	early_set_mem_enc_dec_hypercall(start, size, enc);
out:
	__flush_tlb_all();
	return ret;
}

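/*
 * Early boot helpers to switch an already-mapped virtual address range to
 * decrypted or encrypted access.
 */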
int __init early_set_memory_decrypted(unsigned long vaddr, unsigned long size)
{
	return early_set_memory_enc_dec(vaddr, size, false);
}

int __init early_set_memory_encrypted(unsigned long vaddr, unsigned long size)
{
	return early_set_memory_enc_dec(vaddr, size, true);
}

void __init early_set_mem_enc_dec_hypercall(unsigned long vaddr, unsigned long size, bool enc)
{
	enc_dec_hypercall(vaddr, size, enc);
}

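/*
 * Early SME/SEV setup: if memory encryption is active (sme_me_mask is set),
 * apply the encryption mask to the early page table flags and the supported
 * PTE mask, update the protection map, and install the AMD-specific
 * encryption-status-change callbacks.
 */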
void __init sme_early_init(void)
{
	if (!sme_me_mask)
		return;

	early_pmd_flags = __sme_set(early_pmd_flags);

	__supported_pte_mask = __sme_set(__supported_pte_mask);

	/* Update the protection map with memory encryption mask */
	add_encrypt_protection_map();

	x86_platform.guest.enc_status_change_prepare = amd_enc_status_change_prepare;
	x86_platform.guest.enc_status_change_finish = amd_enc_status_change_finish;
	x86_platform.guest.enc_tlb_flush_required = amd_enc_tlb_flush_required;
	x86_platform.guest.enc_cache_flush_required = amd_enc_cache_flush_required;

	/*
	 * AMD-SEV-ES intercepts the RDMSR to read the X2APIC ID in the
	 * parallel bringup low level code. That raises #VC which cannot be
	 * handled there.
	 * It does not provide a RDMSR GHCB protocol so the early startup
	 * code cannot directly communicate with the secure firmware. The
	 * alternative solution to retrieve the APIC ID via CPUID(0xb),
	 * which is covered by the GHCB protocol, is not viable either
	 * because there is no enforcement of the CPUID(0xb) provided
	 * "initial" APIC ID to be the same as the real APIC ID.
	 * Disable parallel bootup.
	 */
	if (sev_status & MSR_AMD64_SEV_ES_ENABLED)
		x86_cpuinit.parallel_bringup = false;
}

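/*
 * Free the unused portion of the .bss..decrypted section, re-encrypting it
 * first if it was mapped decrypted during boot.
 */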
void __init mem_encrypt_free_decrypted_mem(void)
{
	unsigned long vaddr, vaddr_end, npages;
	int r;

	vaddr = (unsigned long)__start_bss_decrypted_unused;
	vaddr_end = (unsigned long)__end_bss_decrypted;
	npages = (vaddr_end - vaddr) >> PAGE_SHIFT;

	/*
	 * If the unused memory range was mapped decrypted, change the encryption
	 * attribute from decrypted to encrypted before freeing it. Base the
	 * re-encryption on the same condition used for the decryption in
	 * sme_postprocess_startup(). Higher level abstractions, such as
	 * CC_ATTR_MEM_ENCRYPT, aren't necessarily equivalent in a Hyper-V VM
	 * using vTOM, where sme_me_mask is always zero.
	 */
	if (sme_me_mask) {
		r = set_memory_encrypted(vaddr, npages);
		if (r) {
			pr_warn("failed to free unused decrypted pages\n");
			return;
		}
	}

	free_init_pages("unused decrypted", vaddr, vaddr_end);
}

/* Source: linux/arch/x86/mm/mem_encrypt_amd.c */