// SPDX-License-Identifier: GPL-2.0-only
/*
 * AMD Memory Encryption Support
 *
 * Copyright (C) 2016 Advanced Micro Devices, Inc.
 *
 * Author: Tom Lendacky <thomas.lendacky@amd.com>
 */

#define DISABLE_BRANCH_PROFILING

#include <linux/linkage.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/dma-direct.h>
#include <linux/swiotlb.h>
#include <linux/mem_encrypt.h>
#include <linux/device.h>
#include <linux/kernel.h>
#include <linux/bitops.h>
#include <linux/dma-mapping.h>
#include <linux/cc_platform.h>

#include <asm/tlbflush.h>
#include <asm/fixmap.h>
#include <asm/setup.h>
#include <asm/mem_encrypt.h>
#include <asm/bootparam.h>
#include <asm/set_memory.h>
#include <asm/cacheflush.h>
#include <asm/processor-flags.h>
#include <asm/msr.h>
#include <asm/cmdline.h>
#include <asm/sev.h>

#include "mm_internal.h"

/*
 * Since SME related variables are set early in the boot process they must
 * reside in the .data section so as not to be zeroed out when the .bss
 * section is later cleared.
 */
u64 sme_me_mask __section(".data") = 0;
u64 sev_status __section(".data") = 0;
u64 sev_check_data __section(".data") = 0;
EXPORT_SYMBOL(sme_me_mask);

/* Buffer used for early in-place encryption by BSP, no locking needed */
static char sme_early_buffer[PAGE_SIZE] __initdata __aligned(PAGE_SIZE);

/*
 * SNP-specific routine which needs to additionally change the page state from
 * private to shared before copying the data from the source to destination and
 * restore after the copy.
 */
static inline void __init snp_memcpy(void *dst, void *src, size_t sz,
				     unsigned long paddr, bool decrypt)
{
	unsigned long npages = PAGE_ALIGN(sz) >> PAGE_SHIFT;

	if (decrypt) {
		/*
		 * @paddr needs to be accessed decrypted, mark the page shared in
		 * the RMP table before copying it.
		 */
		early_snp_set_memory_shared((unsigned long)__va(paddr), paddr, npages);

		memcpy(dst, src, sz);

		/* Restore the page state after the memcpy. */
		early_snp_set_memory_private((unsigned long)__va(paddr), paddr, npages);
	} else {
		/*
		 * @paddr needs to be accessed encrypted, no need for the page state
		 * change.
		 */
		memcpy(dst, src, sz);
	}
}

/*
 * This routine does not change the underlying encryption setting of the
 * page(s) that map this memory. It assumes that eventually the memory is
 * meant to be accessed as either encrypted or decrypted but the contents
 * are currently not in the desired state.
 *
 * This routine follows the steps outlined in the AMD64 Architecture
 * Programmer's Manual Volume 2, Section 7.10.8 Encrypt-in-Place.
 */
static void __init __sme_early_enc_dec(resource_size_t paddr,
				       unsigned long size, bool enc)
{
	void *src, *dst;
	size_t len;

	if (!sme_me_mask)
		return;

	wbinvd();

	/*
	 * There are a limited number of early mapping slots, so map (at most)
	 * one page at a time.
	 */
	while (size) {
		len = min_t(size_t, sizeof(sme_early_buffer), size);

		/*
		 * Create mappings for the current and desired format of
		 * the memory. Use a write-protected mapping for the source.
		 */
		src = enc ? early_memremap_decrypted_wp(paddr, len) :
			    early_memremap_encrypted_wp(paddr, len);

		dst = enc ? early_memremap_encrypted(paddr, len) :
			    early_memremap_decrypted(paddr, len);

		/*
		 * If a mapping can't be obtained to perform the operation,
		 * then eventual access of that area in the desired mode
		 * will cause a crash.
		 */
		BUG_ON(!src || !dst);

		/*
		 * Use a temporary buffer, of cache-line multiple size, to
		 * avoid data corruption as documented in the APM.
		 */
		if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) {
			snp_memcpy(sme_early_buffer, src, len, paddr, enc);
			snp_memcpy(dst, sme_early_buffer, len, paddr, !enc);
		} else {
			memcpy(sme_early_buffer, src, len);
			memcpy(dst, sme_early_buffer, len);
		}

		early_memunmap(dst, len);
		early_memunmap(src, len);

		paddr += len;
		size -= len;
	}
}

void __init sme_early_encrypt(resource_size_t paddr, unsigned long size)
{
	__sme_early_enc_dec(paddr, size, true);
}

void __init sme_early_decrypt(resource_size_t paddr, unsigned long size)
{
	__sme_early_enc_dec(paddr, size, false);
}

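/*
 * Map or unmap @size bytes at @vaddr using early 2MB (PMD) page table
 * entries with the encryption mask cleared, so that boot data placed in
 * memory as unencrypted by the bootloader stays readable while SME is
 * active.
 */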
static void __init __sme_early_map_unmap_mem(void *vaddr, unsigned long size,
					     bool map)
{
	unsigned long paddr = (unsigned long)vaddr - __PAGE_OFFSET;
	pmdval_t pmd_flags, pmd;

	/* Use early_pmd_flags but remove the encryption mask */
	pmd_flags = __sme_clr(early_pmd_flags);

	do {
		pmd = map ? (paddr & PMD_MASK) + pmd_flags : 0;
		__early_make_pgtable((unsigned long)vaddr, pmd);

		vaddr += PMD_SIZE;
		paddr += PMD_SIZE;
		size = (size <= PMD_SIZE) ? 0 : size - PMD_SIZE;
	} while (size);

	flush_tlb_local();
}

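/*
 * Tear down the early decrypted mappings of the boot_params structure and
 * the kernel command line once they are no longer needed.
 */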
void __init sme_unmap_bootdata(char *real_mode_data)
{
	struct boot_params *boot_data;
	unsigned long cmdline_paddr;

	if (!cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT))
		return;

	/* Get the command line address before unmapping the real_mode_data */
	boot_data = (struct boot_params *)real_mode_data;
	cmdline_paddr = boot_data->hdr.cmd_line_ptr | ((u64)boot_data->ext_cmd_line_ptr << 32);

	__sme_early_map_unmap_mem(real_mode_data, sizeof(boot_params), false);

	if (!cmdline_paddr)
		return;

	__sme_early_map_unmap_mem(__va(cmdline_paddr), COMMAND_LINE_SIZE, false);
}

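/*
 * Create early decrypted mappings of the boot_params structure and the
 * kernel command line so their unencrypted contents can be read while SME
 * is active.
 */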
void __init sme_map_bootdata(char *real_mode_data)
{
	struct boot_params *boot_data;
	unsigned long cmdline_paddr;

	if (!cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT))
		return;

	__sme_early_map_unmap_mem(real_mode_data, sizeof(boot_params), true);

	/* Get the command line address after mapping the real_mode_data */
	boot_data = (struct boot_params *)real_mode_data;
	cmdline_paddr = boot_data->hdr.cmd_line_ptr | ((u64)boot_data->ext_cmd_line_ptr << 32);

	if (!cmdline_paddr)
		return;

	__sme_early_map_unmap_mem(__va(cmdline_paddr), COMMAND_LINE_SIZE, true);
}

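/*
 * Return the PFN mapped by @kpte at the given page table @level (4K PTE,
 * 2M PMD or 1G PUD), optionally storing the protection bits in @ret_prot.
 * Returns 0 for an unexpected level.
 */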
static unsigned long pg_level_to_pfn(int level, pte_t *kpte, pgprot_t *ret_prot)
{
	unsigned long pfn = 0;
	pgprot_t prot;

	switch (level) {
	case PG_LEVEL_4K:
		pfn = pte_pfn(*kpte);
		prot = pte_pgprot(*kpte);
		break;
	case PG_LEVEL_2M:
		pfn = pmd_pfn(*(pmd_t *)kpte);
		prot = pmd_pgprot(*(pmd_t *)kpte);
		break;
	case PG_LEVEL_1G:
		pfn = pud_pfn(*(pud_t *)kpte);
		prot = pud_pgprot(*(pud_t *)kpte);
		break;
	default:
		WARN_ONCE(1, "Invalid level for kpte\n");
		return 0;
	}

	if (ret_prot)
		*ret_prot = prot;

	return pfn;
}

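/* Changing the encryption attribute of a mapping always requires a TLB flush. */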
static bool amd_enc_tlb_flush_required(bool enc)
{
	return true;
}

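/*
 * A cache flush around the attribute change is only needed when the CPU
 * does not enforce cache coherency across encryption domains
 * (X86_FEATURE_SME_COHERENT).
 */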
static bool amd_enc_cache_flush_required(void)
{
	return !cpu_feature_enabled(X86_FEATURE_SME_COHERENT);
}

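/*
 * Walk the mappings covering @size bytes at @vaddr and notify the
 * hypervisor, via the paravirt notify_page_enc_status_changed() hook, that
 * the pages changed between encrypted (private) and decrypted (shared) so
 * it can keep its tracking of guest page states up to date.
 */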
static void enc_dec_hypercall(unsigned long vaddr, unsigned long size, bool enc)
{
#ifdef CONFIG_PARAVIRT
	unsigned long vaddr_end = vaddr + size;

	while (vaddr < vaddr_end) {
		int psize, pmask, level;
		unsigned long pfn;
		pte_t *kpte;

		kpte = lookup_address(vaddr, &level);
		if (!kpte || pte_none(*kpte)) {
			WARN_ONCE(1, "kpte lookup for vaddr\n");
			return;
		}

		pfn = pg_level_to_pfn(level, kpte, NULL);
		if (!pfn)
			continue;

		psize = page_level_size(level);
		pmask = page_level_mask(level);

		notify_page_enc_status_changed(pfn, psize >> PAGE_SHIFT, enc);

		vaddr = (vaddr & pmask) + psize;
	}
#endif
}

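/*
 * Prepare for an encryption attribute change: under SEV-SNP a page must be
 * made shared in the RMP table before the encryption attribute is cleared
 * from its page table entries.
 */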
static bool amd_enc_status_change_prepare(unsigned long vaddr, int npages, bool enc)
{
	/*
	 * To maintain the security guarantees of SEV-SNP guests, make sure
	 * to invalidate the memory before the encryption attribute is cleared.
	 */
	if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP) && !enc)
		snp_set_memory_shared(vaddr, npages);

	return true;
}

/* Return true unconditionally: return value doesn't matter for the SEV side */
static bool amd_enc_status_change_finish(unsigned long vaddr, int npages, bool enc)
{
	/*
	 * After memory is mapped encrypted in the page table, validate it
	 * so that it is consistent with the page table updates.
	 */
	if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP) && enc)
		snp_set_memory_private(vaddr, npages);

	if (!cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT))
		enc_dec_hypercall(vaddr, npages << PAGE_SHIFT, enc);

	return true;
}

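/*
 * En-/decrypt the contents mapped by @kpte in place, then flip the
 * encryption bit in the page table entry itself, keeping the RMP table in
 * sync on SNP.
 */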
static void __init __set_clr_pte_enc(pte_t *kpte, int level, bool enc)
{
	pgprot_t old_prot, new_prot;
	unsigned long pfn, pa, size;
	pte_t new_pte;

	pfn = pg_level_to_pfn(level, kpte, &old_prot);
	if (!pfn)
		return;

	new_prot = old_prot;
	if (enc)
		pgprot_val(new_prot) |= _PAGE_ENC;
	else
		pgprot_val(new_prot) &= ~_PAGE_ENC;

	/* If the protection bits did not change, there is nothing to do. */
	if (pgprot_val(old_prot) == pgprot_val(new_prot))
		return;

	pa = pfn << PAGE_SHIFT;
	size = page_level_size(level);

	/*
	 * We are going to perform in-place en-/decryption and change the
	 * physical page attribute from C=1 to C=0 or vice versa. Flush the
	 * caches to ensure that data gets accessed with the correct C-bit.
	 */
	clflush_cache_range(__va(pa), size);

	/* Encrypt/decrypt the contents in-place */
	if (enc) {
		sme_early_encrypt(pa, size);
	} else {
		sme_early_decrypt(pa, size);

		/*
		 * On SNP, the page state change in the RMP table must happen
		 * before the page table updates.
		 */
		early_snp_set_memory_shared((unsigned long)__va(pa), pa, 1);
	}

	/* Change the page encryption mask. */
	new_pte = pfn_pte(pfn, new_prot);
	set_pte_atomic(kpte, new_pte);

	/*
	 * If the page is set encrypted in the page table, then update the RMP
	 * table to add this page as private.
	 */
	if (enc)
		early_snp_set_memory_private((unsigned long)__va(pa), pa, 1);
}

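/*
 * Set or clear the encryption attribute across [@vaddr, @vaddr + @size),
 * changing whole large pages in one go and splitting them when the range
 * covers only a part of one. Returns 0 on success, nonzero when a mapping
 * cannot be looked up.
 */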
static int __init early_set_memory_enc_dec(unsigned long vaddr,
					   unsigned long size, bool enc)
{
	unsigned long vaddr_end, vaddr_next, start;
	unsigned long psize, pmask;
	int split_page_size_mask;
	int level, ret;
	pte_t *kpte;

	start = vaddr;
	vaddr_next = vaddr;
	vaddr_end = vaddr + size;

	for (; vaddr < vaddr_end; vaddr = vaddr_next) {
		kpte = lookup_address(vaddr, &level);
		if (!kpte || pte_none(*kpte)) {
			ret = 1;
			goto out;
		}

		if (level == PG_LEVEL_4K) {
			__set_clr_pte_enc(kpte, level, enc);
			vaddr_next = (vaddr & PAGE_MASK) + PAGE_SIZE;
			continue;
		}

		psize = page_level_size(level);
		pmask = page_level_mask(level);

		/*
		 * Check whether we can change the large page in one go.
		 * We request a split when the address is not aligned and
		 * the number of pages to set/clear the encryption bit for is
		 * smaller than the number of pages in the large page.
		 */
		if (vaddr == (vaddr & pmask) &&
		    ((vaddr_end - vaddr) >= psize)) {
			__set_clr_pte_enc(kpte, level, enc);
			vaddr_next = (vaddr & pmask) + psize;
			continue;
		}

		/*
		 * The virtual address is part of a larger page, create the next
		 * level page table mapping (4K or 2M). If it is part of a 2M
		 * page then we request a split of the large page into 4K
		 * chunks. A 1GB large page is split into 2M pages.
		 */
		if (level == PG_LEVEL_2M)
			split_page_size_mask = 0;
		else
			split_page_size_mask = 1 << PG_LEVEL_2M;

		/*
		 * kernel_physical_mapping_change() does not flush the TLBs, so
		 * a TLB flush is required after we exit from the for loop.
		 */
		kernel_physical_mapping_change(__pa(vaddr & pmask),
					       __pa((vaddr_end & pmask) + psize),
					       split_page_size_mask);
	}

	ret = 0;

	early_set_mem_enc_dec_hypercall(start, size, enc);
out:
	__flush_tlb_all();
	return ret;
}

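/*
 * Hypothetical usage sketch (for illustration only, "buf" is an assumed
 * page-aligned buffer in the direct map): a guest that wants to share a
 * page with the hypervisor early in boot would do
 *
 *	early_set_memory_decrypted((unsigned long)buf, PAGE_SIZE);
 *
 * and return it to private use again with
 *
 *	early_set_memory_encrypted((unsigned long)buf, PAGE_SIZE);
 */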
int __init early_set_memory_decrypted(unsigned long vaddr, unsigned long size)
{
	return early_set_memory_enc_dec(vaddr, size, false);
}

int __init early_set_memory_encrypted(unsigned long vaddr, unsigned long size)
{
	return early_set_memory_enc_dec(vaddr, size, true);
}

void __init early_set_mem_enc_dec_hypercall(unsigned long vaddr, unsigned long size, bool enc)
{
	enc_dec_hypercall(vaddr, size, enc);
}

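/*
 * Early SME/SEV setup: propagate the encryption mask into the early PMD
 * flags and the supported PTE mask, and install the AMD-specific
 * encryption status change callbacks.
 */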
void __init sme_early_init(void)
{
	if (!sme_me_mask)
		return;

	early_pmd_flags = __sme_set(early_pmd_flags);

	__supported_pte_mask = __sme_set(__supported_pte_mask);

	/* Update the protection map with memory encryption mask */
	add_encrypt_protection_map();

	x86_platform.guest.enc_status_change_prepare = amd_enc_status_change_prepare;
	x86_platform.guest.enc_status_change_finish = amd_enc_status_change_finish;
	x86_platform.guest.enc_tlb_flush_required = amd_enc_tlb_flush_required;
	x86_platform.guest.enc_cache_flush_required = amd_enc_cache_flush_required;

	/*
	 * AMD-SEV-ES intercepts the RDMSR to read the X2APIC ID in the
	 * parallel bringup low level code. That raises #VC which cannot be
	 * handled there.
	 * It does not provide a RDMSR GHCB protocol so the early startup
	 * code cannot directly communicate with the secure firmware. The
	 * alternative solution to retrieve the APIC ID via CPUID(0xb),
	 * which is covered by the GHCB protocol, is not viable either
	 * because there is no enforcement of the CPUID(0xb) provided
	 * "initial" APIC ID to be the same as the real APIC ID.
	 * Disable parallel bootup.
	 */
	if (sev_status & MSR_AMD64_SEV_ES_ENABLED)
		x86_cpuinit.parallel_bringup = false;
}

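/*
 * Free the unused portion of the .bss..decrypted section, re-encrypting it
 * first when it was mapped decrypted.
 */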
void __init mem_encrypt_free_decrypted_mem(void)
{
	unsigned long vaddr, vaddr_end, npages;
	int r;

	vaddr = (unsigned long)__start_bss_decrypted_unused;
	vaddr_end = (unsigned long)__end_bss_decrypted;
	npages = (vaddr_end - vaddr) >> PAGE_SHIFT;

	/*
	 * If the unused memory range was mapped decrypted, change the encryption
	 * attribute from decrypted to encrypted before freeing it. Base the
	 * re-encryption on the same condition used for the decryption in
	 * sme_postprocess_startup(). Higher level abstractions, such as
	 * CC_ATTR_MEM_ENCRYPT, aren't necessarily equivalent in a Hyper-V VM
	 * using vTOM, where sme_me_mask is always zero.
	 */
	if (sme_me_mask) {
		r = set_memory_encrypted(vaddr, npages);
		if (r) {
			pr_warn("failed to free unused decrypted pages\n");
			return;
		}
	}

	free_init_pages("unused decrypted", vaddr, vaddr_end);
}