1 | // SPDX-License-Identifier: GPL-2.0 |
2 | |
3 | /* |
4 | * Transitional page tables for kexec and hibernate |
5 | * |
6 | * This file derived from: arch/arm64/kernel/hibernate.c |
7 | * |
8 | * Copyright (c) 2021, Microsoft Corporation. |
9 | * Pasha Tatashin <pasha.tatashin@soleen.com> |
10 | * |
11 | */ |
12 | |
/*
 * Transitional page tables are used while the system transfers from one world
 * to another, such as during hibernate restore and kexec reboots. During these
 * phases one cannot rely on the current page tables staying intact, because
 * hibernate and kexec may overwrite them during the transition.
 */
19 | |
20 | #include <asm/trans_pgd.h> |
21 | #include <asm/pgalloc.h> |
22 | #include <asm/pgtable.h> |
23 | #include <linux/suspend.h> |
24 | #include <linux/bug.h> |
25 | #include <linux/mm.h> |
26 | #include <linux/mmzone.h> |
27 | #include <linux/kfence.h> |
28 | |
29 | static void *trans_alloc(struct trans_pgd_info *info) |
30 | { |
31 | return info->trans_alloc_page(info->trans_alloc_arg); |
32 | } |
33 | |
34 | static void _copy_pte(pte_t *dst_ptep, pte_t *src_ptep, unsigned long addr) |
35 | { |
36 | pte_t pte = __ptep_get(src_ptep); |
37 | |
38 | if (pte_valid(pte)) { |
39 | /* |
40 | * Resume will overwrite areas that may be marked |
41 | * read only (code, rodata). Clear the RDONLY bit from |
42 | * the temporary mappings we use during restore. |
43 | */ |
44 | __set_pte(dst_ptep, pte_mkwrite_novma(pte)); |
45 | } else if ((debug_pagealloc_enabled() || |
46 | is_kfence_address(addr: (void *)addr)) && !pte_none(pte)) { |
47 | /* |
48 | * debug_pagealloc will removed the PTE_VALID bit if |
49 | * the page isn't in use by the resume kernel. It may have |
50 | * been in use by the original kernel, in which case we need |
51 | * to put it back in our copy to do the restore. |
52 | * |
53 | * Before marking this entry valid, check the pfn should |
54 | * be mapped. |
55 | */ |
56 | BUG_ON(!pfn_valid(pte_pfn(pte))); |
57 | |
58 | __set_pte(dst_ptep, pte_mkpresent(pte_mkwrite_novma(pte))); |
59 | } |
60 | } |
61 | |
62 | static int copy_pte(struct trans_pgd_info *info, pmd_t *dst_pmdp, |
63 | pmd_t *src_pmdp, unsigned long start, unsigned long end) |
64 | { |
65 | pte_t *src_ptep; |
66 | pte_t *dst_ptep; |
67 | unsigned long addr = start; |
68 | |
69 | dst_ptep = trans_alloc(info); |
70 | if (!dst_ptep) |
71 | return -ENOMEM; |
72 | pmd_populate_kernel(NULL, pmd: dst_pmdp, pte: dst_ptep); |
73 | dst_ptep = pte_offset_kernel(pmd: dst_pmdp, address: start); |
74 | |
75 | src_ptep = pte_offset_kernel(pmd: src_pmdp, address: start); |
76 | do { |
77 | _copy_pte(dst_ptep, src_ptep, addr); |
78 | } while (dst_ptep++, src_ptep++, addr += PAGE_SIZE, addr != end); |
79 | |
80 | return 0; |
81 | } |
82 | |
83 | static int copy_pmd(struct trans_pgd_info *info, pud_t *dst_pudp, |
84 | pud_t *src_pudp, unsigned long start, unsigned long end) |
85 | { |
86 | pmd_t *src_pmdp; |
87 | pmd_t *dst_pmdp; |
88 | unsigned long next; |
89 | unsigned long addr = start; |
90 | |
91 | if (pud_none(READ_ONCE(*dst_pudp))) { |
92 | dst_pmdp = trans_alloc(info); |
93 | if (!dst_pmdp) |
94 | return -ENOMEM; |
95 | pud_populate(NULL, pud: dst_pudp, pmd: dst_pmdp); |
96 | } |
97 | dst_pmdp = pmd_offset(pud: dst_pudp, address: start); |
98 | |
99 | src_pmdp = pmd_offset(pud: src_pudp, address: start); |
100 | do { |
101 | pmd_t pmd = READ_ONCE(*src_pmdp); |
102 | |
103 | next = pmd_addr_end(addr, end); |
104 | if (pmd_none(pmd)) |
105 | continue; |
106 | if (pmd_table(pmd)) { |
107 | if (copy_pte(info, dst_pmdp, src_pmdp, start: addr, end: next)) |
108 | return -ENOMEM; |
109 | } else { |
110 | set_pmd(pmdp: dst_pmdp, |
111 | pmd: __pmd(val: pmd_val(pmd) & ~PMD_SECT_RDONLY)); |
112 | } |
113 | } while (dst_pmdp++, src_pmdp++, addr = next, addr != end); |
114 | |
115 | return 0; |
116 | } |
117 | |
118 | static int copy_pud(struct trans_pgd_info *info, p4d_t *dst_p4dp, |
119 | p4d_t *src_p4dp, unsigned long start, |
120 | unsigned long end) |
121 | { |
122 | pud_t *dst_pudp; |
123 | pud_t *src_pudp; |
124 | unsigned long next; |
125 | unsigned long addr = start; |
126 | |
127 | if (p4d_none(READ_ONCE(*dst_p4dp))) { |
128 | dst_pudp = trans_alloc(info); |
129 | if (!dst_pudp) |
130 | return -ENOMEM; |
131 | p4d_populate(NULL, p4d: dst_p4dp, pud: dst_pudp); |
132 | } |
133 | dst_pudp = pud_offset(p4d: dst_p4dp, address: start); |
134 | |
135 | src_pudp = pud_offset(p4d: src_p4dp, address: start); |
136 | do { |
137 | pud_t pud = READ_ONCE(*src_pudp); |
138 | |
139 | next = pud_addr_end(addr, end); |
140 | if (pud_none(pud)) |
141 | continue; |
142 | if (pud_table(pud)) { |
143 | if (copy_pmd(info, dst_pudp, src_pudp, start: addr, end: next)) |
144 | return -ENOMEM; |
145 | } else { |
146 | set_pud(pudp: dst_pudp, |
147 | pud: __pud(val: pud_val(pud) & ~PUD_SECT_RDONLY)); |
148 | } |
149 | } while (dst_pudp++, src_pudp++, addr = next, addr != end); |
150 | |
151 | return 0; |
152 | } |
153 | |
154 | static int copy_p4d(struct trans_pgd_info *info, pgd_t *dst_pgdp, |
155 | pgd_t *src_pgdp, unsigned long start, |
156 | unsigned long end) |
157 | { |
158 | p4d_t *dst_p4dp; |
159 | p4d_t *src_p4dp; |
160 | unsigned long next; |
161 | unsigned long addr = start; |
162 | |
163 | dst_p4dp = p4d_offset(pgd: dst_pgdp, address: start); |
164 | src_p4dp = p4d_offset(pgd: src_pgdp, address: start); |
165 | do { |
166 | next = p4d_addr_end(addr, end); |
167 | if (p4d_none(READ_ONCE(*src_p4dp))) |
168 | continue; |
169 | if (copy_pud(info, dst_p4dp, src_p4dp, start: addr, end: next)) |
170 | return -ENOMEM; |
171 | } while (dst_p4dp++, src_p4dp++, addr = next, addr != end); |
172 | |
173 | return 0; |
174 | } |
175 | |
176 | static int copy_page_tables(struct trans_pgd_info *info, pgd_t *dst_pgdp, |
177 | unsigned long start, unsigned long end) |
178 | { |
179 | unsigned long next; |
180 | unsigned long addr = start; |
181 | pgd_t *src_pgdp = pgd_offset_k(start); |
182 | |
183 | dst_pgdp = pgd_offset_pgd(pgd: dst_pgdp, address: start); |
184 | do { |
185 | next = pgd_addr_end(addr, end); |
186 | if (pgd_none(READ_ONCE(*src_pgdp))) |
187 | continue; |
188 | if (copy_p4d(info, dst_pgdp, src_pgdp, start: addr, end: next)) |
189 | return -ENOMEM; |
190 | } while (dst_pgdp++, src_pgdp++, addr = next, addr != end); |
191 | |
192 | return 0; |
193 | } |
194 | |
195 | /* |
196 | * Create trans_pgd and copy linear map. |
197 | * info: contains allocator and its argument |
198 | * dst_pgdp: new page table that is created, and to which map is copied. |
199 | * start: Start of the interval (inclusive). |
200 | * end: End of the interval (exclusive). |
201 | * |
202 | * Returns 0 on success, and -ENOMEM on failure. |
203 | */ |
204 | int trans_pgd_create_copy(struct trans_pgd_info *info, pgd_t **dst_pgdp, |
205 | unsigned long start, unsigned long end) |
206 | { |
207 | int rc; |
208 | pgd_t *trans_pgd = trans_alloc(info); |
209 | |
210 | if (!trans_pgd) { |
211 | pr_err("Failed to allocate memory for temporary page tables.\n" ); |
212 | return -ENOMEM; |
213 | } |
214 | |
215 | rc = copy_page_tables(info, dst_pgdp: trans_pgd, start, end); |
216 | if (!rc) |
217 | *dst_pgdp = trans_pgd; |
218 | |
219 | return rc; |
220 | } |
221 | |
222 | /* |
223 | * The page we want to idmap may be outside the range covered by VA_BITS that |
224 | * can be built using the kernel's p?d_populate() helpers. As a one off, for a |
225 | * single page, we build these page tables bottom up and just assume that will |
226 | * need the maximum T0SZ. |
227 | * |
228 | * Returns 0 on success, and -ENOMEM on failure. |
229 | * On success trans_ttbr0 contains page table with idmapped page, t0sz is set to |
230 | * maximum T0SZ for this page. |
231 | */ |
232 | int trans_pgd_idmap_page(struct trans_pgd_info *info, phys_addr_t *trans_ttbr0, |
233 | unsigned long *t0sz, void *page) |
234 | { |
235 | phys_addr_t dst_addr = virt_to_phys(page); |
236 | unsigned long pfn = __phys_to_pfn(dst_addr); |
237 | int max_msb = (dst_addr & GENMASK(52, 48)) ? 51 : 47; |
238 | int bits_mapped = PAGE_SHIFT - 4; |
239 | unsigned long level_mask, prev_level_entry, *levels[4]; |
240 | int this_level, index, level_lsb, level_msb; |
241 | |
242 | dst_addr &= PAGE_MASK; |
243 | prev_level_entry = pte_val(pte: pfn_pte(page_nr: pfn, PAGE_KERNEL_ROX)); |
244 | |
245 | for (this_level = 3; this_level >= 0; this_level--) { |
246 | levels[this_level] = trans_alloc(info); |
247 | if (!levels[this_level]) |
248 | return -ENOMEM; |
249 | |
250 | level_lsb = ARM64_HW_PGTABLE_LEVEL_SHIFT(this_level); |
251 | level_msb = min(level_lsb + bits_mapped, max_msb); |
252 | level_mask = GENMASK_ULL(level_msb, level_lsb); |
253 | |
254 | index = (dst_addr & level_mask) >> level_lsb; |
255 | *(levels[this_level] + index) = prev_level_entry; |
256 | |
257 | pfn = virt_to_pfn(levels[this_level]); |
258 | prev_level_entry = pte_val(pfn_pte(pfn, |
259 | __pgprot(PMD_TYPE_TABLE))); |
260 | |
261 | if (level_msb == max_msb) |
262 | break; |
263 | } |
264 | |
265 | *trans_ttbr0 = phys_to_ttbr(__pfn_to_phys(pfn)); |
266 | *t0sz = TCR_T0SZ(max_msb + 1); |
267 | |
268 | return 0; |
269 | } |
270 | |
271 | /* |
272 | * Create a copy of the vector table so we can call HVC_SET_VECTORS or |
273 | * HVC_SOFT_RESTART from contexts where the table may be overwritten. |
274 | */ |
275 | int trans_pgd_copy_el2_vectors(struct trans_pgd_info *info, |
276 | phys_addr_t *el2_vectors) |
277 | { |
278 | void *hyp_stub = trans_alloc(info); |
279 | |
280 | if (!hyp_stub) |
281 | return -ENOMEM; |
282 | *el2_vectors = virt_to_phys(hyp_stub); |
283 | memcpy(hyp_stub, &trans_pgd_stub_vectors, ARM64_VECTOR_TABLE_LEN); |
284 | caches_clean_inval_pou((unsigned long)hyp_stub, |
285 | (unsigned long)hyp_stub + |
286 | ARM64_VECTOR_TABLE_LEN); |
287 | dcache_clean_inval_poc((unsigned long)hyp_stub, |
288 | (unsigned long)hyp_stub + |
289 | ARM64_VECTOR_TABLE_LEN); |
290 | |
291 | return 0; |
292 | } |
293 | |