1 | // SPDX-License-Identifier: GPL-2.0 |
2 | |
3 | /* |
4 | * Transitional page tables for kexec and hibernate |
5 | * |
6 | * This file derived from: arch/arm64/kernel/hibernate.c |
7 | * |
8 | * Copyright (c) 2021, Microsoft Corporation. |
9 | * Pasha Tatashin <pasha.tatashin@soleen.com> |
10 | * |
11 | */ |
12 | |
/*
 * Transitional tables are used while the system transfers from one world to
 * another: e.g. during a hibernate restore or a kexec reboot. During these
 * phases one cannot rely on the page tables not being overwritten, because
 * hibernate and kexec can overwrite the current page tables during the
 * transition.
 */
19 | |
20 | #include <asm/trans_pgd.h> |
21 | #include <asm/pgalloc.h> |
22 | #include <asm/pgtable.h> |
23 | #include <linux/suspend.h> |
24 | #include <linux/bug.h> |
25 | #include <linux/mm.h> |
26 | #include <linux/mmzone.h> |
27 | #include <linux/kfence.h> |
28 | |
29 | static void *trans_alloc(struct trans_pgd_info *info) |
30 | { |
31 | return info->trans_alloc_page(info->trans_alloc_arg); |
32 | } |
33 | |
34 | static void _copy_pte(pte_t *dst_ptep, pte_t *src_ptep, unsigned long addr) |
35 | { |
36 | pte_t pte = __ptep_get(src_ptep); |
37 | |
38 | if (pte_valid(pte)) { |
39 | /* |
40 | * Resume will overwrite areas that may be marked |
41 | * read only (code, rodata). Clear the RDONLY bit from |
42 | * the temporary mappings we use during restore. |
43 | */ |
44 | __set_pte(dst_ptep, pte_mkwrite_novma(pte)); |
45 | } else if (!pte_none(pte)) { |
46 | /* |
47 | * debug_pagealloc will removed the PTE_VALID bit if |
48 | * the page isn't in use by the resume kernel. It may have |
49 | * been in use by the original kernel, in which case we need |
50 | * to put it back in our copy to do the restore. |
51 | * |
52 | * Other cases include kfence / vmalloc / memfd_secret which |
53 | * may call `set_direct_map_invalid_noflush()`. |
54 | * |
55 | * Before marking this entry valid, check the pfn should |
56 | * be mapped. |
57 | */ |
58 | BUG_ON(!pfn_valid(pte_pfn(pte))); |
59 | |
60 | __set_pte(dst_ptep, pte_mkvalid(pte_mkwrite_novma(pte))); |
61 | } |
62 | } |
63 | |
64 | static int copy_pte(struct trans_pgd_info *info, pmd_t *dst_pmdp, |
65 | pmd_t *src_pmdp, unsigned long start, unsigned long end) |
66 | { |
67 | pte_t *src_ptep; |
68 | pte_t *dst_ptep; |
69 | unsigned long addr = start; |
70 | |
71 | dst_ptep = trans_alloc(info); |
72 | if (!dst_ptep) |
73 | return -ENOMEM; |
74 | pmd_populate_kernel(NULL, pmd: dst_pmdp, pte: dst_ptep); |
75 | dst_ptep = pte_offset_kernel(pmd: dst_pmdp, address: start); |
76 | |
77 | src_ptep = pte_offset_kernel(pmd: src_pmdp, address: start); |
78 | do { |
79 | _copy_pte(dst_ptep, src_ptep, addr); |
80 | } while (dst_ptep++, src_ptep++, addr += PAGE_SIZE, addr != end); |
81 | |
82 | return 0; |
83 | } |
84 | |
85 | static int copy_pmd(struct trans_pgd_info *info, pud_t *dst_pudp, |
86 | pud_t *src_pudp, unsigned long start, unsigned long end) |
87 | { |
88 | pmd_t *src_pmdp; |
89 | pmd_t *dst_pmdp; |
90 | unsigned long next; |
91 | unsigned long addr = start; |
92 | |
93 | if (pud_none(READ_ONCE(*dst_pudp))) { |
94 | dst_pmdp = trans_alloc(info); |
95 | if (!dst_pmdp) |
96 | return -ENOMEM; |
97 | pud_populate(NULL, pud: dst_pudp, pmd: dst_pmdp); |
98 | } |
99 | dst_pmdp = pmd_offset(pud: dst_pudp, address: start); |
100 | |
101 | src_pmdp = pmd_offset(pud: src_pudp, address: start); |
102 | do { |
103 | pmd_t pmd = READ_ONCE(*src_pmdp); |
104 | |
105 | next = pmd_addr_end(addr, end); |
106 | if (pmd_none(pmd)) |
107 | continue; |
108 | if (pmd_table(pmd)) { |
109 | if (copy_pte(info, dst_pmdp, src_pmdp, start: addr, end: next)) |
110 | return -ENOMEM; |
111 | } else { |
112 | set_pmd(pmdp: dst_pmdp, |
113 | pmd: __pmd(val: pmd_val(pmd) & ~PMD_SECT_RDONLY)); |
114 | } |
115 | } while (dst_pmdp++, src_pmdp++, addr = next, addr != end); |
116 | |
117 | return 0; |
118 | } |
119 | |
120 | static int copy_pud(struct trans_pgd_info *info, p4d_t *dst_p4dp, |
121 | p4d_t *src_p4dp, unsigned long start, |
122 | unsigned long end) |
123 | { |
124 | pud_t *dst_pudp; |
125 | pud_t *src_pudp; |
126 | unsigned long next; |
127 | unsigned long addr = start; |
128 | |
129 | if (p4d_none(READ_ONCE(*dst_p4dp))) { |
130 | dst_pudp = trans_alloc(info); |
131 | if (!dst_pudp) |
132 | return -ENOMEM; |
133 | p4d_populate(NULL, p4d: dst_p4dp, pud: dst_pudp); |
134 | } |
135 | dst_pudp = pud_offset(p4d: dst_p4dp, address: start); |
136 | |
137 | src_pudp = pud_offset(p4d: src_p4dp, address: start); |
138 | do { |
139 | pud_t pud = READ_ONCE(*src_pudp); |
140 | |
141 | next = pud_addr_end(addr, end); |
142 | if (pud_none(pud)) |
143 | continue; |
144 | if (pud_table(pud)) { |
145 | if (copy_pmd(info, dst_pudp, src_pudp, start: addr, end: next)) |
146 | return -ENOMEM; |
147 | } else { |
148 | set_pud(pudp: dst_pudp, |
149 | pud: __pud(val: pud_val(pud) & ~PUD_SECT_RDONLY)); |
150 | } |
151 | } while (dst_pudp++, src_pudp++, addr = next, addr != end); |
152 | |
153 | return 0; |
154 | } |
155 | |
156 | static int copy_p4d(struct trans_pgd_info *info, pgd_t *dst_pgdp, |
157 | pgd_t *src_pgdp, unsigned long start, |
158 | unsigned long end) |
159 | { |
160 | p4d_t *dst_p4dp; |
161 | p4d_t *src_p4dp; |
162 | unsigned long next; |
163 | unsigned long addr = start; |
164 | |
165 | if (pgd_none(READ_ONCE(*dst_pgdp))) { |
166 | dst_p4dp = trans_alloc(info); |
167 | if (!dst_p4dp) |
168 | return -ENOMEM; |
169 | pgd_populate(NULL, pgd: dst_pgdp, p4d: dst_p4dp); |
170 | } |
171 | |
172 | dst_p4dp = p4d_offset(pgd: dst_pgdp, address: start); |
173 | src_p4dp = p4d_offset(pgd: src_pgdp, address: start); |
174 | do { |
175 | next = p4d_addr_end(addr, end); |
176 | if (p4d_none(READ_ONCE(*src_p4dp))) |
177 | continue; |
178 | if (copy_pud(info, dst_p4dp, src_p4dp, start: addr, end: next)) |
179 | return -ENOMEM; |
180 | } while (dst_p4dp++, src_p4dp++, addr = next, addr != end); |
181 | |
182 | return 0; |
183 | } |
184 | |
185 | static int copy_page_tables(struct trans_pgd_info *info, pgd_t *dst_pgdp, |
186 | unsigned long start, unsigned long end) |
187 | { |
188 | unsigned long next; |
189 | unsigned long addr = start; |
190 | pgd_t *src_pgdp = pgd_offset_k(start); |
191 | |
192 | dst_pgdp = pgd_offset_pgd(pgd: dst_pgdp, address: start); |
193 | do { |
194 | next = pgd_addr_end(addr, end); |
195 | if (pgd_none(READ_ONCE(*src_pgdp))) |
196 | continue; |
197 | if (copy_p4d(info, dst_pgdp, src_pgdp, start: addr, end: next)) |
198 | return -ENOMEM; |
199 | } while (dst_pgdp++, src_pgdp++, addr = next, addr != end); |
200 | |
201 | return 0; |
202 | } |
203 | |
204 | /* |
205 | * Create trans_pgd and copy linear map. |
206 | * info: contains allocator and its argument |
207 | * dst_pgdp: new page table that is created, and to which map is copied. |
208 | * start: Start of the interval (inclusive). |
209 | * end: End of the interval (exclusive). |
210 | * |
211 | * Returns 0 on success, and -ENOMEM on failure. |
212 | */ |
213 | int trans_pgd_create_copy(struct trans_pgd_info *info, pgd_t **dst_pgdp, |
214 | unsigned long start, unsigned long end) |
215 | { |
216 | int rc; |
217 | pgd_t *trans_pgd = trans_alloc(info); |
218 | |
219 | if (!trans_pgd) { |
220 | pr_err("Failed to allocate memory for temporary page tables.\n" ); |
221 | return -ENOMEM; |
222 | } |
223 | |
224 | rc = copy_page_tables(info, dst_pgdp: trans_pgd, start, end); |
225 | if (!rc) |
226 | *dst_pgdp = trans_pgd; |
227 | |
228 | return rc; |
229 | } |
230 | |
231 | /* |
232 | * The page we want to idmap may be outside the range covered by VA_BITS that |
233 | * can be built using the kernel's p?d_populate() helpers. As a one off, for a |
234 | * single page, we build these page tables bottom up and just assume that will |
235 | * need the maximum T0SZ. |
236 | * |
237 | * Returns 0 on success, and -ENOMEM on failure. |
238 | * On success trans_ttbr0 contains page table with idmapped page, t0sz is set to |
239 | * maximum T0SZ for this page. |
240 | */ |
241 | int trans_pgd_idmap_page(struct trans_pgd_info *info, phys_addr_t *trans_ttbr0, |
242 | unsigned long *t0sz, void *page) |
243 | { |
244 | phys_addr_t dst_addr = virt_to_phys(page); |
245 | unsigned long pfn = __phys_to_pfn(dst_addr); |
246 | int max_msb = (dst_addr & GENMASK(52, 48)) ? 51 : 47; |
247 | int bits_mapped = PAGE_SHIFT - 4; |
248 | unsigned long level_mask, prev_level_entry, *levels[4]; |
249 | int this_level, index, level_lsb, level_msb; |
250 | |
251 | dst_addr &= PAGE_MASK; |
252 | prev_level_entry = pte_val(pte: pfn_pte(page_nr: pfn, PAGE_KERNEL_ROX)); |
253 | |
254 | for (this_level = 3; this_level >= 0; this_level--) { |
255 | levels[this_level] = trans_alloc(info); |
256 | if (!levels[this_level]) |
257 | return -ENOMEM; |
258 | |
259 | level_lsb = ARM64_HW_PGTABLE_LEVEL_SHIFT(this_level); |
260 | level_msb = min(level_lsb + bits_mapped, max_msb); |
261 | level_mask = GENMASK_ULL(level_msb, level_lsb); |
262 | |
263 | index = (dst_addr & level_mask) >> level_lsb; |
264 | *(levels[this_level] + index) = prev_level_entry; |
265 | |
266 | pfn = virt_to_pfn(levels[this_level]); |
267 | prev_level_entry = pte_val(pfn_pte(pfn, |
268 | __pgprot(PMD_TYPE_TABLE))); |
269 | |
270 | if (level_msb == max_msb) |
271 | break; |
272 | } |
273 | |
274 | *trans_ttbr0 = phys_to_ttbr(__pfn_to_phys(pfn)); |
275 | *t0sz = TCR_T0SZ(max_msb + 1); |
276 | |
277 | return 0; |
278 | } |
279 | |
280 | /* |
281 | * Create a copy of the vector table so we can call HVC_SET_VECTORS or |
282 | * HVC_SOFT_RESTART from contexts where the table may be overwritten. |
283 | */ |
284 | int trans_pgd_copy_el2_vectors(struct trans_pgd_info *info, |
285 | phys_addr_t *el2_vectors) |
286 | { |
287 | void *hyp_stub = trans_alloc(info); |
288 | |
289 | if (!hyp_stub) |
290 | return -ENOMEM; |
291 | *el2_vectors = virt_to_phys(hyp_stub); |
292 | memcpy(hyp_stub, &trans_pgd_stub_vectors, ARM64_VECTOR_TABLE_LEN); |
293 | caches_clean_inval_pou((unsigned long)hyp_stub, |
294 | (unsigned long)hyp_stub + |
295 | ARM64_VECTOR_TABLE_LEN); |
296 | dcache_clean_inval_poc((unsigned long)hyp_stub, |
297 | (unsigned long)hyp_stub + |
298 | ARM64_VECTOR_TABLE_LEN); |
299 | |
300 | return 0; |
301 | } |
302 | |