1 | // SPDX-License-Identifier: GPL-2.0-only |
2 | /* |
3 | * kexec for arm64 |
4 | * |
5 | * Copyright (C) Linaro. |
6 | * Copyright (C) Huawei Futurewei Technologies. |
7 | */ |
8 | |
9 | #include <linux/interrupt.h> |
10 | #include <linux/irq.h> |
11 | #include <linux/kernel.h> |
12 | #include <linux/kexec.h> |
13 | #include <linux/page-flags.h> |
14 | #include <linux/reboot.h> |
15 | #include <linux/set_memory.h> |
16 | #include <linux/smp.h> |
17 | |
18 | #include <asm/cacheflush.h> |
19 | #include <asm/cpu_ops.h> |
20 | #include <asm/daifflags.h> |
21 | #include <asm/memory.h> |
22 | #include <asm/mmu.h> |
23 | #include <asm/mmu_context.h> |
24 | #include <asm/page.h> |
25 | #include <asm/sections.h> |
26 | #include <asm/trans_pgd.h> |
27 | |
28 | /** |
29 | * kexec_image_info - For debugging output. |
30 | */ |
31 | #define kexec_image_info(_i) _kexec_image_info(__func__, __LINE__, _i) |
32 | static void _kexec_image_info(const char *func, int line, |
33 | const struct kimage *kimage) |
34 | { |
35 | kexec_dprintk("%s:%d:\n" , func, line); |
36 | kexec_dprintk(" kexec kimage info:\n" ); |
37 | kexec_dprintk(" type: %d\n" , kimage->type); |
38 | kexec_dprintk(" head: %lx\n" , kimage->head); |
39 | kexec_dprintk(" kern_reloc: %pa\n" , &kimage->arch.kern_reloc); |
40 | kexec_dprintk(" el2_vectors: %pa\n" , &kimage->arch.el2_vectors); |
41 | } |
42 | |
/**
 * machine_kexec_cleanup - Arch hook for image cleanup.
 * @kimage: unused
 *
 * Nothing to do here; the definition only exists so the build succeeds.
 */
void machine_kexec_cleanup(struct kimage *kimage)
{
}
47 | |
48 | /** |
49 | * machine_kexec_prepare - Prepare for a kexec reboot. |
50 | * |
51 | * Called from the core kexec code when a kernel image is loaded. |
52 | * Forbid loading a kexec kernel if we have no way of hotplugging cpus or cpus |
53 | * are stuck in the kernel. This avoids a panic once we hit machine_kexec(). |
54 | */ |
55 | int machine_kexec_prepare(struct kimage *kimage) |
56 | { |
57 | if (kimage->type != KEXEC_TYPE_CRASH && cpus_are_stuck_in_kernel()) { |
58 | pr_err("Can't kexec: CPUs are stuck in the kernel.\n" ); |
59 | return -EBUSY; |
60 | } |
61 | |
62 | return 0; |
63 | } |
64 | |
65 | /** |
66 | * kexec_segment_flush - Helper to flush the kimage segments to PoC. |
67 | */ |
68 | static void kexec_segment_flush(const struct kimage *kimage) |
69 | { |
70 | unsigned long i; |
71 | |
72 | pr_debug("%s:\n" , __func__); |
73 | |
74 | for (i = 0; i < kimage->nr_segments; i++) { |
75 | pr_debug(" segment[%lu]: %016lx - %016lx, 0x%lx bytes, %lu pages\n" , |
76 | i, |
77 | kimage->segment[i].mem, |
78 | kimage->segment[i].mem + kimage->segment[i].memsz, |
79 | kimage->segment[i].memsz, |
80 | kimage->segment[i].memsz / PAGE_SIZE); |
81 | |
82 | dcache_clean_inval_poc( |
83 | (unsigned long)phys_to_virt(address: kimage->segment[i].mem), |
84 | (unsigned long)phys_to_virt(address: kimage->segment[i].mem) + |
85 | kimage->segment[i].memsz); |
86 | } |
87 | } |
88 | |
89 | /* Allocates pages for kexec page table */ |
90 | static void *kexec_page_alloc(void *arg) |
91 | { |
92 | struct kimage *kimage = arg; |
93 | struct page *page = kimage_alloc_control_pages(image: kimage, order: 0); |
94 | void *vaddr = NULL; |
95 | |
96 | if (!page) |
97 | return NULL; |
98 | |
99 | vaddr = page_address(page); |
100 | memset(vaddr, 0, PAGE_SIZE); |
101 | |
102 | return vaddr; |
103 | } |
104 | |
105 | int machine_kexec_post_load(struct kimage *kimage) |
106 | { |
107 | int rc; |
108 | pgd_t *trans_pgd; |
109 | void *reloc_code = page_to_virt(kimage->control_code_page); |
110 | long reloc_size; |
111 | struct trans_pgd_info info = { |
112 | .trans_alloc_page = kexec_page_alloc, |
113 | .trans_alloc_arg = kimage, |
114 | }; |
115 | |
116 | /* If in place, relocation is not used, only flush next kernel */ |
117 | if (kimage->head & IND_DONE) { |
118 | kexec_segment_flush(kimage); |
119 | kexec_image_info(kimage); |
120 | return 0; |
121 | } |
122 | |
123 | kimage->arch.el2_vectors = 0; |
124 | if (is_hyp_nvhe()) { |
125 | rc = trans_pgd_copy_el2_vectors(&info, |
126 | &kimage->arch.el2_vectors); |
127 | if (rc) |
128 | return rc; |
129 | } |
130 | |
131 | /* Create a copy of the linear map */ |
132 | trans_pgd = kexec_page_alloc(arg: kimage); |
133 | if (!trans_pgd) |
134 | return -ENOMEM; |
135 | rc = trans_pgd_create_copy(&info, &trans_pgd, PAGE_OFFSET, PAGE_END); |
136 | if (rc) |
137 | return rc; |
138 | kimage->arch.ttbr1 = __pa(trans_pgd); |
139 | kimage->arch.zero_page = __pa_symbol(empty_zero_page); |
140 | |
141 | reloc_size = __relocate_new_kernel_end - __relocate_new_kernel_start; |
142 | memcpy(reloc_code, __relocate_new_kernel_start, reloc_size); |
143 | kimage->arch.kern_reloc = __pa(reloc_code); |
144 | rc = trans_pgd_idmap_page(&info, &kimage->arch.ttbr0, |
145 | &kimage->arch.t0sz, reloc_code); |
146 | if (rc) |
147 | return rc; |
148 | kimage->arch.phys_offset = virt_to_phys(address: kimage) - (long)kimage; |
149 | |
150 | /* Flush the reloc_code in preparation for its execution. */ |
151 | dcache_clean_inval_poc((unsigned long)reloc_code, |
152 | (unsigned long)reloc_code + reloc_size); |
153 | icache_inval_pou((uintptr_t)reloc_code, |
154 | (uintptr_t)reloc_code + reloc_size); |
155 | kexec_image_info(kimage); |
156 | |
157 | return 0; |
158 | } |
159 | |
160 | /** |
161 | * machine_kexec - Do the kexec reboot. |
162 | * |
163 | * Called from the core kexec code for a sys_reboot with LINUX_REBOOT_CMD_KEXEC. |
164 | */ |
165 | void machine_kexec(struct kimage *kimage) |
166 | { |
167 | bool in_kexec_crash = (kimage == kexec_crash_image); |
168 | bool stuck_cpus = cpus_are_stuck_in_kernel(); |
169 | |
170 | /* |
171 | * New cpus may have become stuck_in_kernel after we loaded the image. |
172 | */ |
173 | BUG_ON(!in_kexec_crash && (stuck_cpus || (num_online_cpus() > 1))); |
174 | WARN(in_kexec_crash && (stuck_cpus || smp_crash_stop_failed()), |
175 | "Some CPUs may be stale, kdump will be unreliable.\n" ); |
176 | |
177 | pr_info("Bye!\n" ); |
178 | |
179 | local_daif_mask(); |
180 | |
181 | /* |
182 | * Both restart and kernel_reloc will shutdown the MMU, disable data |
183 | * caches. However, restart will start new kernel or purgatory directly, |
184 | * kernel_reloc contains the body of arm64_relocate_new_kernel |
185 | * In kexec case, kimage->start points to purgatory assuming that |
186 | * kernel entry and dtb address are embedded in purgatory by |
187 | * userspace (kexec-tools). |
188 | * In kexec_file case, the kernel starts directly without purgatory. |
189 | */ |
190 | if (kimage->head & IND_DONE) { |
191 | typeof(cpu_soft_restart) *restart; |
192 | |
193 | cpu_install_idmap(); |
194 | restart = (void *)__pa_symbol(cpu_soft_restart); |
195 | restart(is_hyp_nvhe(), kimage->start, kimage->arch.dtb_mem, |
196 | 0, 0); |
197 | } else { |
198 | void (*kernel_reloc)(struct kimage *kimage); |
199 | |
200 | if (is_hyp_nvhe()) |
201 | __hyp_set_vectors(kimage->arch.el2_vectors); |
202 | cpu_install_ttbr0(kimage->arch.ttbr0, kimage->arch.t0sz); |
203 | kernel_reloc = (void *)kimage->arch.kern_reloc; |
204 | kernel_reloc(kimage); |
205 | } |
206 | |
207 | BUG(); /* Should never get here. */ |
208 | } |
209 | |
210 | static void machine_kexec_mask_interrupts(void) |
211 | { |
212 | unsigned int i; |
213 | struct irq_desc *desc; |
214 | |
215 | for_each_irq_desc(i, desc) { |
216 | struct irq_chip *chip; |
217 | int ret; |
218 | |
219 | chip = irq_desc_get_chip(desc); |
220 | if (!chip) |
221 | continue; |
222 | |
223 | /* |
224 | * First try to remove the active state. If this |
225 | * fails, try to EOI the interrupt. |
226 | */ |
227 | ret = irq_set_irqchip_state(irq: i, which: IRQCHIP_STATE_ACTIVE, state: false); |
228 | |
229 | if (ret && irqd_irq_inprogress(d: &desc->irq_data) && |
230 | chip->irq_eoi) |
231 | chip->irq_eoi(&desc->irq_data); |
232 | |
233 | if (chip->irq_mask) |
234 | chip->irq_mask(&desc->irq_data); |
235 | |
236 | if (chip->irq_disable && !irqd_irq_disabled(d: &desc->irq_data)) |
237 | chip->irq_disable(&desc->irq_data); |
238 | } |
239 | } |
240 | |
/**
 * machine_crash_shutdown - shutdown non-crashing cpus and save registers
 * @regs: register state passed on to crash_save_cpu() for this cpu
 *
 * Runs with local interrupts disabled: stops the other cpus, records this
 * cpu's state, then masks interrupts via machine_kexec_mask_interrupts().
 */
void machine_crash_shutdown(struct pt_regs *regs)
{
	int this_cpu;

	local_irq_disable();

	/* Stop every cpu except the crashing one. */
	crash_smp_send_stop();

	/* Record the crashing cpu's own state. */
	this_cpu = smp_processor_id();
	crash_save_cpu(regs, this_cpu);
	machine_kexec_mask_interrupts();

	pr_info("Starting crashdump kernel...\n");
}
257 | |
258 | #if defined(CONFIG_CRASH_DUMP) && defined(CONFIG_HIBERNATION) |
259 | /* |
260 | * To preserve the crash dump kernel image, the relevant memory segments |
261 | * should be mapped again around the hibernation. |
262 | */ |
263 | void crash_prepare_suspend(void) |
264 | { |
265 | if (kexec_crash_image) |
266 | arch_kexec_unprotect_crashkres(); |
267 | } |
268 | |
269 | void crash_post_resume(void) |
270 | { |
271 | if (kexec_crash_image) |
272 | arch_kexec_protect_crashkres(); |
273 | } |
274 | |
275 | /* |
276 | * crash_is_nosave |
277 | * |
278 | * Return true only if a page is part of reserved memory for crash dump kernel, |
279 | * but does not hold any data of loaded kernel image. |
280 | * |
281 | * Note that all the pages in crash dump kernel memory have been initially |
282 | * marked as Reserved as memory was allocated via memblock_reserve(). |
283 | * |
284 | * In hibernation, the pages which are Reserved and yet "nosave" are excluded |
285 | * from the hibernation iamge. crash_is_nosave() does thich check for crash |
286 | * dump kernel and will reduce the total size of hibernation image. |
287 | */ |
288 | |
289 | bool crash_is_nosave(unsigned long pfn) |
290 | { |
291 | int i; |
292 | phys_addr_t addr; |
293 | |
294 | if (!crashk_res.end) |
295 | return false; |
296 | |
297 | /* in reserved memory? */ |
298 | addr = __pfn_to_phys(pfn); |
299 | if ((addr < crashk_res.start) || (crashk_res.end < addr)) { |
300 | if (!crashk_low_res.end) |
301 | return false; |
302 | |
303 | if ((addr < crashk_low_res.start) || (crashk_low_res.end < addr)) |
304 | return false; |
305 | } |
306 | |
307 | if (!kexec_crash_image) |
308 | return true; |
309 | |
310 | /* not part of loaded kernel image? */ |
311 | for (i = 0; i < kexec_crash_image->nr_segments; i++) |
312 | if (addr >= kexec_crash_image->segment[i].mem && |
313 | addr < (kexec_crash_image->segment[i].mem + |
314 | kexec_crash_image->segment[i].memsz)) |
315 | return false; |
316 | |
317 | return true; |
318 | } |
319 | |
320 | void crash_free_reserved_phys_range(unsigned long begin, unsigned long end) |
321 | { |
322 | unsigned long addr; |
323 | struct page *page; |
324 | |
325 | for (addr = begin; addr < end; addr += PAGE_SIZE) { |
326 | page = phys_to_page(addr); |
327 | free_reserved_page(page); |
328 | } |
329 | } |
330 | #endif /* CONFIG_HIBERNATION */ |
331 | |