1 | // SPDX-License-Identifier: GPL-2.0-only |
2 | /* |
3 | * Load ELF vmlinux file for the kexec_file_load syscall. |
4 | * |
5 | * Copyright (C) 2021 Huawei Technologies Co, Ltd. |
6 | * |
7 | * Author: Liao Chang (liaochang1@huawei.com) |
8 | * |
9 | * Based on kexec-tools' kexec-elf-riscv.c, heavily modified |
10 | * for kernel. |
11 | */ |
12 | |
13 | #define pr_fmt(fmt) "kexec_image: " fmt |
14 | |
15 | #include <linux/elf.h> |
16 | #include <linux/kexec.h> |
17 | #include <linux/slab.h> |
18 | #include <linux/of.h> |
19 | #include <linux/libfdt.h> |
20 | #include <linux/types.h> |
21 | #include <linux/memblock.h> |
22 | #include <asm/setup.h> |
23 | |
24 | int arch_kimage_file_post_load_cleanup(struct kimage *image) |
25 | { |
26 | kvfree(addr: image->arch.fdt); |
27 | image->arch.fdt = NULL; |
28 | |
29 | vfree(addr: image->elf_headers); |
30 | image->elf_headers = NULL; |
31 | image->elf_headers_sz = 0; |
32 | |
33 | return kexec_image_post_load_cleanup_default(image); |
34 | } |
35 | |
36 | static int riscv_kexec_elf_load(struct kimage *image, struct elfhdr *ehdr, |
37 | struct kexec_elf_info *elf_info, unsigned long old_pbase, |
38 | unsigned long new_pbase) |
39 | { |
40 | int i; |
41 | int ret = 0; |
42 | size_t size; |
43 | struct kexec_buf kbuf; |
44 | const struct elf_phdr *phdr; |
45 | |
46 | kbuf.image = image; |
47 | |
48 | for (i = 0; i < ehdr->e_phnum; i++) { |
49 | phdr = &elf_info->proghdrs[i]; |
50 | if (phdr->p_type != PT_LOAD) |
51 | continue; |
52 | |
53 | size = phdr->p_filesz; |
54 | if (size > phdr->p_memsz) |
55 | size = phdr->p_memsz; |
56 | |
57 | kbuf.buffer = (void *) elf_info->buffer + phdr->p_offset; |
58 | kbuf.bufsz = size; |
59 | kbuf.buf_align = phdr->p_align; |
60 | kbuf.mem = phdr->p_paddr - old_pbase + new_pbase; |
61 | kbuf.memsz = phdr->p_memsz; |
62 | kbuf.top_down = false; |
63 | ret = kexec_add_buffer(kbuf: &kbuf); |
64 | if (ret) |
65 | break; |
66 | } |
67 | |
68 | return ret; |
69 | } |
70 | |
71 | /* |
72 | * Go through the available phsyical memory regions and find one that hold |
73 | * an image of the specified size. |
74 | */ |
75 | static int elf_find_pbase(struct kimage *image, unsigned long kernel_len, |
76 | struct elfhdr *ehdr, struct kexec_elf_info *elf_info, |
77 | unsigned long *old_pbase, unsigned long *new_pbase) |
78 | { |
79 | int i; |
80 | int ret; |
81 | struct kexec_buf kbuf; |
82 | const struct elf_phdr *phdr; |
83 | unsigned long lowest_paddr = ULONG_MAX; |
84 | unsigned long lowest_vaddr = ULONG_MAX; |
85 | |
86 | for (i = 0; i < ehdr->e_phnum; i++) { |
87 | phdr = &elf_info->proghdrs[i]; |
88 | if (phdr->p_type != PT_LOAD) |
89 | continue; |
90 | |
91 | if (lowest_paddr > phdr->p_paddr) |
92 | lowest_paddr = phdr->p_paddr; |
93 | |
94 | if (lowest_vaddr > phdr->p_vaddr) |
95 | lowest_vaddr = phdr->p_vaddr; |
96 | } |
97 | |
98 | kbuf.image = image; |
99 | kbuf.buf_min = lowest_paddr; |
100 | kbuf.buf_max = ULONG_MAX; |
101 | |
102 | /* |
103 | * Current riscv boot protocol requires 2MB alignment for |
104 | * RV64 and 4MB alignment for RV32 |
105 | * |
106 | */ |
107 | kbuf.buf_align = PMD_SIZE; |
108 | kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; |
109 | kbuf.memsz = ALIGN(kernel_len, PAGE_SIZE); |
110 | kbuf.top_down = false; |
111 | ret = arch_kexec_locate_mem_hole(kbuf: &kbuf); |
112 | if (!ret) { |
113 | *old_pbase = lowest_paddr; |
114 | *new_pbase = kbuf.mem; |
115 | image->start = ehdr->e_entry - lowest_vaddr + kbuf.mem; |
116 | } |
117 | return ret; |
118 | } |
119 | |
120 | #ifdef CONFIG_CRASH_DUMP |
/*
 * Resource-walk callback that counts invocations.
 *
 * @res: unused
 * @arg: points to the unsigned int counter to bump
 *
 * Always returns 0.
 */
static int get_nr_ram_ranges_callback(struct resource *res, void *arg)
{
	unsigned int *counter = arg;

	*counter += 1;
	return 0;
}
128 | |
129 | static int (struct resource *res, void *arg) |
130 | { |
131 | struct crash_mem *cmem = arg; |
132 | |
133 | cmem->ranges[cmem->nr_ranges].start = res->start; |
134 | cmem->ranges[cmem->nr_ranges].end = res->end; |
135 | cmem->nr_ranges++; |
136 | |
137 | return 0; |
138 | } |
139 | |
140 | static int (void **addr, unsigned long *sz) |
141 | { |
142 | struct crash_mem *cmem; |
143 | unsigned int nr_ranges; |
144 | int ret; |
145 | |
146 | nr_ranges = 1; /* For exclusion of crashkernel region */ |
147 | walk_system_ram_res(start: 0, end: -1, arg: &nr_ranges, func: get_nr_ram_ranges_callback); |
148 | |
149 | cmem = kmalloc(struct_size(cmem, ranges, nr_ranges), GFP_KERNEL); |
150 | if (!cmem) |
151 | return -ENOMEM; |
152 | |
153 | cmem->max_nr_ranges = nr_ranges; |
154 | cmem->nr_ranges = 0; |
155 | ret = walk_system_ram_res(start: 0, end: -1, arg: cmem, func: prepare_elf64_ram_headers_callback); |
156 | if (ret) |
157 | goto out; |
158 | |
159 | /* Exclude crashkernel region */ |
160 | ret = crash_exclude_mem_range(mem: cmem, mstart: crashk_res.start, mend: crashk_res.end); |
161 | if (!ret) |
162 | ret = crash_prepare_elf64_headers(mem: cmem, need_kernel_map: true, addr, sz); |
163 | |
164 | out: |
165 | kfree(objp: cmem); |
166 | return ret; |
167 | } |
168 | |
169 | static char *setup_kdump_cmdline(struct kimage *image, char *cmdline, |
170 | unsigned long cmdline_len) |
171 | { |
172 | int elfcorehdr_strlen; |
173 | char *cmdline_ptr; |
174 | |
175 | cmdline_ptr = kzalloc(COMMAND_LINE_SIZE, GFP_KERNEL); |
176 | if (!cmdline_ptr) |
177 | return NULL; |
178 | |
179 | elfcorehdr_strlen = sprintf(buf: cmdline_ptr, fmt: "elfcorehdr=0x%lx " , |
180 | image->elf_load_addr); |
181 | |
182 | if (elfcorehdr_strlen + cmdline_len > COMMAND_LINE_SIZE) { |
183 | pr_err("Appending elfcorehdr=<addr> exceeds cmdline size\n" ); |
184 | kfree(objp: cmdline_ptr); |
185 | return NULL; |
186 | } |
187 | |
188 | memcpy(cmdline_ptr + elfcorehdr_strlen, cmdline, cmdline_len); |
189 | /* Ensure it's nul terminated */ |
190 | cmdline_ptr[COMMAND_LINE_SIZE - 1] = '\0'; |
191 | return cmdline_ptr; |
192 | } |
193 | #endif |
194 | |
195 | static void *elf_kexec_load(struct kimage *image, char *kernel_buf, |
196 | unsigned long kernel_len, char *initrd, |
197 | unsigned long initrd_len, char *cmdline, |
198 | unsigned long cmdline_len) |
199 | { |
200 | int ret; |
201 | void *fdt; |
202 | unsigned long old_kernel_pbase = ULONG_MAX; |
203 | unsigned long new_kernel_pbase = 0UL; |
204 | unsigned long initrd_pbase = 0UL; |
205 | unsigned long kernel_start; |
206 | struct elfhdr ehdr; |
207 | struct kexec_buf kbuf; |
208 | struct kexec_elf_info elf_info; |
209 | char *modified_cmdline = NULL; |
210 | |
211 | ret = kexec_build_elf_info(kernel_buf, kernel_len, &ehdr, &elf_info); |
212 | if (ret) |
213 | return ERR_PTR(error: ret); |
214 | |
215 | ret = elf_find_pbase(image, kernel_len, ehdr: &ehdr, elf_info: &elf_info, |
216 | old_pbase: &old_kernel_pbase, new_pbase: &new_kernel_pbase); |
217 | if (ret) |
218 | goto out; |
219 | kernel_start = image->start; |
220 | |
221 | /* Add the kernel binary to the image */ |
222 | ret = riscv_kexec_elf_load(image, ehdr: &ehdr, elf_info: &elf_info, |
223 | old_pbase: old_kernel_pbase, new_pbase: new_kernel_pbase); |
224 | if (ret) |
225 | goto out; |
226 | |
227 | kbuf.image = image; |
228 | kbuf.buf_min = new_kernel_pbase + kernel_len; |
229 | kbuf.buf_max = ULONG_MAX; |
230 | |
231 | #ifdef CONFIG_CRASH_DUMP |
232 | /* Add elfcorehdr */ |
233 | if (image->type == KEXEC_TYPE_CRASH) { |
234 | void *; |
235 | unsigned long ; |
236 | ret = prepare_elf_headers(addr: &headers, sz: &headers_sz); |
237 | if (ret) { |
238 | pr_err("Preparing elf core header failed\n" ); |
239 | goto out; |
240 | } |
241 | |
242 | kbuf.buffer = headers; |
243 | kbuf.bufsz = headers_sz; |
244 | kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; |
245 | kbuf.memsz = headers_sz; |
246 | kbuf.buf_align = ELF_CORE_HEADER_ALIGN; |
247 | kbuf.top_down = true; |
248 | |
249 | ret = kexec_add_buffer(kbuf: &kbuf); |
250 | if (ret) { |
251 | vfree(addr: headers); |
252 | goto out; |
253 | } |
254 | image->elf_headers = headers; |
255 | image->elf_load_addr = kbuf.mem; |
256 | image->elf_headers_sz = headers_sz; |
257 | |
258 | kexec_dprintk("Loaded elf core header at 0x%lx bufsz=0x%lx memsz=0x%lx\n" , |
259 | image->elf_load_addr, kbuf.bufsz, kbuf.memsz); |
260 | |
261 | /* Setup cmdline for kdump kernel case */ |
262 | modified_cmdline = setup_kdump_cmdline(image, cmdline, |
263 | cmdline_len); |
264 | if (!modified_cmdline) { |
265 | pr_err("Setting up cmdline for kdump kernel failed\n" ); |
266 | ret = -EINVAL; |
267 | goto out; |
268 | } |
269 | cmdline = modified_cmdline; |
270 | } |
271 | #endif |
272 | |
273 | #ifdef CONFIG_ARCH_SUPPORTS_KEXEC_PURGATORY |
274 | /* Add purgatory to the image */ |
275 | kbuf.top_down = true; |
276 | kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; |
277 | ret = kexec_load_purgatory(image, kbuf: &kbuf); |
278 | if (ret) { |
279 | pr_err("Error loading purgatory ret=%d\n" , ret); |
280 | goto out; |
281 | } |
282 | kexec_dprintk("Loaded purgatory at 0x%lx\n" , kbuf.mem); |
283 | |
284 | ret = kexec_purgatory_get_set_symbol(image, name: "riscv_kernel_entry" , |
285 | buf: &kernel_start, |
286 | size: sizeof(kernel_start), get_value: 0); |
287 | if (ret) |
288 | pr_err("Error update purgatory ret=%d\n" , ret); |
289 | #endif /* CONFIG_ARCH_SUPPORTS_KEXEC_PURGATORY */ |
290 | |
291 | /* Add the initrd to the image */ |
292 | if (initrd != NULL) { |
293 | kbuf.buffer = initrd; |
294 | kbuf.bufsz = kbuf.memsz = initrd_len; |
295 | kbuf.buf_align = PAGE_SIZE; |
296 | kbuf.top_down = true; |
297 | kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; |
298 | ret = kexec_add_buffer(kbuf: &kbuf); |
299 | if (ret) |
300 | goto out; |
301 | initrd_pbase = kbuf.mem; |
302 | kexec_dprintk("Loaded initrd at 0x%lx\n" , initrd_pbase); |
303 | } |
304 | |
305 | /* Add the DTB to the image */ |
306 | fdt = of_kexec_alloc_and_setup_fdt(image, initrd_load_addr: initrd_pbase, |
307 | initrd_len, cmdline, extra_fdt_size: 0); |
308 | if (!fdt) { |
309 | pr_err("Error setting up the new device tree.\n" ); |
310 | ret = -EINVAL; |
311 | goto out; |
312 | } |
313 | |
314 | fdt_pack(fdt); |
315 | kbuf.buffer = fdt; |
316 | kbuf.bufsz = kbuf.memsz = fdt_totalsize(fdt); |
317 | kbuf.buf_align = PAGE_SIZE; |
318 | kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; |
319 | kbuf.top_down = true; |
320 | ret = kexec_add_buffer(kbuf: &kbuf); |
321 | if (ret) { |
322 | pr_err("Error add DTB kbuf ret=%d\n" , ret); |
323 | goto out_free_fdt; |
324 | } |
325 | /* Cache the fdt buffer address for memory cleanup */ |
326 | image->arch.fdt = fdt; |
327 | kexec_dprintk("Loaded device tree at 0x%lx\n" , kbuf.mem); |
328 | goto out; |
329 | |
330 | out_free_fdt: |
331 | kvfree(addr: fdt); |
332 | out: |
333 | kfree(objp: modified_cmdline); |
334 | kexec_free_elf_info(&elf_info); |
335 | return ret ? ERR_PTR(error: ret) : NULL; |
336 | } |
337 | |
/* Extract the @width-bit field of @v that starts at bit @lsb. */
#define RV_X(v, lsb, width) \
	(((v) >> (lsb)) & ((1 << (width)) - 1))

/* Size of a 12-bit immediate and the span it covers. */
#define RISCV_IMM_BITS 12
#define RISCV_IMM_REACH (1LL << RISCV_IMM_BITS)

/*
 * Split a value into a high part (rounded to a multiple of
 * RISCV_IMM_REACH) and the remaining low part, so that
 * high + low == value.
 */
#define RISCV_CONST_HIGH_PART(v) \
	(((v) + (RISCV_IMM_REACH >> 1)) & ~(RISCV_IMM_REACH - 1))
#define RISCV_CONST_LOW_PART(v) \
	((v) - RISCV_CONST_HIGH_PART(v))

/*
 * ENCODE_<fmt>_IMM(v): scatter the bits of @v into the immediate bit
 * positions of instruction format <fmt>.
 */
#define ENCODE_ITYPE_IMM(v) \
	(RV_X(v, 0, 12) << 20)

#define ENCODE_BTYPE_IMM(v) \
	((RV_X(v, 1, 4) << 8) | \
	 (RV_X(v, 5, 6) << 25) | \
	 (RV_X(v, 11, 1) << 7) | \
	 (RV_X(v, 12, 1) << 31))

#define ENCODE_UTYPE_IMM(v) \
	(RV_X(v, 12, 20) << 12)

#define ENCODE_JTYPE_IMM(v) \
	((RV_X(v, 1, 10) << 21) | \
	 (RV_X(v, 11, 1) << 20) | \
	 (RV_X(v, 12, 8) << 12) | \
	 (RV_X(v, 20, 1) << 31))

#define ENCODE_CBTYPE_IMM(v) \
	((RV_X(v, 1, 2) << 3) | \
	 (RV_X(v, 3, 2) << 10) | \
	 (RV_X(v, 5, 1) << 2) | \
	 (RV_X(v, 6, 2) << 5) | \
	 (RV_X(v, 8, 1) << 12))

#define ENCODE_CJTYPE_IMM(v) \
	((RV_X(v, 1, 3) << 3) | \
	 (RV_X(v, 4, 1) << 11) | \
	 (RV_X(v, 5, 1) << 2) | \
	 (RV_X(v, 6, 1) << 7) | \
	 (RV_X(v, 7, 1) << 6) | \
	 (RV_X(v, 8, 2) << 9) | \
	 (RV_X(v, 10, 1) << 8) | \
	 (RV_X(v, 11, 1) << 12))

/*
 * Two-instruction encodings: a U-type immediate in the low 32 bits and an
 * I-type immediate in the high 32 bits.  The UJ form first splits @v into
 * its high and low parts; the UI form encodes @v into both as-is.
 */
#define ENCODE_UJTYPE_IMM(v) \
	(ENCODE_UTYPE_IMM(RISCV_CONST_HIGH_PART(v)) | \
	 (ENCODE_ITYPE_IMM(RISCV_CONST_LOW_PART(v)) << 32))

#define ENCODE_UITYPE_IMM(v) \
	(ENCODE_UTYPE_IMM(v) | (ENCODE_ITYPE_IMM(v) << 32))

/* Clear every immediate bit of format @fmt in instruction word @insn. */
#define CLEAN_IMM(fmt, insn) \
	((insn) & ~ENCODE_##fmt##_IMM((uint64_t)(-1)))
370 | |
371 | int arch_kexec_apply_relocations_add(struct purgatory_info *pi, |
372 | Elf_Shdr *section, |
373 | const Elf_Shdr *relsec, |
374 | const Elf_Shdr *symtab) |
375 | { |
376 | const char *strtab, *name, *shstrtab; |
377 | const Elf_Shdr *sechdrs; |
378 | Elf64_Rela *relas; |
379 | int i, r_type; |
380 | |
381 | /* String & section header string table */ |
382 | sechdrs = (void *)pi->ehdr + pi->ehdr->e_shoff; |
383 | strtab = (char *)pi->ehdr + sechdrs[symtab->sh_link].sh_offset; |
384 | shstrtab = (char *)pi->ehdr + sechdrs[pi->ehdr->e_shstrndx].sh_offset; |
385 | |
386 | relas = (void *)pi->ehdr + relsec->sh_offset; |
387 | |
388 | for (i = 0; i < relsec->sh_size / sizeof(*relas); i++) { |
389 | const Elf_Sym *sym; /* symbol to relocate */ |
390 | unsigned long addr; /* final location after relocation */ |
391 | unsigned long val; /* relocated symbol value */ |
392 | unsigned long sec_base; /* relocated symbol value */ |
393 | void *loc; /* tmp location to modify */ |
394 | |
395 | sym = (void *)pi->ehdr + symtab->sh_offset; |
396 | sym += ELF64_R_SYM(relas[i].r_info); |
397 | |
398 | if (sym->st_name) |
399 | name = strtab + sym->st_name; |
400 | else |
401 | name = shstrtab + sechdrs[sym->st_shndx].sh_name; |
402 | |
403 | loc = pi->purgatory_buf; |
404 | loc += section->sh_offset; |
405 | loc += relas[i].r_offset; |
406 | |
407 | if (sym->st_shndx == SHN_ABS) |
408 | sec_base = 0; |
409 | else if (sym->st_shndx >= pi->ehdr->e_shnum) { |
410 | pr_err("Invalid section %d for symbol %s\n" , |
411 | sym->st_shndx, name); |
412 | return -ENOEXEC; |
413 | } else |
414 | sec_base = pi->sechdrs[sym->st_shndx].sh_addr; |
415 | |
416 | val = sym->st_value; |
417 | val += sec_base; |
418 | val += relas[i].r_addend; |
419 | |
420 | addr = section->sh_addr + relas[i].r_offset; |
421 | |
422 | r_type = ELF64_R_TYPE(relas[i].r_info); |
423 | |
424 | switch (r_type) { |
425 | case R_RISCV_BRANCH: |
426 | *(u32 *)loc = CLEAN_IMM(BTYPE, *(u32 *)loc) | |
427 | ENCODE_BTYPE_IMM(val - addr); |
428 | break; |
429 | case R_RISCV_JAL: |
430 | *(u32 *)loc = CLEAN_IMM(JTYPE, *(u32 *)loc) | |
431 | ENCODE_JTYPE_IMM(val - addr); |
432 | break; |
433 | /* |
434 | * With no R_RISCV_PCREL_LO12_S, R_RISCV_PCREL_LO12_I |
435 | * sym is expected to be next to R_RISCV_PCREL_HI20 |
436 | * in purgatory relsec. Handle it like R_RISCV_CALL |
437 | * sym, instead of searching the whole relsec. |
438 | */ |
439 | case R_RISCV_PCREL_HI20: |
440 | case R_RISCV_CALL_PLT: |
441 | case R_RISCV_CALL: |
442 | *(u64 *)loc = CLEAN_IMM(UITYPE, *(u64 *)loc) | |
443 | ENCODE_UJTYPE_IMM(val - addr); |
444 | break; |
445 | case R_RISCV_RVC_BRANCH: |
446 | *(u32 *)loc = CLEAN_IMM(CBTYPE, *(u32 *)loc) | |
447 | ENCODE_CBTYPE_IMM(val - addr); |
448 | break; |
449 | case R_RISCV_RVC_JUMP: |
450 | *(u32 *)loc = CLEAN_IMM(CJTYPE, *(u32 *)loc) | |
451 | ENCODE_CJTYPE_IMM(val - addr); |
452 | break; |
453 | case R_RISCV_ADD32: |
454 | *(u32 *)loc += val; |
455 | break; |
456 | case R_RISCV_SUB32: |
457 | *(u32 *)loc -= val; |
458 | break; |
459 | /* It has been applied by R_RISCV_PCREL_HI20 sym */ |
460 | case R_RISCV_PCREL_LO12_I: |
461 | case R_RISCV_ALIGN: |
462 | case R_RISCV_RELAX: |
463 | break; |
464 | default: |
465 | pr_err("Unknown rela relocation: %d\n" , r_type); |
466 | return -ENOEXEC; |
467 | } |
468 | } |
469 | return 0; |
470 | } |
471 | |
472 | const struct kexec_file_ops elf_kexec_ops = { |
473 | .probe = kexec_elf_probe, |
474 | .load = elf_kexec_load, |
475 | }; |
476 | |