// SPDX-License-Identifier: GPL-2.0-only
/*
 * Load ELF vmlinux file for the kexec_file_load syscall.
 *
 * Copyright (C) 2021 Huawei Technologies Co, Ltd.
 *
 * Author: Liao Chang (liaochang1@huawei.com)
 *
 * Based on kexec-tools' kexec-elf-riscv.c, heavily modified
 * for kernel.
 */
12
#define pr_fmt(fmt)	"kexec_image: " fmt

#include <linux/elf.h>
#include <linux/kexec.h>
#include <linux/slab.h>
#include <linux/of.h>
#include <linux/libfdt.h>
#include <linux/types.h>
#include <linux/memblock.h>
#include <asm/setup.h>
23
24int arch_kimage_file_post_load_cleanup(struct kimage *image)
25{
26 kvfree(addr: image->arch.fdt);
27 image->arch.fdt = NULL;
28
29 vfree(addr: image->elf_headers);
30 image->elf_headers = NULL;
31 image->elf_headers_sz = 0;
32
33 return kexec_image_post_load_cleanup_default(image);
34}
35
36static int riscv_kexec_elf_load(struct kimage *image, struct elfhdr *ehdr,
37 struct kexec_elf_info *elf_info, unsigned long old_pbase,
38 unsigned long new_pbase)
39{
40 int i;
41 int ret = 0;
42 size_t size;
43 struct kexec_buf kbuf;
44 const struct elf_phdr *phdr;
45
46 kbuf.image = image;
47
48 for (i = 0; i < ehdr->e_phnum; i++) {
49 phdr = &elf_info->proghdrs[i];
50 if (phdr->p_type != PT_LOAD)
51 continue;
52
53 size = phdr->p_filesz;
54 if (size > phdr->p_memsz)
55 size = phdr->p_memsz;
56
57 kbuf.buffer = (void *) elf_info->buffer + phdr->p_offset;
58 kbuf.bufsz = size;
59 kbuf.buf_align = phdr->p_align;
60 kbuf.mem = phdr->p_paddr - old_pbase + new_pbase;
61 kbuf.memsz = phdr->p_memsz;
62 kbuf.top_down = false;
63 ret = kexec_add_buffer(kbuf: &kbuf);
64 if (ret)
65 break;
66 }
67
68 return ret;
69}
70
71/*
72 * Go through the available phsyical memory regions and find one that hold
73 * an image of the specified size.
74 */
75static int elf_find_pbase(struct kimage *image, unsigned long kernel_len,
76 struct elfhdr *ehdr, struct kexec_elf_info *elf_info,
77 unsigned long *old_pbase, unsigned long *new_pbase)
78{
79 int i;
80 int ret;
81 struct kexec_buf kbuf;
82 const struct elf_phdr *phdr;
83 unsigned long lowest_paddr = ULONG_MAX;
84 unsigned long lowest_vaddr = ULONG_MAX;
85
86 for (i = 0; i < ehdr->e_phnum; i++) {
87 phdr = &elf_info->proghdrs[i];
88 if (phdr->p_type != PT_LOAD)
89 continue;
90
91 if (lowest_paddr > phdr->p_paddr)
92 lowest_paddr = phdr->p_paddr;
93
94 if (lowest_vaddr > phdr->p_vaddr)
95 lowest_vaddr = phdr->p_vaddr;
96 }
97
98 kbuf.image = image;
99 kbuf.buf_min = lowest_paddr;
100 kbuf.buf_max = ULONG_MAX;
101
102 /*
103 * Current riscv boot protocol requires 2MB alignment for
104 * RV64 and 4MB alignment for RV32
105 *
106 */
107 kbuf.buf_align = PMD_SIZE;
108 kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
109 kbuf.memsz = ALIGN(kernel_len, PAGE_SIZE);
110 kbuf.top_down = false;
111 ret = arch_kexec_locate_mem_hole(kbuf: &kbuf);
112 if (!ret) {
113 *old_pbase = lowest_paddr;
114 *new_pbase = kbuf.mem;
115 image->start = ehdr->e_entry - lowest_vaddr + kbuf.mem;
116 }
117 return ret;
118}
119
#ifdef CONFIG_CRASH_DUMP
/*
 * walk_system_ram_res() callback: count one RAM range per invocation.
 * @arg points to the unsigned int counter.
 */
static int get_nr_ram_ranges_callback(struct resource *res, void *arg)
{
	unsigned int *nr_ranges = arg;

	(*nr_ranges)++;
	return 0;
}

/*
 * walk_system_ram_res() callback: append @res to the crash_mem range
 * array passed in @arg.  The array must have been sized beforehand.
 */
static int prepare_elf64_ram_headers_callback(struct resource *res, void *arg)
{
	struct crash_mem *cmem = arg;

	cmem->ranges[cmem->nr_ranges].start = res->start;
	cmem->ranges[cmem->nr_ranges].end = res->end;
	cmem->nr_ranges++;

	return 0;
}

/*
 * Build the ELF core headers describing system RAM minus the crashkernel
 * region.
 *
 * System RAM is walked twice: once to count the ranges (plus one spare
 * slot for excluding the crashkernel region) and once to record them.
 * The temporary range list is always freed before returning.
 *
 * @addr and @sz are passed straight to crash_prepare_elf64_headers(),
 * which is only called when every earlier step succeeded.
 *
 * Returns 0 on success, -ENOMEM if the range list cannot be allocated, or
 * the error from the RAM walk / range exclusion / header preparation.
 */
static int prepare_elf_headers(void **addr, unsigned long *sz)
{
	struct crash_mem *cmem;
	unsigned int nr_ranges;
	int ret;

	nr_ranges = 1; /* For exclusion of crashkernel region */
	walk_system_ram_res(0, -1, &nr_ranges, get_nr_ram_ranges_callback);

	cmem = kmalloc(struct_size(cmem, ranges, nr_ranges), GFP_KERNEL);
	if (!cmem)
		return -ENOMEM;

	cmem->max_nr_ranges = nr_ranges;
	cmem->nr_ranges = 0;
	ret = walk_system_ram_res(0, -1, cmem, prepare_elf64_ram_headers_callback);
	if (ret)
		goto out;

	/* Exclude crashkernel region */
	ret = crash_exclude_mem_range(cmem, crashk_res.start, crashk_res.end);
	if (!ret)
		ret = crash_prepare_elf64_headers(cmem, true, addr, sz);

out:
	kfree(cmem);
	return ret;
}

/*
 * Build the kdump kernel command line: "elfcorehdr=0x<addr> " followed by
 * the first @cmdline_len bytes of @cmdline, where <addr> is
 * image->elf_load_addr.
 *
 * The result lives in a freshly allocated, zeroed COMMAND_LINE_SIZE buffer
 * that the caller must kfree().  Returns NULL if the allocation fails or
 * if prefix plus @cmdline_len would exceed COMMAND_LINE_SIZE.
 */
static char *setup_kdump_cmdline(struct kimage *image, char *cmdline,
				 unsigned long cmdline_len)
{
	int elfcorehdr_strlen;
	char *cmdline_ptr;

	cmdline_ptr = kzalloc(COMMAND_LINE_SIZE, GFP_KERNEL);
	if (!cmdline_ptr)
		return NULL;

	elfcorehdr_strlen = sprintf(cmdline_ptr, "elfcorehdr=0x%lx ",
		image->elf_load_addr);

	if (elfcorehdr_strlen + cmdline_len > COMMAND_LINE_SIZE) {
		pr_err("Appending elfcorehdr=<addr> exceeds cmdline size\n");
		kfree(cmdline_ptr);
		return NULL;
	}

	memcpy(cmdline_ptr + elfcorehdr_strlen, cmdline, cmdline_len);
	/* Ensure it's nul terminated */
	cmdline_ptr[COMMAND_LINE_SIZE - 1] = '\0';
	return cmdline_ptr;
}
#endif
194
195static void *elf_kexec_load(struct kimage *image, char *kernel_buf,
196 unsigned long kernel_len, char *initrd,
197 unsigned long initrd_len, char *cmdline,
198 unsigned long cmdline_len)
199{
200 int ret;
201 void *fdt;
202 unsigned long old_kernel_pbase = ULONG_MAX;
203 unsigned long new_kernel_pbase = 0UL;
204 unsigned long initrd_pbase = 0UL;
205 unsigned long kernel_start;
206 struct elfhdr ehdr;
207 struct kexec_buf kbuf;
208 struct kexec_elf_info elf_info;
209 char *modified_cmdline = NULL;
210
211 ret = kexec_build_elf_info(kernel_buf, kernel_len, &ehdr, &elf_info);
212 if (ret)
213 return ERR_PTR(error: ret);
214
215 ret = elf_find_pbase(image, kernel_len, ehdr: &ehdr, elf_info: &elf_info,
216 old_pbase: &old_kernel_pbase, new_pbase: &new_kernel_pbase);
217 if (ret)
218 goto out;
219 kernel_start = image->start;
220
221 /* Add the kernel binary to the image */
222 ret = riscv_kexec_elf_load(image, ehdr: &ehdr, elf_info: &elf_info,
223 old_pbase: old_kernel_pbase, new_pbase: new_kernel_pbase);
224 if (ret)
225 goto out;
226
227 kbuf.image = image;
228 kbuf.buf_min = new_kernel_pbase + kernel_len;
229 kbuf.buf_max = ULONG_MAX;
230
231#ifdef CONFIG_CRASH_DUMP
232 /* Add elfcorehdr */
233 if (image->type == KEXEC_TYPE_CRASH) {
234 void *headers;
235 unsigned long headers_sz;
236 ret = prepare_elf_headers(addr: &headers, sz: &headers_sz);
237 if (ret) {
238 pr_err("Preparing elf core header failed\n");
239 goto out;
240 }
241
242 kbuf.buffer = headers;
243 kbuf.bufsz = headers_sz;
244 kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
245 kbuf.memsz = headers_sz;
246 kbuf.buf_align = ELF_CORE_HEADER_ALIGN;
247 kbuf.top_down = true;
248
249 ret = kexec_add_buffer(kbuf: &kbuf);
250 if (ret) {
251 vfree(addr: headers);
252 goto out;
253 }
254 image->elf_headers = headers;
255 image->elf_load_addr = kbuf.mem;
256 image->elf_headers_sz = headers_sz;
257
258 kexec_dprintk("Loaded elf core header at 0x%lx bufsz=0x%lx memsz=0x%lx\n",
259 image->elf_load_addr, kbuf.bufsz, kbuf.memsz);
260
261 /* Setup cmdline for kdump kernel case */
262 modified_cmdline = setup_kdump_cmdline(image, cmdline,
263 cmdline_len);
264 if (!modified_cmdline) {
265 pr_err("Setting up cmdline for kdump kernel failed\n");
266 ret = -EINVAL;
267 goto out;
268 }
269 cmdline = modified_cmdline;
270 }
271#endif
272
273#ifdef CONFIG_ARCH_SUPPORTS_KEXEC_PURGATORY
274 /* Add purgatory to the image */
275 kbuf.top_down = true;
276 kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
277 ret = kexec_load_purgatory(image, kbuf: &kbuf);
278 if (ret) {
279 pr_err("Error loading purgatory ret=%d\n", ret);
280 goto out;
281 }
282 kexec_dprintk("Loaded purgatory at 0x%lx\n", kbuf.mem);
283
284 ret = kexec_purgatory_get_set_symbol(image, name: "riscv_kernel_entry",
285 buf: &kernel_start,
286 size: sizeof(kernel_start), get_value: 0);
287 if (ret)
288 pr_err("Error update purgatory ret=%d\n", ret);
289#endif /* CONFIG_ARCH_SUPPORTS_KEXEC_PURGATORY */
290
291 /* Add the initrd to the image */
292 if (initrd != NULL) {
293 kbuf.buffer = initrd;
294 kbuf.bufsz = kbuf.memsz = initrd_len;
295 kbuf.buf_align = PAGE_SIZE;
296 kbuf.top_down = true;
297 kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
298 ret = kexec_add_buffer(kbuf: &kbuf);
299 if (ret)
300 goto out;
301 initrd_pbase = kbuf.mem;
302 kexec_dprintk("Loaded initrd at 0x%lx\n", initrd_pbase);
303 }
304
305 /* Add the DTB to the image */
306 fdt = of_kexec_alloc_and_setup_fdt(image, initrd_load_addr: initrd_pbase,
307 initrd_len, cmdline, extra_fdt_size: 0);
308 if (!fdt) {
309 pr_err("Error setting up the new device tree.\n");
310 ret = -EINVAL;
311 goto out;
312 }
313
314 fdt_pack(fdt);
315 kbuf.buffer = fdt;
316 kbuf.bufsz = kbuf.memsz = fdt_totalsize(fdt);
317 kbuf.buf_align = PAGE_SIZE;
318 kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
319 kbuf.top_down = true;
320 ret = kexec_add_buffer(kbuf: &kbuf);
321 if (ret) {
322 pr_err("Error add DTB kbuf ret=%d\n", ret);
323 goto out_free_fdt;
324 }
325 /* Cache the fdt buffer address for memory cleanup */
326 image->arch.fdt = fdt;
327 kexec_dprintk("Loaded device tree at 0x%lx\n", kbuf.mem);
328 goto out;
329
330out_free_fdt:
331 kvfree(addr: fdt);
332out:
333 kfree(objp: modified_cmdline);
334 kexec_free_elf_info(&elf_info);
335 return ret ? ERR_PTR(error: ret) : NULL;
336}
337
/* Extract the @n-bit field of @x that starts at bit @s. */
#define RV_X(x, s, n)  (((x) >> (s)) & ((1 << (n)) - 1))
#define RISCV_IMM_BITS 12
#define RISCV_IMM_REACH (1LL << RISCV_IMM_BITS)
/*
 * Split @x into a 4 KiB-aligned high part and a low part such that
 * high + low == x.  The high part is rounded to the nearest multiple of
 * RISCV_IMM_REACH, so the low part lies in
 * [-RISCV_IMM_REACH / 2, RISCV_IMM_REACH / 2).
 */
#define RISCV_CONST_HIGH_PART(x) \
	(((x) + (RISCV_IMM_REACH >> 1)) & ~(RISCV_IMM_REACH - 1))
#define RISCV_CONST_LOW_PART(x) ((x) - RISCV_CONST_HIGH_PART(x))

/*
 * ENCODE_<fmt>_IMM(x): scatter the bits of immediate @x into the bit
 * positions that instruction format <fmt> uses for its immediate field.
 * All other bits of the result are zero.
 */
#define ENCODE_ITYPE_IMM(x) \
	(RV_X(x, 0, 12) << 20)
#define ENCODE_BTYPE_IMM(x) \
	((RV_X(x, 1, 4) << 8) | (RV_X(x, 5, 6) << 25) | \
	(RV_X(x, 11, 1) << 7) | (RV_X(x, 12, 1) << 31))
#define ENCODE_UTYPE_IMM(x) \
	(RV_X(x, 12, 20) << 12)
#define ENCODE_JTYPE_IMM(x) \
	((RV_X(x, 1, 10) << 21) | (RV_X(x, 11, 1) << 20) | \
	(RV_X(x, 12, 8) << 12) | (RV_X(x, 20, 1) << 31))
#define ENCODE_CBTYPE_IMM(x) \
	((RV_X(x, 1, 2) << 3) | (RV_X(x, 3, 2) << 10) | (RV_X(x, 5, 1) << 2) | \
	(RV_X(x, 6, 2) << 5) | (RV_X(x, 8, 1) << 12))
#define ENCODE_CJTYPE_IMM(x) \
	((RV_X(x, 1, 3) << 3) | (RV_X(x, 4, 1) << 11) | (RV_X(x, 5, 1) << 2) | \
	(RV_X(x, 6, 1) << 7) | (RV_X(x, 7, 1) << 6) | (RV_X(x, 8, 2) << 9) | \
	(RV_X(x, 10, 1) << 8) | (RV_X(x, 11, 1) << 12))
/*
 * 64-bit encodings covering two consecutive 32-bit words: a U-type
 * immediate in the low word and an I-type immediate in the high word.
 * UJTYPE first splits @x into high/low parts; UITYPE encodes @x as is.
 * @x must be a 64-bit value because of the shift by 32.
 */
#define ENCODE_UJTYPE_IMM(x) \
	(ENCODE_UTYPE_IMM(RISCV_CONST_HIGH_PART(x)) | \
	(ENCODE_ITYPE_IMM(RISCV_CONST_LOW_PART(x)) << 32))
#define ENCODE_UITYPE_IMM(x) \
	(ENCODE_UTYPE_IMM(x) | (ENCODE_ITYPE_IMM(x) << 32))

/* Clear the immediate bits of format @type in instruction word(s) @x. */
#define CLEAN_IMM(type, x) \
	((~ENCODE_##type##_IMM((uint64_t)(-1))) & (x))
370
371int arch_kexec_apply_relocations_add(struct purgatory_info *pi,
372 Elf_Shdr *section,
373 const Elf_Shdr *relsec,
374 const Elf_Shdr *symtab)
375{
376 const char *strtab, *name, *shstrtab;
377 const Elf_Shdr *sechdrs;
378 Elf64_Rela *relas;
379 int i, r_type;
380
381 /* String & section header string table */
382 sechdrs = (void *)pi->ehdr + pi->ehdr->e_shoff;
383 strtab = (char *)pi->ehdr + sechdrs[symtab->sh_link].sh_offset;
384 shstrtab = (char *)pi->ehdr + sechdrs[pi->ehdr->e_shstrndx].sh_offset;
385
386 relas = (void *)pi->ehdr + relsec->sh_offset;
387
388 for (i = 0; i < relsec->sh_size / sizeof(*relas); i++) {
389 const Elf_Sym *sym; /* symbol to relocate */
390 unsigned long addr; /* final location after relocation */
391 unsigned long val; /* relocated symbol value */
392 unsigned long sec_base; /* relocated symbol value */
393 void *loc; /* tmp location to modify */
394
395 sym = (void *)pi->ehdr + symtab->sh_offset;
396 sym += ELF64_R_SYM(relas[i].r_info);
397
398 if (sym->st_name)
399 name = strtab + sym->st_name;
400 else
401 name = shstrtab + sechdrs[sym->st_shndx].sh_name;
402
403 loc = pi->purgatory_buf;
404 loc += section->sh_offset;
405 loc += relas[i].r_offset;
406
407 if (sym->st_shndx == SHN_ABS)
408 sec_base = 0;
409 else if (sym->st_shndx >= pi->ehdr->e_shnum) {
410 pr_err("Invalid section %d for symbol %s\n",
411 sym->st_shndx, name);
412 return -ENOEXEC;
413 } else
414 sec_base = pi->sechdrs[sym->st_shndx].sh_addr;
415
416 val = sym->st_value;
417 val += sec_base;
418 val += relas[i].r_addend;
419
420 addr = section->sh_addr + relas[i].r_offset;
421
422 r_type = ELF64_R_TYPE(relas[i].r_info);
423
424 switch (r_type) {
425 case R_RISCV_BRANCH:
426 *(u32 *)loc = CLEAN_IMM(BTYPE, *(u32 *)loc) |
427 ENCODE_BTYPE_IMM(val - addr);
428 break;
429 case R_RISCV_JAL:
430 *(u32 *)loc = CLEAN_IMM(JTYPE, *(u32 *)loc) |
431 ENCODE_JTYPE_IMM(val - addr);
432 break;
433 /*
434 * With no R_RISCV_PCREL_LO12_S, R_RISCV_PCREL_LO12_I
435 * sym is expected to be next to R_RISCV_PCREL_HI20
436 * in purgatory relsec. Handle it like R_RISCV_CALL
437 * sym, instead of searching the whole relsec.
438 */
439 case R_RISCV_PCREL_HI20:
440 case R_RISCV_CALL_PLT:
441 case R_RISCV_CALL:
442 *(u64 *)loc = CLEAN_IMM(UITYPE, *(u64 *)loc) |
443 ENCODE_UJTYPE_IMM(val - addr);
444 break;
445 case R_RISCV_RVC_BRANCH:
446 *(u32 *)loc = CLEAN_IMM(CBTYPE, *(u32 *)loc) |
447 ENCODE_CBTYPE_IMM(val - addr);
448 break;
449 case R_RISCV_RVC_JUMP:
450 *(u32 *)loc = CLEAN_IMM(CJTYPE, *(u32 *)loc) |
451 ENCODE_CJTYPE_IMM(val - addr);
452 break;
453 case R_RISCV_ADD32:
454 *(u32 *)loc += val;
455 break;
456 case R_RISCV_SUB32:
457 *(u32 *)loc -= val;
458 break;
459 /* It has been applied by R_RISCV_PCREL_HI20 sym */
460 case R_RISCV_PCREL_LO12_I:
461 case R_RISCV_ALIGN:
462 case R_RISCV_RELAX:
463 break;
464 default:
465 pr_err("Unknown rela relocation: %d\n", r_type);
466 return -ENOEXEC;
467 }
468 }
469 return 0;
470}
471
472const struct kexec_file_ops elf_kexec_ops = {
473 .probe = kexec_elf_probe,
474 .load = elf_kexec_load,
475};
476

/* Source: linux/arch/riscv/kernel/elf_kexec.c */