// SPDX-License-Identifier: GPL-2.0
/*
 * S390 version
 *   Copyright IBM Corp. 1999, 2012
 *   Author(s): Hartmut Penner (hp@de.ibm.com),
 *              Martin Schwidefsky (schwidefsky@de.ibm.com)
 *
 * Derived from "arch/i386/kernel/setup.c"
 *   Copyright (C) 1995, Linus Torvalds
 */

/*
 * This file handles the architecture-dependent parts of initialization
 */

#define KMSG_COMPONENT "setup"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/errno.h>
#include <linux/export.h>
#include <linux/sched.h>
#include <linux/sched/task.h>
#include <linux/cpu.h>
#include <linux/kernel.h>
#include <linux/memblock.h>
#include <linux/mm.h>
#include <linux/stddef.h>
#include <linux/unistd.h>
#include <linux/ptrace.h>
#include <linux/random.h>
#include <linux/user.h>
#include <linux/tty.h>
#include <linux/ioport.h>
#include <linux/delay.h>
#include <linux/init.h>
#include <linux/initrd.h>
#include <linux/root_dev.h>
#include <linux/console.h>
#include <linux/kernel_stat.h>
#include <linux/dma-map-ops.h>
#include <linux/device.h>
#include <linux/notifier.h>
#include <linux/pfn.h>
#include <linux/ctype.h>
#include <linux/reboot.h>
#include <linux/topology.h>
#include <linux/kexec.h>
#include <linux/crash_dump.h>
#include <linux/memory.h>
#include <linux/compat.h>
#include <linux/start_kernel.h>
#include <linux/hugetlb.h>
#include <linux/kmemleak.h>

#include <asm/archrandom.h>
#include <asm/boot_data.h>
#include <asm/ipl.h>
#include <asm/facility.h>
#include <asm/smp.h>
#include <asm/mmu_context.h>
#include <asm/cpcmd.h>
#include <asm/abs_lowcore.h>
#include <asm/nmi.h>
#include <asm/irq.h>
#include <asm/page.h>
#include <asm/ptrace.h>
#include <asm/sections.h>
#include <asm/ebcdic.h>
#include <asm/diag.h>
#include <asm/os_info.h>
#include <asm/sclp.h>
#include <asm/stacktrace.h>
#include <asm/sysinfo.h>
#include <asm/numa.h>
#include <asm/alternative.h>
#include <asm/nospec-branch.h>
#include <asm/physmem_info.h>
#include <asm/maccess.h>
#include <asm/uv.h>
#include <asm/asm-offsets.h>
#include "entry.h"

/*
 * Machine setup..
 */
unsigned int console_mode = 0;
EXPORT_SYMBOL(console_mode);

unsigned int console_devno = -1;
EXPORT_SYMBOL(console_devno);

unsigned int console_irq = -1;
EXPORT_SYMBOL(console_irq);
/*
 * Some code and data needs to stay below 2 GB, even when the kernel is
 * relocated above 2 GB, because it has to use 31 bit addresses.
 * Such code and data is part of the .amode31 section.
 */
char __amode31_ref *__samode31 = _samode31;
char __amode31_ref *__eamode31 = _eamode31;
char __amode31_ref *__stext_amode31 = _stext_amode31;
char __amode31_ref *__etext_amode31 = _etext_amode31;
struct exception_table_entry __amode31_ref *__start_amode31_ex_table = _start_amode31_ex_table;
struct exception_table_entry __amode31_ref *__stop_amode31_ex_table = _stop_amode31_ex_table;

/*
 * Control registers CR2, CR5 and CR15 are initialized with addresses
 * of tables that must be placed below 2G, which is handled by the
 * AMODE31 sections.
 * Because the AMODE31 sections are relocated below 2G at startup,
 * the content of control registers CR2, CR5 and CR15 must be updated
 * with new addresses after the relocation. The control registers are
 * first initialized in head64.S and then updated again after the
 * AMODE31 relocation. We must access the relevant AMODE31 tables
 * indirectly via pointers placed in the .amode31.refs linker section.
 * Those pointers get updated automatically during AMODE31 relocation
 * and always contain a valid address within the AMODE31 sections.
 */

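/*
 * Architected tables referenced by CR2, CR5 and CR15: a dispatchable-unit
 * control table (DUCT), an ASN-second-table entry (ASTE), a dispatchable-unit
 * access-list designation (DUALD) and a linkage stack. setup_cr() installs
 * their origins in the control registers after the AMODE31 relocation.
 */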
static __amode31_data u32 __ctl_duct_amode31[16] __aligned(64);

static __amode31_data u64 __ctl_aste_amode31[8] __aligned(64) = {
	[1] = 0xffffffffffffffff
};

static __amode31_data u32 __ctl_duald_amode31[32] __aligned(128) = {
	0x80000000, 0, 0, 0,
	0x80000000, 0, 0, 0,
	0x80000000, 0, 0, 0,
	0x80000000, 0, 0, 0,
	0x80000000, 0, 0, 0,
	0x80000000, 0, 0, 0,
	0x80000000, 0, 0, 0,
	0x80000000, 0, 0, 0
};

static __amode31_data u32 __ctl_linkage_stack_amode31[8] __aligned(64) = {
	0, 0, 0x89000000, 0,
	0, 0, 0x8a000000, 0
};

static u64 __amode31_ref *__ctl_aste = __ctl_aste_amode31;
static u32 __amode31_ref *__ctl_duald = __ctl_duald_amode31;
static u32 __amode31_ref *__ctl_linkage_stack = __ctl_linkage_stack_amode31;
static u32 __amode31_ref *__ctl_duct = __ctl_duct_amode31;

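/* Variables initialized by the decompressor / early boot stage. */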
unsigned long __bootdata_preserved(max_mappable);
unsigned long __bootdata(ident_map_size);
struct physmem_info __bootdata(physmem_info);

unsigned long __bootdata_preserved(__kaslr_offset);
int __bootdata_preserved(__kaslr_enabled);
unsigned int __bootdata_preserved(zlib_dfltcc_support);
EXPORT_SYMBOL(zlib_dfltcc_support);
u64 __bootdata_preserved(stfle_fac_list[16]);
EXPORT_SYMBOL(stfle_fac_list);
u64 __bootdata_preserved(alt_stfle_fac_list[16]);
struct oldmem_data __bootdata_preserved(oldmem_data);

unsigned long VMALLOC_START;
EXPORT_SYMBOL(VMALLOC_START);

unsigned long VMALLOC_END;
EXPORT_SYMBOL(VMALLOC_END);

struct page *vmemmap;
EXPORT_SYMBOL(vmemmap);
unsigned long vmemmap_size;

unsigned long MODULES_VADDR;
unsigned long MODULES_END;

/* An array with a pointer to the lowcore of every CPU. */
struct lowcore *lowcore_ptr[NR_CPUS];
EXPORT_SYMBOL(lowcore_ptr);

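/* Set in setup_arch() if the BEAR-enhancement facility (facility 193) is installed. */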
DEFINE_STATIC_KEY_FALSE(cpu_has_bear);

/*
 * The Write Back bit position in the physaddr is given by the SLPC PCI.
 * Leaving the mask zero always uses write through, which is safe.
 */
unsigned long mio_wb_bit_mask __ro_after_init;

/*
 * This is set up by the setup routine at boot time.
 * For S390 we need to find out what has to be set up,
 * using address 0x10400 ...
 */

#include <asm/setup.h>

/*
 * condev= and conmode= setup parameters.
 */

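/* Handle "condev=<devno>": remember the console device number (0..0xffff) and reset console_irq. */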
static int __init condev_setup(char *str)
{
	int vdev;

	vdev = simple_strtoul(str, &str, 0);
	if (vdev >= 0 && vdev < 65536) {
		console_devno = vdev;
		console_irq = -1;
	}
	return 1;
}

__setup("condev=", condev_setup);

static void __init set_preferred_console(void)
{
	if (CONSOLE_IS_3215 || CONSOLE_IS_SCLP)
		add_preferred_console("ttyS", 0, NULL);
	else if (CONSOLE_IS_3270)
		add_preferred_console("tty3270", 0, NULL);
	else if (CONSOLE_IS_VT220)
		add_preferred_console("ttysclp", 0, NULL);
	else if (CONSOLE_IS_HVC)
		add_preferred_console("hvc", 0, NULL);
}

static int __init conmode_setup(char *str)
{
#if defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
	if (!strcmp(str, "hwc") || !strcmp(str, "sclp"))
		SET_CONSOLE_SCLP;
#endif
#if defined(CONFIG_TN3215_CONSOLE)
	if (!strcmp(str, "3215"))
		SET_CONSOLE_3215;
#endif
#if defined(CONFIG_TN3270_CONSOLE)
	if (!strcmp(str, "3270"))
		SET_CONSOLE_3270;
#endif
	set_preferred_console();
	return 1;
}

__setup("conmode=", conmode_setup);

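/*
 * Pick a console mode when no conmode= parameter was given: ask CP which
 * console device and CONMODE are in use under z/VM, prefer the SCLP VT220
 * or line-mode console under KVM (falling back to HVC), and use SCLP
 * everywhere else.
 */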
static void __init conmode_default(void)
{
	char query_buffer[1024];
	char *ptr;

	if (MACHINE_IS_VM) {
		cpcmd("QUERY CONSOLE", query_buffer, 1024, NULL);
		console_devno = simple_strtoul(query_buffer + 5, NULL, 16);
		ptr = strstr(query_buffer, "SUBCHANNEL =");
		console_irq = simple_strtoul(ptr + 13, NULL, 16);
		cpcmd("QUERY TERM", query_buffer, 1024, NULL);
		ptr = strstr(query_buffer, "CONMODE");
		/*
		 * Set the conmode to 3215 so that the device recognition
		 * will set the cu_type of the console to 3215. If the
		 * conmode is 3270 and we don't set it back then both
		 * 3215 and the 3270 driver will try to access the console
		 * device (3215 as console and 3270 as normal tty).
		 */
		cpcmd("TERM CONMODE 3215", NULL, 0, NULL);
		if (ptr == NULL) {
#if defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
			SET_CONSOLE_SCLP;
#endif
			return;
		}
		if (str_has_prefix(ptr + 8, "3270")) {
#if defined(CONFIG_TN3270_CONSOLE)
			SET_CONSOLE_3270;
#elif defined(CONFIG_TN3215_CONSOLE)
			SET_CONSOLE_3215;
#elif defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
			SET_CONSOLE_SCLP;
#endif
		} else if (str_has_prefix(ptr + 8, "3215")) {
#if defined(CONFIG_TN3215_CONSOLE)
			SET_CONSOLE_3215;
#elif defined(CONFIG_TN3270_CONSOLE)
			SET_CONSOLE_3270;
#elif defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
			SET_CONSOLE_SCLP;
#endif
		}
	} else if (MACHINE_IS_KVM) {
		if (sclp.has_vt220 && IS_ENABLED(CONFIG_SCLP_VT220_CONSOLE))
			SET_CONSOLE_VT220;
		else if (sclp.has_linemode && IS_ENABLED(CONFIG_SCLP_CONSOLE))
			SET_CONSOLE_SCLP;
		else
			SET_CONSOLE_HVC;
	} else {
#if defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
		SET_CONSOLE_SCLP;
#endif
	}
}

#ifdef CONFIG_CRASH_DUMP
static void __init setup_zfcpdump(void)
{
	if (!is_ipl_type_dump())
		return;
	if (oldmem_data.start)
		return;
	strlcat(boot_command_line, " cio_ignore=all,!ipldev,!condev", COMMAND_LINE_SIZE);
	console_loglevel = 2;
}
#else
static inline void setup_zfcpdump(void) {}
#endif /* CONFIG_CRASH_DUMP */

/*
 * Reboot, halt and power_off stubs. They just call _machine_restart,
 * _machine_halt or _machine_power_off.
 */

void machine_restart(char *command)
{
	if ((!in_interrupt() && !in_atomic()) || oops_in_progress)
		/*
		 * Only unblank the console if we are called in enabled
		 * context or a bust_spinlocks cleared the way for us.
		 */
		console_unblank();
	_machine_restart(command);
}

void machine_halt(void)
{
	if (!in_interrupt() || oops_in_progress)
		/*
		 * Only unblank the console if we are called in enabled
		 * context or a bust_spinlocks cleared the way for us.
		 */
		console_unblank();
	_machine_halt();
}

void machine_power_off(void)
{
	if (!in_interrupt() || oops_in_progress)
		/*
		 * Only unblank the console if we are called in enabled
		 * context or a bust_spinlocks cleared the way for us.
		 */
		console_unblank();
	_machine_power_off();
}

/*
 * Dummy power off function.
 */
void (*pm_power_off)(void) = machine_power_off;
EXPORT_SYMBOL_GPL(pm_power_off);

void *restart_stack;

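/*
 * Allocate/free a kernel stack: vmalloc-backed when CONFIG_VMAP_STACK is
 * enabled, otherwise taken from the page allocator.
 */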
unsigned long stack_alloc(void)
{
#ifdef CONFIG_VMAP_STACK
	void *ret;

	ret = __vmalloc_node(THREAD_SIZE, THREAD_SIZE, THREADINFO_GFP,
			     NUMA_NO_NODE, __builtin_return_address(0));
	kmemleak_not_leak(ret);
	return (unsigned long)ret;
#else
	return __get_free_pages(GFP_KERNEL, THREAD_SIZE_ORDER);
#endif
}

void stack_free(unsigned long stack)
{
#ifdef CONFIG_VMAP_STACK
	vfree((void *) stack);
#else
	free_pages(stack, THREAD_SIZE_ORDER);
#endif
}

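/* Early variant backed by memblock, used by setup_lowcore() before the normal allocators are up. */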
static unsigned long __init stack_alloc_early(void)
{
	unsigned long stack;

	stack = (unsigned long)memblock_alloc(THREAD_SIZE, THREAD_SIZE);
	if (!stack) {
		panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
		      __func__, THREAD_SIZE, THREAD_SIZE);
	}
	return stack;
}

static void __init setup_lowcore(void)
{
	struct lowcore *lc, *abs_lc;

	/*
	 * Setup lowcore for boot cpu
	 */
	BUILD_BUG_ON(sizeof(struct lowcore) != LC_PAGES * PAGE_SIZE);
	lc = memblock_alloc_low(sizeof(*lc), sizeof(*lc));
	if (!lc)
		panic("%s: Failed to allocate %zu bytes align=%zx\n",
		      __func__, sizeof(*lc), sizeof(*lc));

	lc->restart_psw.mask = PSW_KERNEL_BITS & ~PSW_MASK_DAT;
	lc->restart_psw.addr = __pa(restart_int_handler);
	lc->external_new_psw.mask = PSW_KERNEL_BITS;
	lc->external_new_psw.addr = (unsigned long) ext_int_handler;
	lc->svc_new_psw.mask = PSW_KERNEL_BITS;
	lc->svc_new_psw.addr = (unsigned long) system_call;
	lc->program_new_psw.mask = PSW_KERNEL_BITS;
	lc->program_new_psw.addr = (unsigned long) pgm_check_handler;
	lc->mcck_new_psw.mask = PSW_KERNEL_BITS;
	lc->mcck_new_psw.addr = (unsigned long) mcck_int_handler;
	lc->io_new_psw.mask = PSW_KERNEL_BITS;
	lc->io_new_psw.addr = (unsigned long) io_int_handler;
	lc->clock_comparator = clock_comparator_max;
	lc->current_task = (unsigned long)&init_task;
	lc->lpp = LPP_MAGIC;
	lc->machine_flags = S390_lowcore.machine_flags;
	lc->preempt_count = S390_lowcore.preempt_count;
	nmi_alloc_mcesa_early(&lc->mcesad);
	lc->sys_enter_timer = S390_lowcore.sys_enter_timer;
	lc->exit_timer = S390_lowcore.exit_timer;
	lc->user_timer = S390_lowcore.user_timer;
	lc->system_timer = S390_lowcore.system_timer;
	lc->steal_timer = S390_lowcore.steal_timer;
	lc->last_update_timer = S390_lowcore.last_update_timer;
	lc->last_update_clock = S390_lowcore.last_update_clock;
	/*
	 * Allocate the global restart stack which is the same for
	 * all CPUs in case *one* of them does a PSW restart.
	 */
	restart_stack = (void *)(stack_alloc_early() + STACK_INIT_OFFSET);
	lc->mcck_stack = stack_alloc_early() + STACK_INIT_OFFSET;
	lc->async_stack = stack_alloc_early() + STACK_INIT_OFFSET;
	lc->nodat_stack = stack_alloc_early() + STACK_INIT_OFFSET;
	lc->kernel_stack = S390_lowcore.kernel_stack;
	/*
	 * Set up PSW restart to call ipl.c:do_restart(). Copy the relevant
	 * restart data to the absolute zero lowcore. This is necessary if
	 * PSW restart is done on an offline CPU that has lowcore zero.
	 */
	lc->restart_stack = (unsigned long) restart_stack;
	lc->restart_fn = (unsigned long) do_restart;
	lc->restart_data = 0;
	lc->restart_source = -1U;
	lc->spinlock_lockval = arch_spin_lockval(0);
	lc->spinlock_index = 0;
	arch_spin_lock_setup(0);
	lc->return_lpswe = gen_lpswe(__LC_RETURN_PSW);
	lc->return_mcck_lpswe = gen_lpswe(__LC_RETURN_MCCK_PSW);
	lc->preempt_count = PREEMPT_DISABLED;
	lc->kernel_asce = S390_lowcore.kernel_asce;
	lc->user_asce = S390_lowcore.user_asce;

	system_ctlreg_init_save_area(lc);
	abs_lc = get_abs_lowcore();
	abs_lc->restart_stack = lc->restart_stack;
	abs_lc->restart_fn = lc->restart_fn;
	abs_lc->restart_data = lc->restart_data;
	abs_lc->restart_source = lc->restart_source;
	abs_lc->restart_psw = lc->restart_psw;
	abs_lc->restart_flags = RESTART_FLAG_CTLREGS;
	abs_lc->program_new_psw = lc->program_new_psw;
	abs_lc->mcesad = lc->mcesad;
	put_abs_lowcore(abs_lc);

	set_prefix(__pa(lc));
	lowcore_ptr[0] = lc;
	if (abs_lowcore_map(0, lowcore_ptr[0], false))
		panic("Couldn't setup absolute lowcore");
}

static struct resource code_resource = {
	.name = "Kernel code",
	.flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
};

static struct resource data_resource = {
	.name = "Kernel data",
	.flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
};

static struct resource bss_resource = {
	.name = "Kernel bss",
	.flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
};

static struct resource __initdata *standard_resources[] = {
	&code_resource,
	&data_resource,
	&bss_resource,
};

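/*
 * Register a "System RAM" resource for every memory range in /proc/iomem
 * and nest the kernel code/data/bss resources inside them.
 */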
static void __init setup_resources(void)
{
	struct resource *res, *std_res, *sub_res;
	phys_addr_t start, end;
	int j;
	u64 i;

	code_resource.start = __pa_symbol(_text);
	code_resource.end = __pa_symbol(_etext) - 1;
	data_resource.start = __pa_symbol(_etext);
	data_resource.end = __pa_symbol(_edata) - 1;
	bss_resource.start = __pa_symbol(__bss_start);
	bss_resource.end = __pa_symbol(__bss_stop) - 1;

	for_each_mem_range(i, &start, &end) {
		res = memblock_alloc(sizeof(*res), 8);
		if (!res)
			panic("%s: Failed to allocate %zu bytes align=0x%x\n",
			      __func__, sizeof(*res), 8);
		res->flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM;

		res->name = "System RAM";
		res->start = start;
		/*
		 * In memblock, end points to the first byte after the
		 * range while in resources, end points to the last byte in
		 * the range.
		 */
		res->end = end - 1;
		request_resource(&iomem_resource, res);

		for (j = 0; j < ARRAY_SIZE(standard_resources); j++) {
			std_res = standard_resources[j];
			if (std_res->start < res->start ||
			    std_res->start > res->end)
				continue;
			if (std_res->end > res->end) {
				sub_res = memblock_alloc(sizeof(*sub_res), 8);
				if (!sub_res)
					panic("%s: Failed to allocate %zu bytes align=0x%x\n",
					      __func__, sizeof(*sub_res), 8);
				*sub_res = *std_res;
				sub_res->end = res->end;
				std_res->start = res->end + 1;
				request_resource(res, sub_res);
			} else {
				request_resource(res, std_res);
			}
		}
	}
#ifdef CONFIG_CRASH_DUMP
	/*
	 * Re-add removed crash kernel memory as reserved memory. This makes
	 * sure it will be mapped with the identity mapping and struct pages
	 * will be created, so it can be resized later on.
	 * However add it later since the crash kernel resource should not be
	 * part of the System RAM resource.
	 */
	if (crashk_res.end) {
		memblock_add_node(crashk_res.start, resource_size(&crashk_res),
				  0, MEMBLOCK_NONE);
		memblock_reserve(crashk_res.start, resource_size(&crashk_res));
		insert_resource(&iomem_resource, &crashk_res);
	}
#endif
}

static void __init setup_memory_end(void)
{
	max_pfn = max_low_pfn = PFN_DOWN(ident_map_size);
	pr_notice("The maximum memory size is %luMB\n", ident_map_size >> 20);
}

#ifdef CONFIG_CRASH_DUMP

/*
 * When kdump is enabled, we have to ensure that no memory from the area
 * [0 - crashkernel memory size] is set offline - it will be exchanged with
 * the crashkernel memory region when kdump is triggered. The crashkernel
 * memory region can never get offlined (pages are unmovable).
 */
static int kdump_mem_notifier(struct notifier_block *nb,
			      unsigned long action, void *data)
{
	struct memory_notify *arg = data;

	if (action != MEM_GOING_OFFLINE)
		return NOTIFY_OK;
	if (arg->start_pfn < PFN_DOWN(resource_size(&crashk_res)))
		return NOTIFY_BAD;
	return NOTIFY_OK;
}

static struct notifier_block kdump_mem_nb = {
	.notifier_call = kdump_mem_notifier,
};

#endif

/*
 * Reserve page tables created by decompressor
 */
static void __init reserve_pgtables(void)
{
	unsigned long start, end;
	struct reserved_range *range;

	for_each_physmem_reserved_type_range(RR_VMEM, range, &start, &end)
		memblock_reserve(start, end - start);
}

/*
 * Reserve memory for kdump kernel to be loaded with kexec
 */
static void __init reserve_crashkernel(void)
{
#ifdef CONFIG_CRASH_DUMP
	unsigned long long crash_base, crash_size;
	phys_addr_t low, high;
	int rc;

	rc = parse_crashkernel(boot_command_line, ident_map_size,
			       &crash_size, &crash_base, NULL, NULL);

	crash_base = ALIGN(crash_base, KEXEC_CRASH_MEM_ALIGN);
	crash_size = ALIGN(crash_size, KEXEC_CRASH_MEM_ALIGN);
	if (rc || crash_size == 0)
		return;

	if (memblock.memory.regions[0].size < crash_size) {
		pr_info("crashkernel reservation failed: %s\n",
			"first memory chunk must be at least crashkernel size");
		return;
	}

	low = crash_base ?: oldmem_data.start;
	high = low + crash_size;
	if (low >= oldmem_data.start && high <= oldmem_data.start + oldmem_data.size) {
		/* The crashkernel fits into OLDMEM, reuse OLDMEM */
		crash_base = low;
	} else {
		/* Find suitable area in free memory */
		low = max_t(unsigned long, crash_size, sclp.hsa_size);
		high = crash_base ? crash_base + crash_size : ULONG_MAX;

		if (crash_base && crash_base < low) {
			pr_info("crashkernel reservation failed: %s\n",
				"crash_base too low");
			return;
		}
		low = crash_base ?: low;
		crash_base = memblock_phys_alloc_range(crash_size,
						       KEXEC_CRASH_MEM_ALIGN,
						       low, high);
	}

	if (!crash_base) {
		pr_info("crashkernel reservation failed: %s\n",
			"no suitable area found");
		return;
	}

	if (register_memory_notifier(&kdump_mem_nb)) {
		memblock_phys_free(crash_base, crash_size);
		return;
	}

	if (!oldmem_data.start && MACHINE_IS_VM)
		diag10_range(PFN_DOWN(crash_base), PFN_DOWN(crash_size));
	crashk_res.start = crash_base;
	crashk_res.end = crash_base + crash_size - 1;
	memblock_remove(crash_base, crash_size);
	pr_info("Reserving %lluMB of memory at %lluMB "
		"for crashkernel (System RAM: %luMB)\n",
		crash_size >> 20, crash_base >> 20,
		(unsigned long)memblock.memory.total_size >> 20);
	os_info_crashkernel_add(crash_base, crash_size);
#endif
}

/*
 * Reserve the initrd from being used by memblock
 */
static void __init reserve_initrd(void)
{
	unsigned long addr, size;

	if (!IS_ENABLED(CONFIG_BLK_DEV_INITRD) || !get_physmem_reserved(RR_INITRD, &addr, &size))
		return;
	initrd_start = (unsigned long)__va(addr);
	initrd_end = initrd_start + size;
	memblock_reserve(addr, size);
}

/*
 * Reserve the memory area used to pass the certificate lists
 */
static void __init reserve_certificate_list(void)
{
	if (ipl_cert_list_addr)
		memblock_reserve(ipl_cert_list_addr, ipl_cert_list_size);
}

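/*
 * Reserve the extended physmem info block created during early boot, if
 * one exists; free_physmem_info() releases it again from setup_arch().
 */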
static void __init reserve_physmem_info(void)
{
	unsigned long addr, size;

	if (get_physmem_reserved(RR_MEM_DETECT_EXTENDED, &addr, &size))
		memblock_reserve(addr, size);
}

static void __init free_physmem_info(void)
{
	unsigned long addr, size;

	if (get_physmem_reserved(RR_MEM_DETECT_EXTENDED, &addr, &size))
		memblock_phys_free(addr, size);
}

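/* Transfer the usable and online memory ranges from physmem_info to memblock. */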
static void __init memblock_add_physmem_info(void)
{
	unsigned long start, end;
	int i;

	pr_debug("physmem info source: %s (%hhd)\n",
		 get_physmem_info_source(), physmem_info.info_source);
	/* keep memblock lists close to the kernel */
	memblock_set_bottom_up(true);
	for_each_physmem_usable_range(i, &start, &end)
		memblock_add(start, end - start);
	for_each_physmem_online_range(i, &start, &end)
		memblock_physmem_add(start, end - start);
	memblock_set_bottom_up(false);
	memblock_set_node(0, ULONG_MAX, &memblock.memory, 0);
}

/*
 * Reserve memory used for lowcore/command line/kernel image.
 */
static void __init reserve_kernel(void)
{
	memblock_reserve(0, STARTUP_NORMAL_OFFSET);
	memblock_reserve(OLDMEM_BASE, sizeof(unsigned long));
	memblock_reserve(OLDMEM_SIZE, sizeof(unsigned long));
	memblock_reserve(physmem_info.reserved[RR_AMODE31].start, __eamode31 - __samode31);
	memblock_reserve(__pa(sclp_early_sccb), EXT_SCCB_READ_SCP);
	memblock_reserve(__pa(_stext), _end - _stext);
}

static void __init setup_memory(void)
{
	phys_addr_t start, end;
	u64 i;

	/*
	 * Init storage key for present memory
	 */
	for_each_mem_range(i, &start, &end)
		storage_key_init_range(start, end);

	psw_set_key(PAGE_DEFAULT_KEY);
}

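/*
 * Move the .amode31 section to its final location below 2 GB and adjust
 * all references to it.
 */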
static void __init relocate_amode31_section(void)
{
	unsigned long amode31_size = __eamode31 - __samode31;
	long amode31_offset, *ptr;

	amode31_offset = physmem_info.reserved[RR_AMODE31].start - (unsigned long)__samode31;
	pr_info("Relocating AMODE31 section of size 0x%08lx\n", amode31_size);

	/* Move original AMODE31 section to the new one */
	memmove((void *)physmem_info.reserved[RR_AMODE31].start, __samode31, amode31_size);
	/* Zero out the old AMODE31 section to catch invalid accesses within it */
	memset(__samode31, 0, amode31_size);

	/* Update all AMODE31 region references */
	for (ptr = _start_amode31_refs; ptr != _end_amode31_refs; ptr++)
		*ptr += amode31_offset;
}

/* This must be called after AMODE31 relocation */
static void __init setup_cr(void)
{
	union ctlreg2 cr2;
	union ctlreg5 cr5;
	union ctlreg15 cr15;

	__ctl_duct[1] = (unsigned long)__ctl_aste;
	__ctl_duct[2] = (unsigned long)__ctl_aste;
	__ctl_duct[4] = (unsigned long)__ctl_duald;

	/* Update control registers CR2, CR5 and CR15 */
	local_ctl_store(2, &cr2.reg);
	local_ctl_store(5, &cr5.reg);
	local_ctl_store(15, &cr15.reg);
	cr2.ducto = (unsigned long)__ctl_duct >> 6;
	cr5.pasteo = (unsigned long)__ctl_duct >> 6;
	cr15.lsea = (unsigned long)__ctl_linkage_stack >> 3;
	system_ctl_load(2, &cr2.reg);
	system_ctl_load(5, &cr5.reg);
	system_ctl_load(15, &cr15.reg);
}

/*
 * Add system information as device randomness
 */
static void __init setup_randomness(void)
{
	struct sysinfo_3_2_2 *vmms;

	vmms = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
	if (!vmms)
		panic("Failed to allocate memory for sysinfo structure\n");
	if (stsi(vmms, 3, 2, 2) == 0 && vmms->count)
		add_device_randomness(&vmms->vm, sizeof(vmms->vm[0]) * vmms->count);
	memblock_free(vmms, PAGE_SIZE);

	if (cpacf_query_func(CPACF_PRNO, CPACF_PRNO_TRNG))
		static_branch_enable(&s390_arch_random_available);
}

/*
 * Issue diagnose 318 to set the control program name and
 * version codes.
 */
static void __init setup_control_program_code(void)
{
	union diag318_info diag318_info = {
		.cpnc = CPNC_LINUX,
		.cpvc = 0,
	};

	if (!sclp.has_diag318)
		return;

	diag_stat_inc(DIAG_STAT_X318);
	asm volatile("diag %0,0,0x318\n" : : "d" (diag318_info.val));
}

/*
 * Print the component list from the IPL report
 */
static void __init log_component_list(void)
{
	struct ipl_rb_component_entry *ptr, *end;
	char *str;

	if (!early_ipl_comp_list_addr)
		return;
	if (ipl_block.hdr.flags & IPL_PL_FLAG_SIPL)
		pr_info("Linux is running with Secure-IPL enabled\n");
	else
		pr_info("Linux is running with Secure-IPL disabled\n");
	ptr = __va(early_ipl_comp_list_addr);
	end = (void *) ptr + early_ipl_comp_list_size;
	pr_info("The IPL report contains the following components:\n");
	while (ptr < end) {
		if (ptr->flags & IPL_RB_COMPONENT_FLAG_SIGNED) {
			if (ptr->flags & IPL_RB_COMPONENT_FLAG_VERIFIED)
				str = "signed, verified";
			else
				str = "signed, verification failed";
		} else {
			str = "not signed";
		}
		pr_info("%016llx - %016llx (%s)\n",
			ptr->addr, ptr->addr + ptr->len, str);
		ptr++;
	}
}

/*
 * Setup function called from init/main.c just after the banner
 * was printed.
 */

void __init setup_arch(char **cmdline_p)
{
	/*
	 * print what head.S has found out about the machine
	 */
	if (MACHINE_IS_VM)
		pr_info("Linux is running as a z/VM "
			"guest operating system in 64-bit mode\n");
	else if (MACHINE_IS_KVM)
		pr_info("Linux is running under KVM in 64-bit mode\n");
	else if (MACHINE_IS_LPAR)
		pr_info("Linux is running natively in 64-bit mode\n");
	else
		pr_info("Linux is running as a guest in 64-bit mode\n");

	log_component_list();

	/* Have one command line that is parsed and saved in /proc/cmdline */
	/* boot_command_line has already been set up in early.c */
	*cmdline_p = boot_command_line;

	ROOT_DEV = Root_RAM0;

	setup_initial_init_mm(_text, _etext, _edata, _end);

	if (IS_ENABLED(CONFIG_EXPOLINE_AUTO))
		nospec_auto_detect();

	jump_label_init();
	parse_early_param();
#ifdef CONFIG_CRASH_DUMP
	/* Deactivate elfcorehdr= kernel parameter */
	elfcorehdr_addr = ELFCORE_ADDR_MAX;
#endif

	os_info_init();
	setup_ipl();
	setup_control_program_code();

	/* Do some memory reservations *before* memory is added to memblock */
	reserve_pgtables();
	reserve_kernel();
	reserve_initrd();
	reserve_certificate_list();
	reserve_physmem_info();
	memblock_set_current_limit(ident_map_size);
	memblock_allow_resize();

	/* Get information about *all* installed memory */
	memblock_add_physmem_info();

	free_physmem_info();
	setup_memory_end();
	memblock_dump_all();
	setup_memory();

	relocate_amode31_section();
	setup_cr();
	setup_uv();
	dma_contiguous_reserve(ident_map_size);
	vmcp_cma_reserve();
	if (MACHINE_HAS_EDAT2)
		hugetlb_cma_reserve(PUD_SHIFT - PAGE_SHIFT);

	reserve_crashkernel();
#ifdef CONFIG_CRASH_DUMP
	/*
	 * Be aware that smp_save_dump_secondary_cpus() triggers a system reset.
	 * Therefore CPU and device initialization should be done afterwards.
	 */
	smp_save_dump_secondary_cpus();
#endif

	setup_resources();
	setup_lowcore();
	smp_fill_possible_mask();
	cpu_detect_mhz_feature();
	cpu_init();
	numa_setup();
	smp_detect_cpus();
	topology_init_early();

	if (test_facility(193))
		static_branch_enable(&cpu_has_bear);

	/*
	 * Create kernel page tables.
	 */
	paging_init();

	/*
	 * After paging_init created the kernel page table, the new PSWs
	 * in lowcore can now run with DAT enabled.
	 */
#ifdef CONFIG_CRASH_DUMP
	smp_save_dump_ipl_cpu();
#endif

	/* Setup default console */
	conmode_default();
	set_preferred_console();

	apply_alternative_instructions();
	if (IS_ENABLED(CONFIG_EXPOLINE))
		nospec_init_branches();

	/* Setup zfcp/nvme dump support */
	setup_zfcpdump();

	/* Add system specific data to the random pool */
	setup_randomness();
}