1 | // SPDX-License-Identifier: GPL-2.0-or-later |
2 | /* |
3 | * Procedures for creating, accessing and interpreting the device tree. |
4 | * |
5 | * Paul Mackerras August 1996. |
6 | * Copyright (C) 1996-2005 Paul Mackerras. |
7 | * |
8 | * Adapted for 64bit PowerPC by Dave Engebretsen and Peter Bergner. |
9 | * {engebret|bergner}@us.ibm.com |
10 | */ |
11 | |
12 | #undef DEBUG |
13 | |
14 | #include <linux/kernel.h> |
15 | #include <linux/string.h> |
16 | #include <linux/init.h> |
17 | #include <linux/threads.h> |
18 | #include <linux/spinlock.h> |
19 | #include <linux/types.h> |
20 | #include <linux/pci.h> |
21 | #include <linux/delay.h> |
22 | #include <linux/initrd.h> |
23 | #include <linux/bitops.h> |
24 | #include <linux/export.h> |
25 | #include <linux/kexec.h> |
26 | #include <linux/irq.h> |
27 | #include <linux/memblock.h> |
28 | #include <linux/of.h> |
29 | #include <linux/of_fdt.h> |
30 | #include <linux/libfdt.h> |
31 | #include <linux/cpu.h> |
32 | #include <linux/pgtable.h> |
33 | #include <linux/seq_buf.h> |
34 | |
35 | #include <asm/rtas.h> |
36 | #include <asm/page.h> |
37 | #include <asm/processor.h> |
38 | #include <asm/irq.h> |
39 | #include <asm/io.h> |
40 | #include <asm/kdump.h> |
41 | #include <asm/smp.h> |
42 | #include <asm/mmu.h> |
43 | #include <asm/paca.h> |
44 | #include <asm/powernv.h> |
45 | #include <asm/iommu.h> |
46 | #include <asm/btext.h> |
47 | #include <asm/sections.h> |
48 | #include <asm/setup.h> |
49 | #include <asm/pci-bridge.h> |
50 | #include <asm/kexec.h> |
51 | #include <asm/opal.h> |
52 | #include <asm/fadump.h> |
53 | #include <asm/epapr_hcalls.h> |
54 | #include <asm/firmware.h> |
55 | #include <asm/dt_cpu_ftrs.h> |
56 | #include <asm/drmem.h> |
57 | #include <asm/ultravisor.h> |
58 | #include <asm/prom.h> |
59 | #include <asm/plpks.h> |
60 | |
61 | #include <mm/mmu_decl.h> |
62 | |
/*
 * DBG() - early debug output.
 *
 * Expands to a KERN_ERR printk() when DEBUG is defined and to nothing
 * otherwise (DEBUG is #undef'd near the top of this file), so its
 * arguments are not evaluated in a non-debug build.
 */
#ifdef DEBUG
#define DBG(fmt...) printk(KERN_ERR fmt)
#else
#define DBG(fmt...)
#endif
68 | |
/* Optional chip-id cache, indexed by cpu / threads_per_core (see cpu_to_chip_id()) */
int *chip_id_lookup_table;

#ifdef CONFIG_PPC64
/* Set when /chosen carries "linux,iommu-off" */
int __initdata iommu_is_off;
/* Set when /chosen carries "linux,iommu-force-on" */
int __initdata iommu_force_on;
/* Copied from /chosen "linux,tce-alloc-start" and "linux,tce-alloc-end" */
unsigned long tce_alloc_start, tce_alloc_end;
u64 ppc64_rma_size;
/* Number of "cpu" nodes seen by early_init_dt_scan_cpus() */
unsigned int boot_cpu_node_count __ro_after_init;
#endif
/* Size of the memory block with the lowest base address seen so far */
static phys_addr_t first_memblock_size;
/* Number of threads counted while scanning cpu nodes (SMP builds only) */
static int __initdata boot_cpu_count;
80 | |
81 | static int __init early_parse_mem(char *p) |
82 | { |
83 | if (!p) |
84 | return 1; |
85 | |
86 | memory_limit = PAGE_ALIGN(memparse(p, &p)); |
87 | DBG("memory limit = 0x%llx\n" , memory_limit); |
88 | |
89 | return 0; |
90 | } |
91 | early_param("mem" , early_parse_mem); |
92 | |
/*
 * overlaps_initrd - test a range against the page-aligned initrd.
 *
 * The initrd is widened to page boundaries (start rounded down, end
 * rounded up) before the comparison.  Returns non-zero when the range
 * [start, start + size) reaches into that widened area, and 0 when it
 * does not, when no initrd is set, or when initrd support is not built.
 */
static inline int overlaps_initrd(unsigned long start, unsigned long size)
{
#ifdef CONFIG_BLK_DEV_INITRD
	unsigned long rd_first, rd_last;

	if (initrd_start == 0)
		return 0;

	rd_first = ALIGN_DOWN(initrd_start, PAGE_SIZE);
	rd_last = ALIGN(initrd_end, PAGE_SIZE);

	/* Range ends at or before the first initrd page: no overlap */
	if (start + size <= rd_first)
		return 0;

	return start <= rd_last;
#else
	return 0;
#endif
}
109 | |
110 | /** |
111 | * move_device_tree - move tree to an unused area, if needed. |
112 | * |
113 | * The device tree may be allocated beyond our memory limit, or inside the |
114 | * crash kernel region for kdump, or within the page aligned range of initrd. |
115 | * If so, move it out of the way. |
116 | */ |
117 | static void __init move_device_tree(void) |
118 | { |
119 | unsigned long start, size; |
120 | void *p; |
121 | |
122 | DBG("-> move_device_tree\n" ); |
123 | |
124 | start = __pa(initial_boot_params); |
125 | size = fdt_totalsize(initial_boot_params); |
126 | |
127 | if ((memory_limit && (start + size) > PHYSICAL_START + memory_limit) || |
128 | !memblock_is_memory(start + size - 1) || |
129 | overlaps_crashkernel(start, size) || overlaps_initrd(start, size)) { |
130 | p = memblock_alloc_raw(size, PAGE_SIZE); |
131 | if (!p) |
132 | panic(fmt: "Failed to allocate %lu bytes to move device tree\n" , |
133 | size); |
134 | memcpy(p, initial_boot_params, size); |
135 | initial_boot_params = p; |
136 | DBG("Moved device tree to 0x%px\n" , p); |
137 | } |
138 | |
139 | DBG("<- move_device_tree\n" ); |
140 | } |
141 | |
/*
 * ibm,pa/pi-features is a per-cpu property that contains a string of
 * attribute descriptors, each of which has a 2 byte header plus up
 * to 254 bytes worth of processor attribute bits.  First header
 * byte specifies the number of bytes following the header.
 * Second header byte is an "attribute-specifier" type, of which
 * zero is the only currently-defined value.
 *
 * Implementation: each struct ibm_feature names the byte and bit offset
 * of one attribute bit together with the kernel feature masks that the
 * bit controls.  scan_features() walks a table of these entries and
 * updates cur_cpu_spec accordingly; it changes nothing when no type-0
 * descriptor is found.  Note that the bit numbers are big-endian to
 * match the definition in PAPR.
 *
 * Note: the 'clear' flag clears the feature if the bit is set in the
 * ibm,pa/pi-features property, it does not set the feature if the
 * bit is clear.
 */
struct ibm_feature {
	unsigned long cpu_features;	/* CPU_FTR_xxx bit */
	unsigned long mmu_features;	/* MMU_FTR_xxx bit */
	unsigned int cpu_user_ftrs;	/* PPC_FEATURE_xxx bit */
	unsigned int cpu_user_ftrs2;	/* PPC_FEATURE2_xxx bit */
	unsigned char pabyte;		/* byte number in ibm,pa/pi-features */
	unsigned char pabit;		/* bit number (big-endian) */
	unsigned char clear;		/* if 1, pa bit set => clear feature */
};
167 | |
/*
 * Attribute bits of the "ibm,pa-features" property that the kernel acts
 * on, and the feature masks each one controls.  The table is handed to
 * check_cpu_features() from early_init_dt_scan_cpus().
 */
static struct ibm_feature ibm_pa_features[] __initdata = {
	{ .pabyte = 0, .pabit = 0, .cpu_user_ftrs = PPC_FEATURE_HAS_MMU },
	{ .pabyte = 0, .pabit = 1, .cpu_user_ftrs = PPC_FEATURE_HAS_FPU },
	{ .pabyte = 0, .pabit = 3, .cpu_features = CPU_FTR_CTRL },
	{ .pabyte = 0, .pabit = 6, .cpu_features = CPU_FTR_NOEXECUTE },
	{ .pabyte = 1, .pabit = 2, .mmu_features = MMU_FTR_CI_LARGE_PAGE },
#ifdef CONFIG_PPC_RADIX_MMU
	{ .pabyte = 40, .pabit = 0, .mmu_features = MMU_FTR_TYPE_RADIX | MMU_FTR_GTSE },
#endif
	{ .pabyte = 5, .pabit = 0, .cpu_features = CPU_FTR_REAL_LE,
	  .cpu_user_ftrs = PPC_FEATURE_TRUE_LE },
	/*
	 * If the kernel doesn't support TM (ie CONFIG_PPC_TRANSACTIONAL_MEM=n),
	 * we don't want to turn on TM here, so we use the *_COMP versions
	 * which are 0 if the kernel doesn't support TM.
	 */
	{ .pabyte = 22, .pabit = 0, .cpu_features = CPU_FTR_TM_COMP,
	  .cpu_user_ftrs2 = PPC_FEATURE2_HTM_COMP | PPC_FEATURE2_HTM_NOSC_COMP },

	{ .pabyte = 64, .pabit = 0, .cpu_features = CPU_FTR_DAWR1 },
	{ .pabyte = 68, .pabit = 5, .cpu_features = CPU_FTR_DEXCR_NPHIE },
};
190 | |
/*
 * ibm,pi-features property provides the support of processor specific
 * options not described in ibm,pa-features.  Two bits of byte 0 are
 * used right now:
 *  - bit 3 indicates the occurrence of a DSI interrupt when a paste
 *    operation is done on a suspended NX window (MMU_FTR_NX_DSI);
 *  - bit 4 is a 'clear' entry: when it is set, CPU_FTR_DBELL is removed.
 */
static struct ibm_feature ibm_pi_features[] __initdata = {
	{ .pabyte = 0, .pabit = 3, .mmu_features = MMU_FTR_NX_DSI },
	{ .pabyte = 0, .pabit = 4, .cpu_features = CPU_FTR_DBELL, .clear = 1 },
};
201 | |
202 | static void __init scan_features(unsigned long node, const unsigned char *ftrs, |
203 | unsigned long tablelen, |
204 | struct ibm_feature *fp, |
205 | unsigned long ft_size) |
206 | { |
207 | unsigned long i, len, bit; |
208 | |
209 | /* find descriptor with type == 0 */ |
210 | for (;;) { |
211 | if (tablelen < 3) |
212 | return; |
213 | len = 2 + ftrs[0]; |
214 | if (tablelen < len) |
215 | return; /* descriptor 0 not found */ |
216 | if (ftrs[1] == 0) |
217 | break; |
218 | tablelen -= len; |
219 | ftrs += len; |
220 | } |
221 | |
222 | /* loop over bits we know about */ |
223 | for (i = 0; i < ft_size; ++i, ++fp) { |
224 | if (fp->pabyte >= ftrs[0]) |
225 | continue; |
226 | bit = (ftrs[2 + fp->pabyte] >> (7 - fp->pabit)) & 1; |
227 | if (bit && !fp->clear) { |
228 | cur_cpu_spec->cpu_features |= fp->cpu_features; |
229 | cur_cpu_spec->cpu_user_features |= fp->cpu_user_ftrs; |
230 | cur_cpu_spec->cpu_user_features2 |= fp->cpu_user_ftrs2; |
231 | cur_cpu_spec->mmu_features |= fp->mmu_features; |
232 | } else if (bit == fp->clear) { |
233 | cur_cpu_spec->cpu_features &= ~fp->cpu_features; |
234 | cur_cpu_spec->cpu_user_features &= ~fp->cpu_user_ftrs; |
235 | cur_cpu_spec->cpu_user_features2 &= ~fp->cpu_user_ftrs2; |
236 | cur_cpu_spec->mmu_features &= ~fp->mmu_features; |
237 | } |
238 | } |
239 | } |
240 | |
241 | static void __init check_cpu_features(unsigned long node, char *name, |
242 | struct ibm_feature *fp, |
243 | unsigned long size) |
244 | { |
245 | const unsigned char *pa_ftrs; |
246 | int tablelen; |
247 | |
248 | pa_ftrs = of_get_flat_dt_prop(node, name, size: &tablelen); |
249 | if (pa_ftrs == NULL) |
250 | return; |
251 | |
252 | scan_features(node, ftrs: pa_ftrs, tablelen, fp, ft_size: size); |
253 | } |
254 | |
#ifdef CONFIG_PPC_64S_HASH_MMU
/*
 * Set mmu_slb_size from the cpu node's "slb-size" property, falling back
 * to "ibm,slb-size".  mmu_slb_size is left alone when neither exists.
 */
static void __init init_mmu_slb_size(unsigned long node)
{
	const __be32 *cell;

	cell = of_get_flat_dt_prop(node, "slb-size", NULL);
	if (cell == NULL)
		cell = of_get_flat_dt_prop(node, "ibm,slb-size", NULL);
	if (cell == NULL)
		return;

	mmu_slb_size = be32_to_cpup(cell);
}
#else
#define init_mmu_slb_size(node) do { } while(0)
#endif
269 | |
270 | static struct feature_property { |
271 | const char *name; |
272 | u32 min_value; |
273 | unsigned long cpu_feature; |
274 | unsigned long cpu_user_ftr; |
275 | } feature_properties[] __initdata = { |
276 | #ifdef CONFIG_ALTIVEC |
277 | {"altivec" , 0, CPU_FTR_ALTIVEC, PPC_FEATURE_HAS_ALTIVEC}, |
278 | {"ibm,vmx" , 1, CPU_FTR_ALTIVEC, PPC_FEATURE_HAS_ALTIVEC}, |
279 | #endif /* CONFIG_ALTIVEC */ |
280 | #ifdef CONFIG_VSX |
281 | /* Yes, this _really_ is ibm,vmx == 2 to enable VSX */ |
282 | {"ibm,vmx" , 2, CPU_FTR_VSX, PPC_FEATURE_HAS_VSX}, |
283 | #endif /* CONFIG_VSX */ |
284 | #ifdef CONFIG_PPC64 |
285 | {"ibm,dfp" , 1, 0, PPC_FEATURE_HAS_DFP}, |
286 | {"ibm,purr" , 1, CPU_FTR_PURR, 0}, |
287 | {"ibm,spurr" , 1, CPU_FTR_SPURR, 0}, |
288 | #endif /* CONFIG_PPC64 */ |
289 | }; |
290 | |
#if defined(CONFIG_44x) && defined(CONFIG_PPC_FPU)
/*
 * 440GR(x) and 440EP(x) processors report the same pvr.  When the node's
 * "model" property names a 440EP part, build a logical pvr by OR-ing 0x8
 * (bit 28, always 0 in the "real" pvr) into the current pvr value and run
 * identify_cpu() again with it so that FPU support gets enabled.
 */
static __init void identical_pvr_fixup(unsigned long node)
{
	const char *model;
	unsigned int logical_pvr;

	model = of_get_flat_dt_prop(node, "model", NULL);
	if (model == NULL || strstr(model, "440EP") == NULL)
		return;

	logical_pvr = cur_cpu_spec->pvr_value | 0x8;
	identify_cpu(0, logical_pvr);
	DBG("Using logical pvr %x for %s\n", logical_pvr, model);
}
#else
#define identical_pvr_fixup(node) do { } while(0)
#endif
313 | |
314 | static void __init check_cpu_feature_properties(unsigned long node) |
315 | { |
316 | int i; |
317 | struct feature_property *fp = feature_properties; |
318 | const __be32 *prop; |
319 | |
320 | for (i = 0; i < (int)ARRAY_SIZE(feature_properties); ++i, ++fp) { |
321 | prop = of_get_flat_dt_prop(node, name: fp->name, NULL); |
322 | if (prop && be32_to_cpup(p: prop) >= fp->min_value) { |
323 | cur_cpu_spec->cpu_features |= fp->cpu_feature; |
324 | cur_cpu_spec->cpu_user_features |= fp->cpu_user_ftr; |
325 | } |
326 | } |
327 | } |
328 | |
329 | static int __init early_init_dt_scan_cpus(unsigned long node, |
330 | const char *uname, int depth, |
331 | void *data) |
332 | { |
333 | const char *type = of_get_flat_dt_prop(node, name: "device_type" , NULL); |
334 | const __be32 *prop; |
335 | const __be32 *intserv; |
336 | int i, nthreads; |
337 | int len; |
338 | int found = -1; |
339 | int found_thread = 0; |
340 | |
341 | /* We are scanning "cpu" nodes only */ |
342 | if (type == NULL || strcmp(type, "cpu" ) != 0) |
343 | return 0; |
344 | |
345 | if (IS_ENABLED(CONFIG_PPC64)) |
346 | boot_cpu_node_count++; |
347 | |
348 | /* Get physical cpuid */ |
349 | intserv = of_get_flat_dt_prop(node, name: "ibm,ppc-interrupt-server#s" , size: &len); |
350 | if (!intserv) |
351 | intserv = of_get_flat_dt_prop(node, name: "reg" , size: &len); |
352 | |
353 | nthreads = len / sizeof(int); |
354 | |
355 | /* |
356 | * Now see if any of these threads match our boot cpu. |
357 | * NOTE: This must match the parsing done in smp_setup_cpu_maps. |
358 | */ |
359 | for (i = 0; i < nthreads; i++) { |
360 | if (be32_to_cpu(intserv[i]) == |
361 | fdt_boot_cpuid_phys(initial_boot_params)) { |
362 | found = boot_cpu_count; |
363 | found_thread = i; |
364 | } |
365 | #ifdef CONFIG_SMP |
366 | /* logical cpu id is always 0 on UP kernels */ |
367 | boot_cpu_count++; |
368 | #endif |
369 | } |
370 | |
371 | /* Not the boot CPU */ |
372 | if (found < 0) |
373 | return 0; |
374 | |
375 | boot_cpuid = found; |
376 | |
377 | if (IS_ENABLED(CONFIG_PPC64)) |
378 | boot_cpu_hwid = be32_to_cpu(intserv[found_thread]); |
379 | |
380 | if (nr_cpu_ids % nthreads != 0) { |
381 | set_nr_cpu_ids(ALIGN(nr_cpu_ids, nthreads)); |
382 | pr_warn("nr_cpu_ids was not a multiple of threads_per_core, adjusted to %d\n" , |
383 | nr_cpu_ids); |
384 | } |
385 | |
386 | if (boot_cpuid >= nr_cpu_ids) { |
387 | // Remember boot core for smp_setup_cpu_maps() |
388 | boot_core_hwid = be32_to_cpu(intserv[0]); |
389 | |
390 | pr_warn("Boot CPU %d (core hwid %d) >= nr_cpu_ids, adjusted boot CPU to %d\n" , |
391 | boot_cpuid, boot_core_hwid, found_thread); |
392 | |
393 | // Adjust boot CPU to appear on logical core 0 |
394 | boot_cpuid = found_thread; |
395 | } |
396 | |
397 | DBG("boot cpu: logical %d physical %d\n" , boot_cpuid, |
398 | be32_to_cpu(intserv[found_thread])); |
399 | |
400 | /* |
401 | * PAPR defines "logical" PVR values for cpus that |
402 | * meet various levels of the architecture: |
403 | * 0x0f000001 Architecture version 2.04 |
404 | * 0x0f000002 Architecture version 2.05 |
405 | * If the cpu-version property in the cpu node contains |
406 | * such a value, we call identify_cpu again with the |
407 | * logical PVR value in order to use the cpu feature |
408 | * bits appropriate for the architecture level. |
409 | * |
410 | * A POWER6 partition in "POWER6 architected" mode |
411 | * uses the 0x0f000002 PVR value; in POWER5+ mode |
412 | * it uses 0x0f000001. |
413 | * |
414 | * If we're using device tree CPU feature discovery then we don't |
415 | * support the cpu-version property, and it's the responsibility of the |
416 | * firmware/hypervisor to provide the correct feature set for the |
417 | * architecture level via the ibm,powerpc-cpu-features binding. |
418 | */ |
419 | if (!dt_cpu_ftrs_in_use()) { |
420 | prop = of_get_flat_dt_prop(node, name: "cpu-version" , NULL); |
421 | if (prop && (be32_to_cpup(p: prop) & 0xff000000) == 0x0f000000) { |
422 | identify_cpu(0, be32_to_cpup(p: prop)); |
423 | seq_buf_printf(s: &ppc_hw_desc, fmt: "0x%04x " , be32_to_cpup(p: prop)); |
424 | } |
425 | |
426 | check_cpu_feature_properties(node); |
427 | check_cpu_features(node, "ibm,pa-features" , ibm_pa_features, |
428 | ARRAY_SIZE(ibm_pa_features)); |
429 | check_cpu_features(node, "ibm,pi-features" , ibm_pi_features, |
430 | ARRAY_SIZE(ibm_pi_features)); |
431 | } |
432 | |
433 | identical_pvr_fixup(node); |
434 | init_mmu_slb_size(node); |
435 | |
436 | #ifdef CONFIG_PPC64 |
437 | if (nthreads == 1) |
438 | cur_cpu_spec->cpu_features &= ~CPU_FTR_SMT; |
439 | else if (!dt_cpu_ftrs_in_use()) |
440 | cur_cpu_spec->cpu_features |= CPU_FTR_SMT; |
441 | #endif |
442 | |
443 | return 0; |
444 | } |
445 | |
446 | static int __init early_init_dt_scan_chosen_ppc(unsigned long node, |
447 | const char *uname, |
448 | int depth, void *data) |
449 | { |
450 | const unsigned long *lprop; /* All these set by kernel, so no need to convert endian */ |
451 | |
452 | /* Use common scan routine to determine if this is the chosen node */ |
453 | if (early_init_dt_scan_chosen(cmdline: data) < 0) |
454 | return 0; |
455 | |
456 | #ifdef CONFIG_PPC64 |
457 | /* check if iommu is forced on or off */ |
458 | if (of_get_flat_dt_prop(node, "linux,iommu-off" , NULL) != NULL) |
459 | iommu_is_off = 1; |
460 | if (of_get_flat_dt_prop(node, "linux,iommu-force-on" , NULL) != NULL) |
461 | iommu_force_on = 1; |
462 | #endif |
463 | |
464 | /* mem=x on the command line is the preferred mechanism */ |
465 | lprop = of_get_flat_dt_prop(node, name: "linux,memory-limit" , NULL); |
466 | if (lprop) |
467 | memory_limit = *lprop; |
468 | |
469 | #ifdef CONFIG_PPC64 |
470 | lprop = of_get_flat_dt_prop(node, "linux,tce-alloc-start" , NULL); |
471 | if (lprop) |
472 | tce_alloc_start = *lprop; |
473 | lprop = of_get_flat_dt_prop(node, "linux,tce-alloc-end" , NULL); |
474 | if (lprop) |
475 | tce_alloc_end = *lprop; |
476 | #endif |
477 | |
478 | #ifdef CONFIG_CRASH_RESERVE |
479 | lprop = of_get_flat_dt_prop(node, name: "linux,crashkernel-base" , NULL); |
480 | if (lprop) |
481 | crashk_res.start = *lprop; |
482 | |
483 | lprop = of_get_flat_dt_prop(node, name: "linux,crashkernel-size" , NULL); |
484 | if (lprop) |
485 | crashk_res.end = crashk_res.start + *lprop - 1; |
486 | #endif |
487 | |
488 | /* break now */ |
489 | return 1; |
490 | } |
491 | |
492 | /* |
493 | * Compare the range against max mem limit and update |
494 | * size if it cross the limit. |
495 | */ |
496 | |
497 | #ifdef CONFIG_SPARSEMEM |
498 | static bool __init validate_mem_limit(u64 base, u64 *size) |
499 | { |
500 | u64 max_mem = 1UL << (MAX_PHYSMEM_BITS); |
501 | |
502 | if (base >= max_mem) |
503 | return false; |
504 | if ((base + *size) > max_mem) |
505 | *size = max_mem - base; |
506 | return true; |
507 | } |
508 | #else |
509 | static bool __init validate_mem_limit(u64 base, u64 *size) |
510 | { |
511 | return true; |
512 | } |
513 | #endif |
514 | |
#ifdef CONFIG_PPC_PSERIES
/*
 * early_init_drmem_lmb - add one dynamic-reconfiguration LMB to memblock.
 * @lmb:  the logical memory block being visited
 * @usm:  cursor into linux,drconf-usable-memory; *usm is NULL when that
 *        property is not in use
 * @data: callback cookie (unused)
 *
 * Blocks that are reserved or not assigned to this partition are skipped.
 * When *usm is set, the ranges to add come from that property: a counter
 * followed by that many (base, size) pairs, consumed through the cursor.
 * Otherwise the single range is the LMB itself.  Always returns 0.
 */
static int __init early_init_drmem_lmb(struct drmem_lmb *lmb,
				       const __be32 **usm,
				       void *data)
{
	u64 base = lmb->base_addr;
	u64 size = drmem_lmb_size();
	int nr_ranges = 1;
	int have_usm;

	if (lmb->flags & DRCONF_MEM_RESERVED)
		return 0;
	if (!(lmb->flags & DRCONF_MEM_ASSIGNED))
		return 0;

	have_usm = (*usm != NULL);
	if (have_usm) {
		/* First cell of the entry is the number of ranges that follow */
		nr_ranges = dt_mem_next_cell(dt_root_size_cells, usm);
		if (nr_ranges == 0)
			return 0;
	}

	do {
		if (have_usm) {
			base = dt_mem_next_cell(dt_root_addr_cells, usm);
			size = dt_mem_next_cell(dt_root_size_cells, usm);
		}

		/* With the iommu off only memory below 2GB is used */
		if (iommu_is_off) {
			if (base >= 0x80000000ul)
				continue;
			if ((base + size) > 0x80000000ul)
				size = 0x80000000ul - base;
		}

		if (!validate_mem_limit(base, &size))
			continue;

		DBG("Adding: %llx -> %llx\n", base, size);
		memblock_add(base, size);

		if (lmb->flags & DRCONF_MEM_HOTREMOVABLE)
			memblock_mark_hotplug(base, size);
	} while (--nr_ranges);

	return 0;
}
#endif /* CONFIG_PPC_PSERIES */
582 | |
583 | static int __init early_init_dt_scan_memory_ppc(void) |
584 | { |
585 | #ifdef CONFIG_PPC_PSERIES |
586 | const void *fdt = initial_boot_params; |
587 | int node = fdt_path_offset(fdt, "/ibm,dynamic-reconfiguration-memory" ); |
588 | |
589 | if (node > 0) |
590 | walk_drmem_lmbs_early(node, NULL, early_init_drmem_lmb); |
591 | |
592 | #endif |
593 | |
594 | return early_init_dt_scan_memory(); |
595 | } |
596 | |
/*
 * For a relocatable kernel, we need to get memstart_addr first and then
 * use it to calculate the virtual kernel start address. This has to
 * happen at a very early stage (before machine_init). At that point we
 * only want memstart_addr and must not touch memblock yet, so this
 * variable lets early_init_dt_add_memory_arch() skip memblock_add().
 * Without CONFIG_RELOCATABLE it is a constant 1.
 */
#ifdef CONFIG_RELOCATABLE
static int add_mem_to_memblock = 1;
#else
#define add_mem_to_memblock 1
#endif
610 | |
611 | void __init early_init_dt_add_memory_arch(u64 base, u64 size) |
612 | { |
613 | #ifdef CONFIG_PPC64 |
614 | if (iommu_is_off) { |
615 | if (base >= 0x80000000ul) |
616 | return; |
617 | if ((base + size) > 0x80000000ul) |
618 | size = 0x80000000ul - base; |
619 | } |
620 | #endif |
621 | /* Keep track of the beginning of memory -and- the size of |
622 | * the very first block in the device-tree as it represents |
623 | * the RMA on ppc64 server |
624 | */ |
625 | if (base < memstart_addr) { |
626 | memstart_addr = base; |
627 | first_memblock_size = size; |
628 | } |
629 | |
630 | /* Add the chunk to the MEMBLOCK list */ |
631 | if (add_mem_to_memblock) { |
632 | if (validate_mem_limit(base, size: &size)) |
633 | memblock_add(base, size); |
634 | } |
635 | } |
636 | |
637 | static void __init early_reserve_mem_dt(void) |
638 | { |
639 | unsigned long i, dt_root; |
640 | int len; |
641 | const __be32 *prop; |
642 | |
643 | early_init_fdt_reserve_self(); |
644 | early_init_fdt_scan_reserved_mem(); |
645 | |
646 | dt_root = of_get_flat_dt_root(); |
647 | |
648 | prop = of_get_flat_dt_prop(node: dt_root, name: "reserved-ranges" , size: &len); |
649 | |
650 | if (!prop) |
651 | return; |
652 | |
653 | DBG("Found new-style reserved-ranges\n" ); |
654 | |
655 | /* Each reserved range is an (address,size) pair, 2 cells each, |
656 | * totalling 4 cells per range. */ |
657 | for (i = 0; i < len / (sizeof(*prop) * 4); i++) { |
658 | u64 base, size; |
659 | |
660 | base = of_read_number(cell: prop + (i * 4) + 0, size: 2); |
661 | size = of_read_number(cell: prop + (i * 4) + 2, size: 2); |
662 | |
663 | if (size) { |
664 | DBG("reserving: %llx -> %llx\n" , base, size); |
665 | memblock_reserve(base, size); |
666 | } |
667 | } |
668 | } |
669 | |
670 | static void __init early_reserve_mem(void) |
671 | { |
672 | __be64 *reserve_map; |
673 | |
674 | reserve_map = (__be64 *)(((unsigned long)initial_boot_params) + |
675 | fdt_off_mem_rsvmap(initial_boot_params)); |
676 | |
677 | /* Look for the new "reserved-regions" property in the DT */ |
678 | early_reserve_mem_dt(); |
679 | |
680 | #ifdef CONFIG_BLK_DEV_INITRD |
681 | /* Then reserve the initrd, if any */ |
682 | if (initrd_start && (initrd_end > initrd_start)) { |
683 | memblock_reserve(ALIGN_DOWN(__pa(initrd_start), PAGE_SIZE), |
684 | ALIGN(initrd_end, PAGE_SIZE) - |
685 | ALIGN_DOWN(initrd_start, PAGE_SIZE)); |
686 | } |
687 | #endif /* CONFIG_BLK_DEV_INITRD */ |
688 | |
689 | if (!IS_ENABLED(CONFIG_PPC32)) |
690 | return; |
691 | |
692 | /* |
693 | * Handle the case where we might be booting from an old kexec |
694 | * image that setup the mem_rsvmap as pairs of 32-bit values |
695 | */ |
696 | if (be64_to_cpup(p: reserve_map) > 0xffffffffull) { |
697 | u32 base_32, size_32; |
698 | __be32 *reserve_map_32 = (__be32 *)reserve_map; |
699 | |
700 | DBG("Found old 32-bit reserve map\n" ); |
701 | |
702 | while (1) { |
703 | base_32 = be32_to_cpup(p: reserve_map_32++); |
704 | size_32 = be32_to_cpup(p: reserve_map_32++); |
705 | if (size_32 == 0) |
706 | break; |
707 | DBG("reserving: %x -> %x\n" , base_32, size_32); |
708 | memblock_reserve(base: base_32, size: size_32); |
709 | } |
710 | return; |
711 | } |
712 | } |
713 | |
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
/* Set by the "ppc_tm" early parameter when TM is switched off */
static bool tm_disabled __initdata;

/*
 * Handler for the "ppc_tm" early parameter.  The value is parsed as a
 * boolean; a false value disables TM.  Returns -EINVAL when the value
 * cannot be parsed, 0 otherwise.
 */
static int __init parse_ppc_tm(char *str)
{
	bool enable;

	if (kstrtobool(str, &enable) != 0)
		return -EINVAL;

	tm_disabled = !enable;
	return 0;
}
early_param("ppc_tm", parse_ppc_tm);

/*
 * Finish TM setup: either hand over to pnv_tm_init(), or, when TM was
 * disabled on the command line, strip the TM feature bits from
 * cur_cpu_spec.
 */
static void __init tm_init(void)
{
	if (!tm_disabled) {
		pnv_tm_init();
		return;
	}

	pr_info("Disabling hardware transactional memory (HTM)\n");
	cur_cpu_spec->cpu_user_features2 &=
		~(PPC_FEATURE2_HTM_NOSC | PPC_FEATURE2_HTM);
	cur_cpu_spec->cpu_features &= ~CPU_FTR_TM;
}
#else
static void tm_init(void) { }
#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
745 | |
746 | static int __init |
747 | early_init_dt_scan_model(unsigned long node, const char *uname, |
748 | int depth, void *data) |
749 | { |
750 | const char *prop; |
751 | |
752 | if (depth != 0) |
753 | return 0; |
754 | |
755 | prop = of_get_flat_dt_prop(node, name: "model" , NULL); |
756 | if (prop) |
757 | seq_buf_printf(&ppc_hw_desc, "%s " , prop); |
758 | |
759 | /* break now */ |
760 | return 1; |
761 | } |
762 | |
#ifdef CONFIG_PPC64
/*
 * Copy the current FSCR into init_task (pid 0, aka swapper) so that the
 * value configured via the device tree features or via __init_FSCR() is
 * the one propagated to pid 1 (init) and all future processes.  Only done
 * on CPUs that have CPU_FTR_ARCH_207S.
 */
static void __init save_fscr_to_task(void)
{
	if (!early_cpu_has_feature(CPU_FTR_ARCH_207S))
		return;

	init_task.thread.fscr = mfspr(SPRN_FSCR);
}
#else
static inline void save_fscr_to_task(void) {}
#endif
778 | |
779 | |
780 | void __init early_init_devtree(void *params) |
781 | { |
782 | phys_addr_t limit; |
783 | |
784 | DBG(" -> early_init_devtree(%px)\n" , params); |
785 | |
786 | /* Too early to BUG_ON(), do it by hand */ |
787 | if (!early_init_dt_verify(params)) |
788 | panic(fmt: "BUG: Failed verifying flat device tree, bad version?" ); |
789 | |
790 | of_scan_flat_dt(it: early_init_dt_scan_model, NULL); |
791 | |
792 | #ifdef CONFIG_PPC_RTAS |
793 | /* Some machines might need RTAS info for debugging, grab it now. */ |
794 | of_scan_flat_dt(early_init_dt_scan_rtas, NULL); |
795 | #endif |
796 | |
797 | #ifdef CONFIG_PPC_POWERNV |
798 | /* Some machines might need OPAL info for debugging, grab it now. */ |
799 | of_scan_flat_dt(early_init_dt_scan_opal, NULL); |
800 | |
801 | /* Scan tree for ultravisor feature */ |
802 | of_scan_flat_dt(early_init_dt_scan_ultravisor, NULL); |
803 | #endif |
804 | |
805 | #if defined(CONFIG_FA_DUMP) || defined(CONFIG_PRESERVE_FA_DUMP) |
806 | /* scan tree to see if dump is active during last boot */ |
807 | of_scan_flat_dt(early_init_dt_scan_fw_dump, NULL); |
808 | #endif |
809 | |
810 | /* Retrieve various informations from the /chosen node of the |
811 | * device-tree, including the platform type, initrd location and |
812 | * size, TCE reserve, and more ... |
813 | */ |
814 | of_scan_flat_dt(it: early_init_dt_scan_chosen_ppc, data: boot_command_line); |
815 | |
816 | /* Scan memory nodes and rebuild MEMBLOCKs */ |
817 | early_init_dt_scan_root(); |
818 | early_init_dt_scan_memory_ppc(); |
819 | |
820 | /* |
821 | * As generic code authors expect to be able to use static keys |
822 | * in early_param() handlers, we initialize the static keys just |
823 | * before parsing early params (it's fine to call jump_label_init() |
824 | * more than once). |
825 | */ |
826 | jump_label_init(); |
827 | parse_early_param(); |
828 | |
829 | /* make sure we've parsed cmdline for mem= before this */ |
830 | if (memory_limit) |
831 | first_memblock_size = min_t(u64, first_memblock_size, memory_limit); |
832 | setup_initial_memory_limit(memstart_addr, first_memblock_size); |
833 | /* Reserve MEMBLOCK regions used by kernel, initrd, dt, etc... */ |
834 | memblock_reserve(PHYSICAL_START, __pa(_end) - PHYSICAL_START); |
835 | /* If relocatable, reserve first 32k for interrupt vectors etc. */ |
836 | if (PHYSICAL_START > MEMORY_START) |
837 | memblock_reserve(MEMORY_START, 0x8000); |
838 | reserve_kdump_trampoline(); |
839 | #if defined(CONFIG_FA_DUMP) || defined(CONFIG_PRESERVE_FA_DUMP) |
840 | /* |
841 | * If we fail to reserve memory for firmware-assisted dump then |
842 | * fallback to kexec based kdump. |
843 | */ |
844 | if (fadump_reserve_mem() == 0) |
845 | #endif |
846 | reserve_crashkernel(); |
847 | early_reserve_mem(); |
848 | |
849 | /* Ensure that total memory size is page-aligned. */ |
850 | limit = ALIGN(memory_limit ?: memblock_phys_mem_size(), PAGE_SIZE); |
851 | memblock_enforce_memory_limit(memory_limit: limit); |
852 | |
853 | #if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_PPC_4K_PAGES) |
854 | if (!early_radix_enabled()) |
855 | memblock_cap_memory_range(0, 1UL << (H_MAX_PHYSMEM_BITS)); |
856 | #endif |
857 | |
858 | memblock_allow_resize(); |
859 | memblock_dump_all(); |
860 | |
861 | DBG("Phys. mem: %llx\n" , (unsigned long long)memblock_phys_mem_size()); |
862 | |
863 | /* We may need to relocate the flat tree, do it now. |
864 | * FIXME .. and the initrd too? */ |
865 | move_device_tree(); |
866 | |
867 | DBG("Scanning CPUs ...\n" ); |
868 | |
869 | dt_cpu_ftrs_scan(); |
870 | |
871 | // We can now add the CPU name & PVR to the hardware description |
872 | seq_buf_printf(&ppc_hw_desc, "%s 0x%04lx " , cur_cpu_spec->cpu_name, mfspr(SPRN_PVR)); |
873 | |
874 | /* Retrieve CPU related informations from the flat tree |
875 | * (altivec support, boot CPU ID, ...) |
876 | */ |
877 | of_scan_flat_dt(it: early_init_dt_scan_cpus, NULL); |
878 | if (boot_cpuid < 0) { |
879 | printk("Failed to identify boot CPU !\n" ); |
880 | BUG(); |
881 | } |
882 | |
883 | save_fscr_to_task(); |
884 | |
885 | #if defined(CONFIG_SMP) && defined(CONFIG_PPC64) |
886 | /* We'll later wait for secondaries to check in; there are |
887 | * NCPUS-1 non-boot CPUs :-) |
888 | */ |
889 | spinning_secondaries = boot_cpu_count - 1; |
890 | #endif |
891 | |
892 | mmu_early_init_devtree(); |
893 | |
894 | #ifdef CONFIG_PPC_POWERNV |
895 | /* Scan and build the list of machine check recoverable ranges */ |
896 | of_scan_flat_dt(early_init_dt_scan_recoverable_ranges, NULL); |
897 | #endif |
898 | epapr_paravirt_early_init(); |
899 | |
900 | /* Now try to figure out if we are running on LPAR and so on */ |
901 | pseries_probe_fw_features(); |
902 | |
903 | /* |
904 | * Initialize pkey features and default AMR/IAMR values |
905 | */ |
906 | pkey_early_init_devtree(); |
907 | |
908 | #ifdef CONFIG_PPC_PS3 |
909 | /* Identify PS3 firmware */ |
910 | if (of_flat_dt_is_compatible(of_get_flat_dt_root(), "sony,ps3" )) |
911 | powerpc_firmware_features |= FW_FEATURE_PS3_POSSIBLE; |
912 | #endif |
913 | |
914 | /* If kexec left a PLPKS password in the DT, get it and clear it */ |
915 | plpks_early_init_devtree(); |
916 | |
917 | tm_init(); |
918 | |
919 | DBG(" <- early_init_devtree()\n" ); |
920 | } |
921 | |
#ifdef CONFIG_RELOCATABLE
/*
 * early_get_first_memblock_info - find the size of the first memory block.
 * @params: pointer to the flat device tree blob
 * @size:   where to store the size of the first memory block; may be NULL
 *
 * This runs before early_init_devtree(), so initial_boot_params has to be
 * set up here.  The memory nodes are scanned with add_mem_to_memblock
 * cleared so that memblock itself is left untouched.
 */
void __init early_get_first_memblock_info(void *params, phys_addr_t *size)
{
	/* Setup flat device-tree pointer */
	initial_boot_params = params;

	/* Scan the memory nodes without adding anything to memblock */
	add_mem_to_memblock = 0;
	early_init_dt_scan_root();
	early_init_dt_scan_memory_ppc();
	add_mem_to_memblock = 1;

	if (size == NULL)
		return;

	*size = first_memblock_size;
}
#endif
945 | |
946 | /******* |
947 | * |
948 | * New implementation of the OF "find" APIs, return a refcounted |
949 | * object, call of_node_put() when done. The device tree and list |
950 | * are protected by a rw_lock. |
951 | * |
952 | * Note that property management will need some locking as well, |
953 | * this isn't dealt with yet. |
954 | * |
955 | *******/ |
956 | |
957 | /** |
958 | * of_get_ibm_chip_id - Returns the IBM "chip-id" of a device |
959 | * @np: device node of the device |
960 | * |
961 | * This looks for a property "ibm,chip-id" in the node or any |
962 | * of its parents and returns its content, or -1 if it cannot |
963 | * be found. |
964 | */ |
965 | int of_get_ibm_chip_id(struct device_node *np) |
966 | { |
967 | of_node_get(node: np); |
968 | while (np) { |
969 | u32 chip_id; |
970 | |
971 | /* |
972 | * Skiboot may produce memory nodes that contain more than one |
973 | * cell in chip-id, we only read the first one here. |
974 | */ |
975 | if (!of_property_read_u32(np, propname: "ibm,chip-id" , out_value: &chip_id)) { |
976 | of_node_put(node: np); |
977 | return chip_id; |
978 | } |
979 | |
980 | np = of_get_next_parent(node: np); |
981 | } |
982 | return -1; |
983 | } |
984 | EXPORT_SYMBOL(of_get_ibm_chip_id); |
985 | |
986 | /** |
987 | * cpu_to_chip_id - Return the cpus chip-id |
988 | * @cpu: The logical cpu number. |
989 | * |
990 | * Return the value of the ibm,chip-id property corresponding to the given |
991 | * logical cpu number. If the chip-id can not be found, returns -1. |
992 | */ |
993 | int cpu_to_chip_id(int cpu) |
994 | { |
995 | struct device_node *np; |
996 | int ret = -1, idx; |
997 | |
998 | idx = cpu / threads_per_core; |
999 | if (chip_id_lookup_table && chip_id_lookup_table[idx] != -1) |
1000 | return chip_id_lookup_table[idx]; |
1001 | |
1002 | np = of_get_cpu_node(cpu, NULL); |
1003 | if (np) { |
1004 | ret = of_get_ibm_chip_id(np); |
1005 | of_node_put(node: np); |
1006 | |
1007 | if (chip_id_lookup_table) |
1008 | chip_id_lookup_table[idx] = ret; |
1009 | } |
1010 | |
1011 | return ret; |
1012 | } |
1013 | EXPORT_SYMBOL(cpu_to_chip_id); |
1014 | |
1015 | bool arch_match_cpu_phys_id(int cpu, u64 phys_id) |
1016 | { |
1017 | #ifdef CONFIG_SMP |
1018 | /* |
1019 | * Early firmware scanning must use this rather than |
1020 | * get_hard_smp_processor_id because we don't have pacas allocated |
1021 | * until memory topology is discovered. |
1022 | */ |
1023 | if (cpu_to_phys_id != NULL) |
1024 | return (int)phys_id == cpu_to_phys_id[cpu]; |
1025 | #endif |
1026 | |
1027 | return (int)phys_id == get_hard_smp_processor_id(cpu); |
1028 | } |
1029 | |