// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * PowerPC64 SLB support.
 *
 * Copyright (C) 2004 David Gibson <dwg@au.ibm.com>, IBM
 * Based on earlier code written by:
 * Dave Engebretsen and Mike Corrigan {engebret|mikejc}@us.ibm.com
 * Copyright (c) 2001 Dave Engebretsen
 * Copyright (C) 2002 Anton Blanchard <anton@au.ibm.com>, IBM
 */

#include <asm/interrupt.h>
#include <asm/mmu.h>
#include <asm/mmu_context.h>
#include <asm/paca.h>
#include <asm/lppaca.h>
#include <asm/ppc-opcode.h>
#include <asm/cputable.h>
#include <asm/cacheflush.h>
#include <asm/smp.h>
#include <linux/compiler.h>
#include <linux/context_tracking.h>
#include <linux/mm_types.h>
#include <linux/pgtable.h>

#include <asm/udbg.h>
#include <asm/code-patching.h>

#include "internal.h"


static long slb_allocate_user(struct mm_struct *mm, unsigned long ea);

bool stress_slb_enabled __initdata;

static int __init parse_stress_slb(char *p)
{
	stress_slb_enabled = true;
	return 0;
}
early_param("stress_slb", parse_stress_slb);

__ro_after_init DEFINE_STATIC_KEY_FALSE(stress_slb_key);

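/*
 * Only active under CONFIG_DEBUG_VM: use slbfee. to check whether the SLB
 * does (or does not) currently contain a translation for @ea, and warn on a
 * mismatch. Expects interrupts hard-disabled, and does nothing on CPUs
 * without CPU_FTR_ARCH_206.
 */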
static void assert_slb_presence(bool present, unsigned long ea)
{
#ifdef CONFIG_DEBUG_VM
	unsigned long tmp;

	WARN_ON_ONCE(mfmsr() & MSR_EE);

	if (!cpu_has_feature(CPU_FTR_ARCH_206))
		return;

	/*
	 * slbfee. requires bit 24 (PPC bit 39) be clear in RB. Hardware
	 * ignores all other bits from 0-27, so just clear them all.
	 */
	ea &= ~((1UL << SID_SHIFT) - 1);
	asm volatile(__PPC_SLBFEE_DOT(%0, %1) : "=r" (tmp) : "r" (ea) : "cr0");

	WARN_ON(present == (tmp == 0));
#endif
}

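/*
 * Keep this CPU's SLB shadow save area in sync with a bolted entry that is
 * about to be (re)written into the SLB, so the bolted entries can later be
 * re-established from the shadow (see create_shadowed_slbe() and
 * __slb_restore_bolted_realmode()).
 */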
static inline void slb_shadow_update(unsigned long ea, int ssize,
				     unsigned long flags,
				     enum slb_index index)
{
	struct slb_shadow *p = get_slb_shadow();

	/*
	 * Clear the ESID first so the entry is not valid while we are
	 * updating it. No write barriers are needed here, provided
	 * we only update the current CPU's SLB shadow buffer.
	 */
	WRITE_ONCE(p->save_area[index].esid, 0);
	WRITE_ONCE(p->save_area[index].vsid, cpu_to_be64(mk_vsid_data(ea, ssize, flags)));
	WRITE_ONCE(p->save_area[index].esid, cpu_to_be64(mk_esid_data(ea, ssize, index)));
}

static inline void slb_shadow_clear(enum slb_index index)
{
	WRITE_ONCE(get_slb_shadow()->save_area[index].esid, cpu_to_be64(index));
}

static inline void create_shadowed_slbe(unsigned long ea, int ssize,
					unsigned long flags,
					enum slb_index index)
{
	/*
	 * Updating the shadow buffer before writing the SLB ensures
	 * we don't get a stale entry here if we get preempted by PHYP
	 * between these two statements.
	 */
	slb_shadow_update(ea, ssize, flags, index);

	assert_slb_presence(false, ea);
	asm volatile("slbmte %0,%1" :
		     : "r" (mk_vsid_data(ea, ssize, flags)),
		       "r" (mk_esid_data(ea, ssize, index))
		     : "memory");
}

/*
 * Insert bolted entries into SLB (which may not be empty, so don't clear
 * slb_cache_ptr).
 */
void __slb_restore_bolted_realmode(void)
{
	struct slb_shadow *p = get_slb_shadow();
	enum slb_index index;

	/* No isync needed because realmode. */
	for (index = 0; index < SLB_NUM_BOLTED; index++) {
		asm volatile("slbmte %0,%1" :
			     : "r" (be64_to_cpu(p->save_area[index].vsid)),
			       "r" (be64_to_cpu(p->save_area[index].esid)));
	}

	assert_slb_presence(true, local_paca->kstack);
}

/*
 * Insert the bolted entries into an empty SLB.
 */
void slb_restore_bolted_realmode(void)
{
	__slb_restore_bolted_realmode();
	get_paca()->slb_cache_ptr = 0;

	get_paca()->slb_kern_bitmap = (1U << SLB_NUM_BOLTED) - 1;
	get_paca()->slb_used_bitmap = get_paca()->slb_kern_bitmap;
}

/*
 * This flushes all SLB entries including 0, so it must be realmode.
 */
void slb_flush_all_realmode(void)
{
	asm volatile("slbmte %0,%0; slbia" : : "r" (0));
}

static __always_inline void __slb_flush_and_restore_bolted(bool preserve_kernel_lookaside)
{
	struct slb_shadow *p = get_slb_shadow();
	unsigned long ksp_esid_data, ksp_vsid_data;
	u32 ih;

	/*
	 * SLBIA IH=1 on ISA v2.05 and newer processors may preserve lookaside
	 * information created with Class=0 entries, which we use for kernel
	 * SLB entries (the SLB entries themselves are still invalidated).
	 *
	 * Older processors will ignore this optimisation. Over-invalidation
	 * is fine because we never rely on lookaside information existing.
	 */
	if (preserve_kernel_lookaside)
		ih = 1;
	else
		ih = 0;

	ksp_esid_data = be64_to_cpu(p->save_area[KSTACK_INDEX].esid);
	ksp_vsid_data = be64_to_cpu(p->save_area[KSTACK_INDEX].vsid);

	asm volatile(PPC_SLBIA(%0)" \n"
		     "slbmte %1, %2 \n"
		     :: "i" (ih),
			"r" (ksp_vsid_data),
			"r" (ksp_esid_data)
		     : "memory");
}

/*
 * This flushes non-bolted entries; it can be run in virtual mode. Must
 * be called with interrupts disabled.
 */
void slb_flush_and_restore_bolted(void)
{
	BUILD_BUG_ON(SLB_NUM_BOLTED != 2);

	WARN_ON(!irqs_disabled());

	/*
	 * We can't take a PMU exception in the following code, so hard
	 * disable interrupts.
	 */
	hard_irq_disable();

	isync();
	__slb_flush_and_restore_bolted(false);
	isync();

	assert_slb_presence(true, get_paca()->kstack);

	get_paca()->slb_cache_ptr = 0;

	get_paca()->slb_kern_bitmap = (1U << SLB_NUM_BOLTED) - 1;
	get_paca()->slb_used_bitmap = get_paca()->slb_kern_bitmap;
}

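/*
 * Copy the current SLB contents (all mmu_slb_size entries, read with
 * slbmfee/slbmfev) into the caller-supplied buffer, and stash slb_cache_ptr
 * so slb_dump_contents() can report it later.
 */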
void slb_save_contents(struct slb_entry *slb_ptr)
{
	int i;
	unsigned long e, v;

	/* Save slb_cache_ptr value. */
	get_paca()->slb_save_cache_ptr = get_paca()->slb_cache_ptr;

	if (!slb_ptr)
		return;

	for (i = 0; i < mmu_slb_size; i++) {
		asm volatile("slbmfee %0,%1" : "=r" (e) : "r" (i));
		asm volatile("slbmfev %0,%1" : "=r" (v) : "r" (i));
		slb_ptr->esid = e;
		slb_ptr->vsid = v;
		slb_ptr++;
	}
}

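/*
 * Pretty-print an SLB snapshot captured by slb_save_contents(): decode each
 * valid entry's ESID/VSID and segment size, and (on pre-ARCH_300 CPUs) also
 * dump the round-robin pointer and the slb_cache used by switch_slb().
 */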
void slb_dump_contents(struct slb_entry *slb_ptr)
{
	int i, n;
	unsigned long e, v;
	unsigned long llp;

	if (!slb_ptr)
		return;

	pr_err("SLB contents of cpu 0x%x\n", smp_processor_id());

	for (i = 0; i < mmu_slb_size; i++) {
		e = slb_ptr->esid;
		v = slb_ptr->vsid;
		slb_ptr++;

		if (!e && !v)
			continue;

		pr_err("%02d %016lx %016lx %s\n", i, e, v,
		       (e & SLB_ESID_V) ? "VALID" : "NOT VALID");

		if (!(e & SLB_ESID_V))
			continue;

		llp = v & SLB_VSID_LLP;
		if (v & SLB_VSID_B_1T) {
			pr_err(" 1T ESID=%9lx VSID=%13lx LLP:%3lx\n",
			       GET_ESID_1T(e),
			       (v & ~SLB_VSID_B) >> SLB_VSID_SHIFT_1T, llp);
		} else {
			pr_err(" 256M ESID=%9lx VSID=%13lx LLP:%3lx\n",
			       GET_ESID(e),
			       (v & ~SLB_VSID_B) >> SLB_VSID_SHIFT, llp);
		}
	}

	if (!early_cpu_has_feature(CPU_FTR_ARCH_300)) {
		/* RR is not so useful as it's often not used for allocation */
		pr_err("SLB RR allocator index %d\n", get_paca()->stab_rr);

		/* Dump slb cache entries as well. */
		pr_err("SLB cache ptr value = %d\n", get_paca()->slb_save_cache_ptr);
		pr_err("Valid SLB cache entries:\n");
		n = min_t(int, get_paca()->slb_save_cache_ptr, SLB_CACHE_ENTRIES);
		for (i = 0; i < n; i++)
			pr_err("%02d EA[0-35]=%9x\n", i, get_paca()->slb_cache[i]);
		pr_err("Rest of SLB cache entries:\n");
		for (i = n; i < SLB_CACHE_ENTRIES; i++)
			pr_err("%02d EA[0-35]=%9x\n", i, get_paca()->slb_cache[i]);
	}
}

void slb_vmalloc_update(void)
{
	/*
	 * vmalloc is not bolted, so we just have to flush the non-bolted
	 * entries.
	 */
	slb_flush_and_restore_bolted();
}

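/*
 * The thread_info SLB preload cache is a small circular buffer of user ESIDs
 * (EAs shifted right by SID_SHIFT) that are expected to be used again soon.
 * switch_slb() walks it and pre-inserts those segments so the task does not
 * immediately take SLB misses after a context switch. preload_hit() checks
 * for an existing entry, preload_add() enqueues one (evicting the oldest
 * when full), and preload_age() retires the oldest entry.
 */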
static bool preload_hit(struct thread_info *ti, unsigned long esid)
{
	unsigned char i;

	for (i = 0; i < ti->slb_preload_nr; i++) {
		unsigned char idx;

		idx = (ti->slb_preload_tail + i) % SLB_PRELOAD_NR;
		if (esid == ti->slb_preload_esid[idx])
			return true;
	}
	return false;
}

static bool preload_add(struct thread_info *ti, unsigned long ea)
{
	unsigned char idx;
	unsigned long esid;

	if (mmu_has_feature(MMU_FTR_1T_SEGMENT)) {
		/* EAs are stored >> 28 so 256MB segments don't need clearing */
		if (ea & ESID_MASK_1T)
			ea &= ESID_MASK_1T;
	}

	esid = ea >> SID_SHIFT;

	if (preload_hit(ti, esid))
		return false;

	idx = (ti->slb_preload_tail + ti->slb_preload_nr) % SLB_PRELOAD_NR;
	ti->slb_preload_esid[idx] = esid;
	if (ti->slb_preload_nr == SLB_PRELOAD_NR)
		ti->slb_preload_tail = (ti->slb_preload_tail + 1) % SLB_PRELOAD_NR;
	else
		ti->slb_preload_nr++;

	return true;
}

static void preload_age(struct thread_info *ti)
{
	if (!ti->slb_preload_nr)
		return;
	ti->slb_preload_nr--;
	ti->slb_preload_tail = (ti->slb_preload_tail + 1) % SLB_PRELOAD_NR;
}

void slb_setup_new_exec(void)
{
	struct thread_info *ti = current_thread_info();
	struct mm_struct *mm = current->mm;
	unsigned long exec = 0x10000000;

	WARN_ON(irqs_disabled());

	/*
	 * The preload cache can only be used to determine whether an SLB
	 * entry exists if it does not start to overflow.
	 */
	if (ti->slb_preload_nr + 2 > SLB_PRELOAD_NR)
		return;

	hard_irq_disable();

	/*
	 * We have no good place to clear the slb preload cache on exec,
	 * flush_thread is about the earliest arch hook but that happens
	 * after we switch to the mm and have already preloaded the SLBEs.
	 *
	 * For the most part it's probably okay to use entries from the
	 * previous exec; they will age out if unused. It may turn out to
	 * be an advantage to clear the cache before switching to it,
	 * however.
	 */

	/*
	 * Preload some userspace segments into the SLB.
	 * Almost all 32 and 64bit PowerPC executables are linked at
	 * 0x10000000 so it makes sense to preload this segment.
	 */
	if (!is_kernel_addr(exec)) {
		if (preload_add(ti, exec))
			slb_allocate_user(mm, exec);
	}

	/* Libraries and mmaps. */
	if (!is_kernel_addr(mm->mmap_base)) {
		if (preload_add(ti, mm->mmap_base))
			slb_allocate_user(mm, mm->mmap_base);
	}

	/* see switch_slb */
	asm volatile("isync" : : : "memory");

	local_irq_enable();
}

void preload_new_slb_context(unsigned long start, unsigned long sp)
{
	struct thread_info *ti = current_thread_info();
	struct mm_struct *mm = current->mm;
	unsigned long heap = mm->start_brk;

	WARN_ON(irqs_disabled());

	/* see above */
	if (ti->slb_preload_nr + 3 > SLB_PRELOAD_NR)
		return;

	hard_irq_disable();

	/* Userspace entry address. */
	if (!is_kernel_addr(start)) {
		if (preload_add(ti, start))
			slb_allocate_user(mm, start);
	}

	/* Top of stack, grows down. */
	if (!is_kernel_addr(sp)) {
		if (preload_add(ti, sp))
			slb_allocate_user(mm, sp);
	}

	/* Bottom of heap, grows up. */
	if (heap && !is_kernel_addr(heap)) {
		if (preload_add(ti, heap))
			slb_allocate_user(mm, heap);
	}

	/* see switch_slb */
	asm volatile("isync" : : : "memory");

	local_irq_enable();
}

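/*
 * Invalidate one entry recorded in the PACA slb_cache, treating the cached
 * ESID as a kernel address in the 0xc... region. The kernel stack's segment
 * is skipped because it must remain bolted in the SLB.
 */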
static void slb_cache_slbie_kernel(unsigned int index)
{
	unsigned long slbie_data = get_paca()->slb_cache[index];
	unsigned long ksp = get_paca()->kstack;

	slbie_data <<= SID_SHIFT;
	slbie_data |= 0xc000000000000000ULL;
	if ((ksp & slb_esid_mask(mmu_kernel_ssize)) == slbie_data)
		return;
	slbie_data |= mmu_kernel_ssize << SLBIE_SSIZE_SHIFT;

	asm volatile("slbie %0" : : "r" (slbie_data));
}

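/*
 * Invalidate one entry recorded in the PACA slb_cache, treating the cached
 * ESID as a user address: the segment size comes from user_segment_size()
 * and SLBIE_C is set, as it is for all user SLB entries.
 */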
static void slb_cache_slbie_user(unsigned int index)
{
	unsigned long slbie_data = get_paca()->slb_cache[index];

	slbie_data <<= SID_SHIFT;
	slbie_data |= user_segment_size(slbie_data) << SLBIE_SSIZE_SHIFT;
	slbie_data |= SLBIE_C; /* user slbs have C=1 */

	asm volatile("slbie %0" : : "r" (slbie_data));
}

/* Flush all user entries from the segment table of the current processor. */
void switch_slb(struct task_struct *tsk, struct mm_struct *mm)
{
	struct thread_info *ti = task_thread_info(tsk);
	unsigned char i;

	/*
	 * We need interrupts hard-disabled here, not just soft-disabled,
	 * so that a PMU interrupt can't occur, which might try to access
	 * user memory (to get a stack trace) and possibly cause an SLB miss,
	 * which would update the slb_cache/slb_cache_ptr fields in the PACA.
	 */
	hard_irq_disable();
	isync();
	if (stress_slb()) {
		__slb_flush_and_restore_bolted(false);
		isync();
		get_paca()->slb_cache_ptr = 0;
		get_paca()->slb_kern_bitmap = (1U << SLB_NUM_BOLTED) - 1;

	} else if (cpu_has_feature(CPU_FTR_ARCH_300)) {
		/*
		 * SLBIA IH=3 invalidates all Class=1 SLBEs and their
		 * associated lookaside structures, which matches what
		 * switch_slb wants. So ARCH_300 does not use the slb
		 * cache.
		 */
		asm volatile(PPC_SLBIA(3));

	} else {
		unsigned long offset = get_paca()->slb_cache_ptr;

		if (!mmu_has_feature(MMU_FTR_NO_SLBIE_B) &&
		    offset <= SLB_CACHE_ENTRIES) {
			/*
			 * Could assert_slb_presence(true) here, but
			 * hypervisor or machine check could have come
			 * in and removed the entry at this point.
			 */

			for (i = 0; i < offset; i++)
				slb_cache_slbie_user(i);

			/* Workaround POWER5 < DD2.1 issue */
			if (!cpu_has_feature(CPU_FTR_ARCH_207S) && offset == 1)
				slb_cache_slbie_user(0);

		} else {
			/* Flush but retain kernel lookaside information */
			__slb_flush_and_restore_bolted(true);
			isync();

			get_paca()->slb_kern_bitmap = (1U << SLB_NUM_BOLTED) - 1;
		}

		get_paca()->slb_cache_ptr = 0;
	}
	get_paca()->slb_used_bitmap = get_paca()->slb_kern_bitmap;

	copy_mm_to_paca(mm);

	/*
	 * We gradually age out SLBs after a number of context switches to
	 * reduce reload overhead of unused entries (like we do with FP/VEC
	 * reload). Each time we wrap 256 switches, take an entry out of the
	 * SLB preload cache.
	 */
	tsk->thread.load_slb++;
	if (!tsk->thread.load_slb) {
		unsigned long pc = KSTK_EIP(tsk);

		preload_age(ti);
		preload_add(ti, pc);
	}

	for (i = 0; i < ti->slb_preload_nr; i++) {
		unsigned char idx;
		unsigned long ea;

		idx = (ti->slb_preload_tail + i) % SLB_PRELOAD_NR;
		ea = (unsigned long)ti->slb_preload_esid[idx] << SID_SHIFT;

		slb_allocate_user(mm, ea);
	}

	/*
	 * Synchronize slbmte preloads with possible subsequent user memory
	 * address accesses by the kernel (user mode won't happen until
	 * rfid, which is safe).
	 */
	isync();
}

void slb_set_size(u16 size)
{
	mmu_slb_size = size;
}

void slb_initialize(void)
{
	unsigned long linear_llp, vmalloc_llp, io_llp;
	unsigned long lflags;
	static int slb_encoding_inited;
#ifdef CONFIG_SPARSEMEM_VMEMMAP
	unsigned long vmemmap_llp;
#endif

	/* Prepare our SLB miss handler based on our page size */
	linear_llp = mmu_psize_defs[mmu_linear_psize].sllp;
	io_llp = mmu_psize_defs[mmu_io_psize].sllp;
	vmalloc_llp = mmu_psize_defs[mmu_vmalloc_psize].sllp;
	get_paca()->vmalloc_sllp = SLB_VSID_KERNEL | vmalloc_llp;
#ifdef CONFIG_SPARSEMEM_VMEMMAP
	vmemmap_llp = mmu_psize_defs[mmu_vmemmap_psize].sllp;
#endif
	if (!slb_encoding_inited) {
		slb_encoding_inited = 1;
		pr_devel("SLB: linear LLP = %04lx\n", linear_llp);
		pr_devel("SLB: io LLP = %04lx\n", io_llp);
#ifdef CONFIG_SPARSEMEM_VMEMMAP
		pr_devel("SLB: vmemmap LLP = %04lx\n", vmemmap_llp);
#endif
	}

	get_paca()->stab_rr = SLB_NUM_BOLTED - 1;
	get_paca()->slb_kern_bitmap = (1U << SLB_NUM_BOLTED) - 1;
	get_paca()->slb_used_bitmap = get_paca()->slb_kern_bitmap;

	lflags = SLB_VSID_KERNEL | linear_llp;

	/* Invalidate the entire SLB (even entry 0) & all the ERATS */
	asm volatile("isync" ::: "memory");
	asm volatile("slbmte %0,%0" :: "r" (0) : "memory");
	asm volatile("isync; slbia; isync" ::: "memory");
	create_shadowed_slbe(PAGE_OFFSET, mmu_kernel_ssize, lflags, LINEAR_INDEX);

	/*
	 * For the boot cpu, we're running on the stack in init_thread_union,
	 * which is in the first segment of the linear mapping, and also
	 * get_paca()->kstack hasn't been initialized yet.
	 * For secondary cpus, we need to bolt the kernel stack entry now.
	 */
	slb_shadow_clear(KSTACK_INDEX);
	if (raw_smp_processor_id() != boot_cpuid &&
	    (get_paca()->kstack & slb_esid_mask(mmu_kernel_ssize)) > PAGE_OFFSET)
		create_shadowed_slbe(get_paca()->kstack,
				     mmu_kernel_ssize, lflags, KSTACK_INDEX);

	asm volatile("isync" ::: "memory");
}

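/*
 * Record a newly inserted user ESID in the PACA slb_cache so that the next
 * switch_slb() can invalidate exactly those entries with slbie rather than
 * flushing the whole SLB. If the cache overflows, bump slb_cache_ptr past
 * SLB_CACHE_ENTRIES so switch_slb() ignores the cache and does a full flush.
 */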
static void slb_cache_update(unsigned long esid_data)
{
	int slb_cache_index;

	if (cpu_has_feature(CPU_FTR_ARCH_300))
		return; /* ISAv3.0B and later does not use slb_cache */

	if (stress_slb())
		return;

	/*
	 * Now update slb cache entries
	 */
	slb_cache_index = local_paca->slb_cache_ptr;
	if (slb_cache_index < SLB_CACHE_ENTRIES) {
		/*
		 * We have space in slb cache for optimized switch_slb().
		 * Top 36 bits from esid_data as per ISA
		 */
		local_paca->slb_cache[slb_cache_index++] = esid_data >> SID_SHIFT;
		local_paca->slb_cache_ptr++;
	} else {
		/*
		 * Our cache is full and the current cache content strictly
		 * doesn't indicate the active SLB contents. Bump the ptr
		 * so that switch_slb() will ignore the cache.
		 */
		local_paca->slb_cache_ptr = SLB_CACHE_ENTRIES + 1;
	}
}

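/*
 * Pick the SLB entry index to use for a new insertion: take the first free
 * bit in slb_used_bitmap while any of the first 32 entries remain free, then
 * fall back to round-robin replacement via stab_rr, wrapping back to
 * SLB_NUM_BOLTED so the bolted entries are never victimised.
 */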
static enum slb_index alloc_slb_index(bool kernel)
{
	enum slb_index index;

	/*
	 * The allocation bitmaps can become out of synch with the SLB
	 * when the _switch code does slbie when bolting a new stack
	 * segment and it must not be anywhere else in the SLB. This leaves
	 * a kernel allocated entry that is unused in the SLB. With very
	 * large systems or small segment sizes, the bitmaps could slowly
	 * fill with these entries. They will eventually be cleared out
	 * by the round robin allocator in that case, so it's probably not
	 * worth accounting for.
	 */

	/*
	 * SLBs beyond 32 entries are allocated with stab_rr only.
	 * POWER7/8/9 have 32 SLB entries; this could be expanded if a
	 * future CPU has more.
	 */
	if (local_paca->slb_used_bitmap != U32_MAX) {
		index = ffz(local_paca->slb_used_bitmap);
		local_paca->slb_used_bitmap |= 1U << index;
		if (kernel)
			local_paca->slb_kern_bitmap |= 1U << index;
	} else {
		/* round-robin replacement of slb starting at SLB_NUM_BOLTED. */
		index = local_paca->stab_rr;
		if (index < (mmu_slb_size - 1))
			index++;
		else
			index = SLB_NUM_BOLTED;
		local_paca->stab_rr = index;
		if (index < 32) {
			if (kernel)
				local_paca->slb_kern_bitmap |= 1U << index;
			else
				local_paca->slb_kern_bitmap &= ~(1U << index);
		}
	}
	BUG_ON(index < SLB_NUM_BOLTED);

	return index;
}

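/*
 * Build and insert an SLB entry for @ea: look up the VSID, allocate an entry
 * index, program it with slbmte, and maintain the slb_cache bookkeeping used
 * by switch_slb() (or, under stress_slb(), the cache of inserted non-bolted
 * kernel entries).
 */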
static long slb_insert_entry(unsigned long ea, unsigned long context,
			     unsigned long flags, int ssize, bool kernel)
{
	unsigned long vsid;
	unsigned long vsid_data, esid_data;
	enum slb_index index;

	vsid = get_vsid(context, ea, ssize);
	if (!vsid)
		return -EFAULT;

	/*
	 * There must not be a kernel SLB fault in alloc_slb_index or before
	 * slbmte here or the allocation bitmaps could get out of whack with
	 * the SLB.
	 *
	 * User SLB faults or preloads take this path which might get inlined
	 * into the caller, so add compiler barriers here to ensure unsafe
	 * memory accesses do not come between.
	 */
	barrier();

	index = alloc_slb_index(kernel);

	vsid_data = __mk_vsid_data(vsid, ssize, flags);
	esid_data = mk_esid_data(ea, ssize, index);

	/*
	 * No need for an isync before or after this slbmte. The exception
	 * we enter with and the rfid we exit with are context synchronizing.
	 * User preloads should add isync afterwards in case the kernel
	 * accesses user memory before it returns to userspace with rfid.
	 */
	assert_slb_presence(false, ea);
	if (stress_slb()) {
		int slb_cache_index = local_paca->slb_cache_ptr;

		/*
		 * stress_slb() does not use the slb cache, so repurpose it as
		 * a cache of inserted (non-bolted) kernel SLB entries. All
		 * non-bolted kernel entries are flushed on any user fault,
		 * or if there are already 3 non-bolted kernel entries.
		 */
		BUILD_BUG_ON(SLB_CACHE_ENTRIES < 3);
		if (!kernel || slb_cache_index == 3) {
			int i;

			for (i = 0; i < slb_cache_index; i++)
				slb_cache_slbie_kernel(i);
			slb_cache_index = 0;
		}

		if (kernel)
			local_paca->slb_cache[slb_cache_index++] = esid_data >> SID_SHIFT;
		local_paca->slb_cache_ptr = slb_cache_index;
	}
	asm volatile("slbmte %0, %1" : : "r" (vsid_data), "r" (esid_data));

	barrier();

	if (!kernel)
		slb_cache_update(esid_data);

	return 0;
}

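/*
 * Handle an SLB miss on a kernel address: map the region id to the
 * appropriate segment flags and base page size (linear map, vmemmap,
 * vmalloc or I/O region), then insert a kernel SLB entry.
 */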
static long slb_allocate_kernel(unsigned long ea, unsigned long id)
{
	unsigned long context;
	unsigned long flags;
	int ssize;

	if (id == LINEAR_MAP_REGION_ID) {

		/* We only support up to H_MAX_PHYSMEM_BITS */
		if ((ea & EA_MASK) > (1UL << H_MAX_PHYSMEM_BITS))
			return -EFAULT;

		flags = SLB_VSID_KERNEL | mmu_psize_defs[mmu_linear_psize].sllp;

#ifdef CONFIG_SPARSEMEM_VMEMMAP
	} else if (id == VMEMMAP_REGION_ID) {

		if (ea >= H_VMEMMAP_END)
			return -EFAULT;

		flags = SLB_VSID_KERNEL | mmu_psize_defs[mmu_vmemmap_psize].sllp;
#endif
	} else if (id == VMALLOC_REGION_ID) {

		if (ea >= H_VMALLOC_END)
			return -EFAULT;

		flags = local_paca->vmalloc_sllp;

	} else if (id == IO_REGION_ID) {

		if (ea >= H_KERN_IO_END)
			return -EFAULT;

		flags = SLB_VSID_KERNEL | mmu_psize_defs[mmu_io_psize].sllp;

	} else {
		return -EFAULT;
	}

	ssize = MMU_SEGSIZE_1T;
	if (!mmu_has_feature(MMU_FTR_1T_SEGMENT))
		ssize = MMU_SEGSIZE_256M;

	context = get_kernel_context(ea);

	return slb_insert_entry(ea, context, flags, ssize, true);
}

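/*
 * Handle an SLB miss on a user address: validate it against the mm's SLB
 * address limit, look up the context and slice base page size, then insert
 * a user SLB entry.
 */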
static long slb_allocate_user(struct mm_struct *mm, unsigned long ea)
{
	unsigned long context;
	unsigned long flags;
	int bpsize;
	int ssize;

	/*
	 * Consider this a bad access if we take an SLB miss on an address
	 * above the addr limit.
	 */
	if (ea >= mm_ctx_slb_addr_limit(&mm->context))
		return -EFAULT;

	context = get_user_context(&mm->context, ea);
	if (!context)
		return -EFAULT;

	if (unlikely(ea >= H_PGTABLE_RANGE)) {
		WARN_ON(1);
		return -EFAULT;
	}

	ssize = user_segment_size(ea);

	bpsize = get_slice_psize(mm, ea);
	flags = SLB_VSID_USER | mmu_psize_defs[bpsize].sllp;

	return slb_insert_entry(ea, context, flags, ssize, false);
}

DEFINE_INTERRUPT_HANDLER_RAW(do_slb_fault)
{
	unsigned long ea = regs->dar;
	unsigned long id = get_region_id(ea);

	/* IRQs are not reconciled here, so can't check irqs_disabled */
	VM_WARN_ON(mfmsr() & MSR_EE);

	if (regs_is_unrecoverable(regs))
		return -EINVAL;

	/*
	 * SLB kernel faults must be very careful not to touch anything that is
	 * not bolted. E.g., PACA and global variables are okay, mm->context
	 * stuff is not. SLB user faults may access all of memory (and induce
	 * one recursive SLB kernel fault), so the kernel fault must not
	 * trample on the user fault state at those points.
	 */

	/*
	 * This is a raw interrupt handler, for performance, so that
	 * fast_interrupt_return can be used. The handler must not touch local
	 * irq state, or schedule. We could test for usermode and upgrade to a
	 * normal process context (synchronous) interrupt for those, which
	 * would make them first-class kernel code and able to be traced and
	 * instrumented; although performance would suffer a bit, it would
	 * probably be a good tradeoff.
	 */
	if (id >= LINEAR_MAP_REGION_ID) {
		long err;
#ifdef CONFIG_DEBUG_VM
		/* Catch recursive kernel SLB faults. */
		BUG_ON(local_paca->in_kernel_slb_handler);
		local_paca->in_kernel_slb_handler = 1;
#endif
		err = slb_allocate_kernel(ea, id);
#ifdef CONFIG_DEBUG_VM
		local_paca->in_kernel_slb_handler = 0;
#endif
		return err;
	} else {
		struct mm_struct *mm = current->mm;
		long err;

		if (unlikely(!mm))
			return -EFAULT;

		err = slb_allocate_user(mm, ea);
		if (!err)
			preload_add(current_thread_info(), ea);

		return err;
	}
}