// SPDX-License-Identifier: GPL-2.0-only
/*
 * TLB Management (flush/create/diagnostics) for MMUv3 and MMUv4
 *
 * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
 *
 */

#include <linux/module.h>
#include <linux/bug.h>
#include <linux/mm_types.h>

#include <asm/arcregs.h>
#include <asm/setup.h>
#include <asm/mmu_context.h>
#include <asm/mmu.h>

/* A copy of the ASID from the PID reg is kept in asid_cache */
DEFINE_PER_CPU(unsigned int, asid_cache) = MM_CTXT_FIRST_CYCLE;

static struct cpuinfo_arc_mmu {
	unsigned int ver, pg_sz_k, s_pg_sz_m, pae, sets, ways;
} mmuinfo;

/*
 * Utility routine to erase a J-TLB entry
 * Caller needs to set up the Index Reg (manually or via getIndex)
 */
static inline void __tlb_entry_erase(void)
{
	write_aux_reg(ARC_REG_TLBPD1, 0);

	if (is_pae40_enabled())
		write_aux_reg(ARC_REG_TLBPD1HI, 0);

	write_aux_reg(ARC_REG_TLBPD0, 0);
	write_aux_reg(ARC_REG_TLBCOMMAND, TLBWrite);
}

static void utlb_invalidate(void)
{
	write_aux_reg(ARC_REG_TLBCOMMAND, TLBIVUTLB);
}

#ifdef CONFIG_ARC_MMU_V3

static inline unsigned int tlb_entry_lkup(unsigned long vaddr_n_asid)
{
	unsigned int idx;

	write_aux_reg(ARC_REG_TLBPD0, vaddr_n_asid);

	write_aux_reg(ARC_REG_TLBCOMMAND, TLBProbe);
	idx = read_aux_reg(ARC_REG_TLBINDEX);

	return idx;
}

static void tlb_entry_erase(unsigned int vaddr_n_asid)
{
	unsigned int idx;

	/* Locate the TLB entry for this vaddr + ASID */
	idx = tlb_entry_lkup(vaddr_n_asid);

	/* No error means entry found, zero it out */
	if (likely(!(idx & TLB_LKUP_ERR))) {
		__tlb_entry_erase();
	} else {
		/* Duplicate entry error */
		WARN(idx == TLB_DUP_ERR, "Probe returned Dup PD for %x\n",
		     vaddr_n_asid);
	}
}

static void tlb_entry_insert(unsigned int pd0, phys_addr_t pd1)
{
	unsigned int idx;

	/*
	 * First verify if an entry for this vaddr+ASID already exists.
	 * This also sets up PD0 (vaddr, ASID..) for the final commit.
	 */
	idx = tlb_entry_lkup(pd0);

	/*
	 * If not already present, get a free slot from MMU.
	 * Otherwise, Probe would have located the entry and set INDEX Reg
	 * to the existing location. This causes the Write CMD to overwrite
	 * the existing entry with the new PD0 and PD1.
	 */
	if (likely(idx & TLB_LKUP_ERR))
		write_aux_reg(ARC_REG_TLBCOMMAND, TLBGetIndex);

	/* setup the other half of TLB entry (pfn, rwx..) */
	write_aux_reg(ARC_REG_TLBPD1, pd1);

	/*
	 * Commit the Entry to MMU
	 * It doesn't sound safe to use the TLBWriteNI cmd here
	 * which doesn't flush uTLBs. I'd rather be safe than sorry.
	 */
	write_aux_reg(ARC_REG_TLBCOMMAND, TLBWrite);
}

#else	/* MMUv4 */

static void tlb_entry_erase(unsigned int vaddr_n_asid)
{
	write_aux_reg(ARC_REG_TLBPD0, vaddr_n_asid | _PAGE_PRESENT);
	write_aux_reg(ARC_REG_TLBCOMMAND, TLBDeleteEntry);
}

static void tlb_entry_insert(unsigned int pd0, phys_addr_t pd1)
{
	write_aux_reg(ARC_REG_TLBPD0, pd0);

	if (!is_pae40_enabled()) {
		write_aux_reg(ARC_REG_TLBPD1, pd1);
	} else {
		write_aux_reg(ARC_REG_TLBPD1, pd1 & 0xFFFFFFFF);
		write_aux_reg(ARC_REG_TLBPD1HI, (u64)pd1 >> 32);
	}
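	/*
	 * e.g. (illustrative values) for the PAE40 split above:
	 * with pd1 = 0x1_234560xx (pfn | prot bits), PD1 gets 0x234560xx
	 * and PD1HI gets 0x1, i.e. PD1HI holds paddr bits [39:32]
	 */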

	write_aux_reg(ARC_REG_TLBCOMMAND, TLBInsertEntry);
}

#endif

/*
 * Unconditionally (without lookup) erase the entire MMU contents
 */

noinline void local_flush_tlb_all(void)
{
	struct cpuinfo_arc_mmu *mmu = &mmuinfo;
	unsigned long flags;
	unsigned int entry;
	int num_tlb = mmu->sets * mmu->ways;

	local_irq_save(flags);

	/* Load PD0 and PD1 with template for a Blank Entry */
	write_aux_reg(ARC_REG_TLBPD1, 0);

	if (is_pae40_enabled())
		write_aux_reg(ARC_REG_TLBPD1HI, 0);

	write_aux_reg(ARC_REG_TLBPD0, 0);

	for (entry = 0; entry < num_tlb; entry++) {
		/* write this entry to the TLB */
		write_aux_reg(ARC_REG_TLBINDEX, entry);
		write_aux_reg(ARC_REG_TLBCOMMAND, TLBWriteNI);
	}

	if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) {
		const int stlb_idx = 0x800;

		/* Blank sTLB entry */
		write_aux_reg(ARC_REG_TLBPD0, _PAGE_HW_SZ);

		for (entry = stlb_idx; entry < stlb_idx + 16; entry++) {
			write_aux_reg(ARC_REG_TLBINDEX, entry);
			write_aux_reg(ARC_REG_TLBCOMMAND, TLBWriteNI);
		}
	}

	utlb_invalidate();

	local_irq_restore(flags);
}

/*
 * Flush the entire MM for userland. The fastest way is to move to Next ASID
 */
noinline void local_flush_tlb_mm(struct mm_struct *mm)
{
	/*
	 * Small optimisation courtesy IA64
	 * flush_mm is called during fork, exit, munmap etc., multiple times
	 * as well. Only for fork() do we need to move parent to a new MMU
	 * ctxt; all other cases are NOPs, hence this check.
	 */
	if (atomic_read(&mm->mm_users) == 0)
		return;

	/*
	 * - Move to a new ASID, but only if the mm is still wired in
	 *   (Android Binder ended up calling this for vma->vm_mm != tsk->mm,
	 *   causing h/w - s/w ASID to get out of sync)
	 * - Also the new implementation of get_new_mmu_context() allocates
	 *   a new ASID only if one is not allocated already - so unallocate
	 *   first
	 */
	destroy_context(mm);
	if (current->mm == mm)
		get_new_mmu_context(mm);
}

/*
 * Flush a Range of TLB entries for userland.
 * @start is inclusive, while @end is exclusive
 * Difference between this and Kernel Range Flush is
 *  -Here the fastest way (if range is too large) is to move to next ASID
 *   without doing any explicit Shootdown
 *  -In case of kernel Flush, entry has to be shot down explicitly
 */
void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
			   unsigned long end)
{
	const unsigned int cpu = smp_processor_id();
	unsigned long flags;

	/*
	 * If range @start to @end is more than 32 TLB entries deep
	 * (e.g. 256K with 8K pages), it's better to move to a new ASID
	 * rather than searching for individual entries and then shooting
	 * them down.
	 *
	 * The calculation above is rough and doesn't account for unaligned
	 * parts, but this is heuristic anyway.
	 */
	if (unlikely((end - start) >= PAGE_SIZE * 32)) {
		local_flush_tlb_mm(vma->vm_mm);
		return;
	}

	/*
	 * @start moved to page start: this alone suffices for checking
	 * the loop end condition below, w/o need for aligning @end to a
	 * page boundary; e.g. 2000 to 4001 will anyhow loop twice
	 */
	start &= PAGE_MASK;

	local_irq_save(flags);

	if (asid_mm(vma->vm_mm, cpu) != MM_CTXT_NO_ASID) {
		while (start < end) {
			tlb_entry_erase(start | hw_pid(vma->vm_mm, cpu));
			start += PAGE_SIZE;
		}
	}

	local_irq_restore(flags);
}

/*
 * Flush the kernel TLB entries - vmalloc/modules (Global from MMU perspective)
 * @start, @end interpreted as kvaddr
 * Interestingly, shared TLB entries can also be flushed using just
 * @start, @end alone (interpreted as user vaddr), although technically SASID
 * is also needed. However our smart TLBProbe lookup takes care of that.
 */
void local_flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
	unsigned long flags;

	/* exactly same as above, except for TLB entry not taking ASID */

	if (unlikely((end - start) >= PAGE_SIZE * 32)) {
		local_flush_tlb_all();
		return;
	}

	start &= PAGE_MASK;

	local_irq_save(flags);
	while (start < end) {
		tlb_entry_erase(start);
		start += PAGE_SIZE;
	}

	local_irq_restore(flags);
}

/*
 * Delete TLB entry in MMU for a given page (vaddr)
 * NOTE One TLB entry contains translation for a single PAGE
 */

void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long page)
{
	const unsigned int cpu = smp_processor_id();
	unsigned long flags;

	/*
	 * Note that it is critical that interrupts are DISABLED between
	 * checking the ASID and using it to flush the TLB entry
	 */
	local_irq_save(flags);

	if (asid_mm(vma->vm_mm, cpu) != MM_CTXT_NO_ASID)
		tlb_entry_erase((page & PAGE_MASK) | hw_pid(vma->vm_mm, cpu));

	local_irq_restore(flags);
}

#ifdef CONFIG_SMP

struct tlb_args {
	struct vm_area_struct *ta_vma;
	unsigned long ta_start;
	unsigned long ta_end;
};

static inline void ipi_flush_tlb_page(void *arg)
{
	struct tlb_args *ta = arg;

	local_flush_tlb_page(ta->ta_vma, ta->ta_start);
}

static inline void ipi_flush_tlb_range(void *arg)
{
	struct tlb_args *ta = arg;

	local_flush_tlb_range(ta->ta_vma, ta->ta_start, ta->ta_end);
}

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static inline void ipi_flush_pmd_tlb_range(void *arg)
{
	struct tlb_args *ta = arg;

	local_flush_pmd_tlb_range(ta->ta_vma, ta->ta_start, ta->ta_end);
}
#endif

static inline void ipi_flush_tlb_kernel_range(void *arg)
{
	struct tlb_args *ta = (struct tlb_args *)arg;

	local_flush_tlb_kernel_range(ta->ta_start, ta->ta_end);
}

void flush_tlb_all(void)
{
	on_each_cpu((smp_call_func_t)local_flush_tlb_all, NULL, 1);
}

void flush_tlb_mm(struct mm_struct *mm)
{
	on_each_cpu_mask(mm_cpumask(mm), (smp_call_func_t)local_flush_tlb_mm,
			 mm, 1);
}

void flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr)
{
	struct tlb_args ta = {
		.ta_vma = vma,
		.ta_start = uaddr
	};

	on_each_cpu_mask(mm_cpumask(vma->vm_mm), ipi_flush_tlb_page, &ta, 1);
}

void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
		     unsigned long end)
{
	struct tlb_args ta = {
		.ta_vma = vma,
		.ta_start = start,
		.ta_end = end
	};

	on_each_cpu_mask(mm_cpumask(vma->vm_mm), ipi_flush_tlb_range, &ta, 1);
}

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start,
			 unsigned long end)
{
	struct tlb_args ta = {
		.ta_vma = vma,
		.ta_start = start,
		.ta_end = end
	};

	on_each_cpu_mask(mm_cpumask(vma->vm_mm), ipi_flush_pmd_tlb_range, &ta, 1);
}
#endif

void flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
	struct tlb_args ta = {
		.ta_start = start,
		.ta_end = end
	};

	on_each_cpu(ipi_flush_tlb_kernel_range, &ta, 1);
}
#endif

/*
 * Routine to create a TLB entry
 */
static void create_tlb(struct vm_area_struct *vma, unsigned long vaddr, pte_t *ptep)
{
	unsigned long flags;
	unsigned int asid_or_sasid, rwx;
	unsigned long pd0;
	phys_addr_t pd1;

	/*
	 * create_tlb() assumes that current->mm == vma->vm_mm, since
	 * -the ASID for the TLB entry is fetched from the MMU ASID reg
	 *  (valid for curr)
	 * -it completes the lazy write to SASID reg (again valid for curr tsk)
	 *
	 * Removing the assumption involves
	 * -Using vma->vm_mm->context{ASID,SASID}, as opposed to MMU reg.
	 * -More importantly it makes this handler inconsistent with fast-path
	 *  TLB Refill handler which always deals with "current"
	 *
	 * Let's see the use cases when current->mm != vma->vm_mm and we land
	 * here
	 *  1. execve->copy_strings()->__get_user_pages->handle_mm_fault
	 *     Here VM wants to pre-install a TLB entry for user stack while
	 *     current->mm still points to pre-execve mm (hence the condition).
	 *     However the stack vaddr is soon relocated (randomization) and
	 *     move_page_tables() tries to undo that TLB entry.
	 *     Thus not creating a TLB entry is not any worse.
	 *
	 *  2. ptrace(POKETEXT) causes a CoW - debugger(current) inserting a
	 *     breakpoint in debugged task. Not creating a TLB now is not
	 *     performance critical.
	 *
	 * Neither case above justifies the code churn of removing the
	 * assumption.
	 */
	if (current->active_mm != vma->vm_mm)
		return;


	local_irq_save(flags);

	vaddr &= PAGE_MASK;

	/* update this PTE credentials */
	pte_val(*ptep) |= (_PAGE_PRESENT | _PAGE_ACCESSED);

	/* Create HW TLB(PD0,PD1) from PTE */

	/* ASID for this task */
	asid_or_sasid = read_aux_reg(ARC_REG_PID) & 0xff;

	pd0 = vaddr | asid_or_sasid | (pte_val(*ptep) & PTE_BITS_IN_PD0);

	/*
	 * ARC MMU provides fully orthogonal access bits for K/U mode,
	 * however Linux only saves 1 set to save PTE real-estate
	 * Here we convert 3 PTE bits into 6 MMU bits:
	 * -Kernel only entries have Kr Kw Kx 0 0 0
	 * -User entries have mirrored K and U bits
	 */
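	/*
	 * e.g. (illustrative values): r w x = 101b
	 * -global/kernel page: 101b << 3        => Kr 0 Kx  0 0  0
	 * -user page:          101b | 101b << 3 => Kr 0 Kx Ur 0 Ux
	 */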
	rwx = pte_val(*ptep) & PTE_BITS_RWX;

	if (pte_val(*ptep) & _PAGE_GLOBAL)
		rwx <<= 3;		/* r w x => Kr Kw Kx 0 0 0 */
	else
		rwx |= (rwx << 3);	/* r w x => Kr Kw Kx Ur Uw Ux */

	pd1 = rwx | (pte_val(*ptep) & PTE_BITS_NON_RWX_IN_PD1);

	tlb_entry_insert(pd0, pd1);

	local_irq_restore(flags);
}

/*
 * Called at the end of pagefault, for a userspace mapped page
 *  -pre-install the corresponding TLB entry into MMU
 *  -Finalize the delayed D-cache flush of kernel mapping of page due to
 *	flush_dcache_page(), copy_user_page()
 *
 * Note that flush (when done) involves both WBACK - so physical page is
 * in sync as well as INV - so any non-congruent aliases don't remain
 */
void update_mmu_cache_range(struct vm_fault *vmf, struct vm_area_struct *vma,
		unsigned long vaddr_unaligned, pte_t *ptep, unsigned int nr)
{
	unsigned long vaddr = vaddr_unaligned & PAGE_MASK;
	phys_addr_t paddr = pte_val(*ptep) & PAGE_MASK_PHYS;
	struct page *page = pfn_to_page(pte_pfn(*ptep));

	create_tlb(vma, vaddr, ptep);

	if (page == ZERO_PAGE(0))
		return;

	/*
	 * For executable pages, since icache doesn't snoop dcache, any
	 * dirty K-mapping of a code page needs to be wback+inv so that
	 * icache fetch by userspace sees code correctly.
	 */
	if (vma->vm_flags & VM_EXEC) {
		struct folio *folio = page_folio(page);
		int dirty = !test_and_set_bit(PG_dc_clean, &folio->flags);

		if (dirty) {
			unsigned long offset = offset_in_folio(folio, paddr);

			nr = folio_nr_pages(folio);
			paddr -= offset;
			vaddr -= offset;

			/* wback + inv dcache lines (K-mapping) */
			__flush_dcache_pages(paddr, paddr, nr);

			/* invalidate any existing icache lines (U-mapping) */
			if (vma->vm_flags & VM_EXEC)
				__inv_icache_pages(paddr, vaddr, nr);
		}
	}
}

#ifdef CONFIG_TRANSPARENT_HUGEPAGE

/*
 * MMUv4 in HS38x cores supports Super Pages which are the basis for Linux THP
 * support.
 *
 * Normal and Super pages can co-exist (of course not overlap) in TLB with a
 * new bit "SZ" in TLB page descriptor to distinguish between them.
 * Super Page size is configurable in hardware (4K to 16M), but fixed once
 * the RTL is built.
 *
 * The exact THP size a Linux configuration will support is a function of:
 * - MMU page size (typically 8K, RTL fixed)
 * - software page walker address split between PGD:PTE:PFN (typically
 *   11:8:13, but can be changed with 1 line)
 * So for the defaults above, the THP size supported is 8K * (2^8) = 2M
 *
 * The default Page Walker is 2 levels, PGD:PTE:PFN, which in the THP regime
 * reduces to 1 level (as the PTE is folded into the PGD and canonically
 * referred to as PMD).
 * Thus THP PMD accessors are implemented in terms of PTE (just like sparc)
 */

void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr,
			  pmd_t *pmd)
{
	pte_t pte = __pte(pmd_val(*pmd));

	update_mmu_cache_range(NULL, vma, addr, &pte, HPAGE_PMD_NR);
}

void local_flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start,
			       unsigned long end)
{
	unsigned int cpu;
	unsigned long flags;

	local_irq_save(flags);

	cpu = smp_processor_id();

	if (likely(asid_mm(vma->vm_mm, cpu) != MM_CTXT_NO_ASID)) {
		unsigned int asid = hw_pid(vma->vm_mm, cpu);

		/* No need to loop here: this will always be for 1 Huge Page */
		tlb_entry_erase(start | _PAGE_HW_SZ | asid);
	}

	local_irq_restore(flags);
}

#endif

/*
 * Read the MMU Build Configuration Registers, decode them and save into
 * the cpuinfo structure for later use.
 * No validation is done here, simply read/convert the BCRs
 */
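/*
 * A sample of the line this formats into @buf (values purely illustrative,
 * not from any particular hardware):
 *   MMU [v4]	: 8k/2M (THP enabled), swalk 2 lvl, JTLB 128x4, uDTLB 8, uITLB 4
 */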
int arc_mmu_mumbojumbo(int c, char *buf, int len)
{
	struct cpuinfo_arc_mmu *mmu = &mmuinfo;
	unsigned int bcr, u_dtlb, u_itlb, sasid;
	struct bcr_mmu_3 *mmu3;
	struct bcr_mmu_4 *mmu4;
	char super_pg[64] = "";
	int n = 0;

	bcr = read_aux_reg(ARC_REG_MMU_BCR);
	mmu->ver = (bcr >> 24);

	if (is_isa_arcompact() && mmu->ver == 3) {
		mmu3 = (struct bcr_mmu_3 *)&bcr;
		mmu->pg_sz_k = 1 << (mmu3->pg_sz - 1);
		mmu->sets = 1 << mmu3->sets;
		mmu->ways = 1 << mmu3->ways;
		u_dtlb = mmu3->u_dtlb;
		u_itlb = mmu3->u_itlb;
		sasid = mmu3->sasid;
	} else {
		mmu4 = (struct bcr_mmu_4 *)&bcr;
		mmu->pg_sz_k = 1 << (mmu4->sz0 - 1);
		mmu->s_pg_sz_m = 1 << (mmu4->sz1 - 11);
		mmu->sets = 64 << mmu4->n_entry;
		mmu->ways = mmu4->n_ways * 2;
		u_dtlb = mmu4->u_dtlb * 4;
		u_itlb = mmu4->u_itlb * 4;
		sasid = mmu4->sasid;
		mmu->pae = mmu4->pae;
	}

	if (mmu->s_pg_sz_m)
		scnprintf(super_pg, 64, "/%dM%s",
			  mmu->s_pg_sz_m,
			  IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) ? " (THP enabled)" : "");

	n += scnprintf(buf + n, len - n,
		       "MMU [v%x]\t: %dk%s, swalk %d lvl, JTLB %dx%d, uDTLB %d, uITLB %d%s%s%s\n",
		       mmu->ver, mmu->pg_sz_k, super_pg, CONFIG_PGTABLE_LEVELS,
		       mmu->sets, mmu->ways,
		       u_dtlb, u_itlb,
		       IS_AVAIL1(sasid, ", SASID"),
		       IS_AVAIL2(mmu->pae, ", PAE40 ", CONFIG_ARC_HAS_PAE40));

	return n;
}

int pae40_exist_but_not_enab(void)
{
	return mmuinfo.pae && !is_pae40_enabled();
}

void arc_mmu_init(void)
{
	struct cpuinfo_arc_mmu *mmu = &mmuinfo;
	int compat = 0;

	/*
	 * Can't be done in processor.h due to header include dependencies
	 */
	BUILD_BUG_ON(!IS_ALIGNED((CONFIG_ARC_KVADDR_SIZE << 20), PMD_SIZE));

	/*
	 * stack top size sanity check,
	 * Can't be done in processor.h due to header include dependencies
	 */
	BUILD_BUG_ON(!IS_ALIGNED(STACK_TOP, PMD_SIZE));

	/*
	 * Ensure that MMU features assumed by kernel exist in hardware.
	 * - For older ARC700 cpus, only v3 supported
	 * - For HS cpus, v4 was baseline and v5 is backwards compatible
	 *   (will run older software).
	 */
	if (is_isa_arcompact() && mmu->ver == 3)
		compat = 1;
	else if (is_isa_arcv2() && mmu->ver >= 4)
		compat = 1;

	if (!compat)
		panic("MMU ver %d doesn't match kernel built for\n", mmu->ver);

	if (mmu->pg_sz_k != TO_KB(PAGE_SIZE))
		panic("MMU pg size != PAGE_SIZE (%luk)\n", TO_KB(PAGE_SIZE));

	if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) &&
	    mmu->s_pg_sz_m != TO_MB(HPAGE_PMD_SIZE))
		panic("MMU Super pg size != Linux HPAGE_PMD_SIZE (%luM)\n",
		      (unsigned long)TO_MB(HPAGE_PMD_SIZE));

	if (IS_ENABLED(CONFIG_ARC_HAS_PAE40) && !mmu->pae)
		panic("Hardware doesn't support PAE40\n");

	/* Enable the MMU with ASID 0 */
	mmu_setup_asid(NULL, 0);

	/* cache the pgd pointer in MMU SCRATCH reg (ARCv2 only) */
	mmu_setup_pgd(NULL, swapper_pg_dir);

	if (pae40_exist_but_not_enab())
		write_aux_reg(ARC_REG_TLBPD1HI, 0);
}

/*
 * TLB Programmer's Model uses Linear Indexes: 0 to {255, 511} for 128 x {2,4}
 * The mapping is Column-first.
 *		---------------------	-----------
 *		|way0|way1|way2|way3|	|way0|way1|
 *		---------------------	-----------
 * [set0]	|  0 |  1 |  2 |  3 |	|  0 |  1 |
 * [set1]	|  4 |  5 |  6 |  7 |	|  2 |  3 |
 * ~		~			~
 * [set127]	| 508| 509| 510| 511|	| 254| 255|
 *		---------------------	-----------
 * For normal operations we don't (and must not) care how the above works,
 * since the MMU cmd getIndex(vaddr) abstracts that out.
 * However for walking WAYS of a SET, we need to know this
 */
#define SET_WAY_TO_IDX(mmu, set, way)  ((set) * mmu->ways + (way))
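/*
 * e.g. for the 128x4 config above, (set 1, way 2) maps to linear index
 * 1 * 4 + 2 = 6, matching the column-first table
 */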

/*
 * Handling of Duplicate PD (TLB entry) in MMU.
 * -Could be due to buggy customer tapeouts or obscure kernel bugs
 * -MMU complains not at the time of duplicate PD installation, but at the
 *  time of a lookup matching multiple ways.
 * -Ideally these should never happen - but if they do - workaround by
 *  deleting the duplicate one.
 * -Knob to be verbose about it (TODO: hook it up to debugfs)
 */
volatile int dup_pd_silent; /* Be silent about it or complain (default) */

void do_tlb_overlap_fault(unsigned long cause, unsigned long address,
			  struct pt_regs *regs)
{
	struct cpuinfo_arc_mmu *mmu = &mmuinfo;
	unsigned long flags;
	int set, n_ways = mmu->ways;

	n_ways = min(n_ways, 4);
	BUG_ON(mmu->ways > 4);

	local_irq_save(flags);

	/* loop thru all sets of TLB */
	for (set = 0; set < mmu->sets; set++) {

		int is_valid, way;
		unsigned int pd0[4];

		/* read out all the ways of current set */
		for (way = 0, is_valid = 0; way < n_ways; way++) {
			write_aux_reg(ARC_REG_TLBINDEX,
				      SET_WAY_TO_IDX(mmu, set, way));
			write_aux_reg(ARC_REG_TLBCOMMAND, TLBRead);
			pd0[way] = read_aux_reg(ARC_REG_TLBPD0);
			is_valid |= pd0[way] & _PAGE_PRESENT;
			pd0[way] &= PAGE_MASK;
		}

		/* If all the WAYS in SET are empty, skip to next SET */
		if (!is_valid)
			continue;

		/* Scan the set for duplicate ways: needs a nested loop */
		for (way = 0; way < n_ways - 1; way++) {

			int n;

			if (!pd0[way])
				continue;

			for (n = way + 1; n < n_ways; n++) {
				if (pd0[way] != pd0[n])
					continue;

				if (!dup_pd_silent)
					pr_info("Dup TLB PD0 %08x @ set %d ways %d,%d\n",
						pd0[way], set, way, n);

				/*
				 * clear entry @way and not @n.
				 * This is critical to our optimised loop
				 */
				pd0[way] = 0;
				write_aux_reg(ARC_REG_TLBINDEX,
					      SET_WAY_TO_IDX(mmu, set, way));
				__tlb_entry_erase();
			}
		}
	}

	local_irq_restore(flags);
}
756 | |