// SPDX-License-Identifier: GPL-2.0
/*
 * SPARC64 Huge TLB page support.
 *
 * Copyright (C) 2002, 2003, 2006 David S. Miller (davem@davemloft.net)
 */

#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/sched/mm.h>
#include <linux/hugetlb.h>
#include <linux/pagemap.h>
#include <linux/sysctl.h>

#include <asm/mman.h>
#include <asm/pgalloc.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/cacheflush.h>
#include <asm/mmu_context.h>

/* Slightly simplified from the non-hugepage variant because by
 * definition we don't have to worry about any page coloring stuff.
 */

static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *filp,
							unsigned long addr,
							unsigned long len,
							unsigned long pgoff,
							unsigned long flags)
{
	struct hstate *h = hstate_file(filp);
	unsigned long task_size = TASK_SIZE;
	struct vm_unmapped_area_info info;

	if (test_thread_flag(TIF_32BIT))
		task_size = STACK_TOP32;

	info.flags = 0;
	info.length = len;
	info.low_limit = TASK_UNMAPPED_BASE;
	info.high_limit = min(task_size, VA_EXCLUDE_START);
	info.align_mask = PAGE_MASK & ~huge_page_mask(h);
	info.align_offset = 0;
	addr = vm_unmapped_area(&info);

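	/* 64-bit tasks also have the region above the VA hole
	 * available; retry up there if the low range was exhausted.
	 */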
	if ((addr & ~PAGE_MASK) && task_size > VA_EXCLUDE_END) {
		VM_BUG_ON(addr != -ENOMEM);
		info.low_limit = VA_EXCLUDE_END;
		info.high_limit = task_size;
		addr = vm_unmapped_area(&info);
	}

	return addr;
}

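/* Top-down search below mm->mmap_base, mirroring the generic topdown
 * allocator but with hugepage alignment.
 */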
static unsigned long
hugetlb_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
				  const unsigned long len,
				  const unsigned long pgoff,
				  const unsigned long flags)
{
	struct hstate *h = hstate_file(filp);
	struct mm_struct *mm = current->mm;
	unsigned long addr = addr0;
	struct vm_unmapped_area_info info;

	/* This should only ever run for 32-bit processes. */
	BUG_ON(!test_thread_flag(TIF_32BIT));

	info.flags = VM_UNMAPPED_AREA_TOPDOWN;
	info.length = len;
	info.low_limit = PAGE_SIZE;
	info.high_limit = mm->mmap_base;
	info.align_mask = PAGE_MASK & ~huge_page_mask(h);
	info.align_offset = 0;
	addr = vm_unmapped_area(&info);

	/*
	 * A failed mmap() very likely causes application failure,
	 * so fall back to the bottom-up function here. This scenario
	 * can happen with large stack limits and large mmap()
	 * allocations.
	 */
	if (addr & ~PAGE_MASK) {
		VM_BUG_ON(addr != -ENOMEM);
		info.flags = 0;
		info.low_limit = TASK_UNMAPPED_BASE;
		info.high_limit = STACK_TOP32;
		addr = vm_unmapped_area(&info);
	}

	return addr;
}

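/* Validate the request, honour MAP_FIXED and any address hint, then
 * dispatch to the bottom-up or top-down helper depending on the mm's
 * layout.
 */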
unsigned long
hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
		unsigned long len, unsigned long pgoff, unsigned long flags)
{
	struct hstate *h = hstate_file(file);
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	unsigned long task_size = TASK_SIZE;

	if (test_thread_flag(TIF_32BIT))
		task_size = STACK_TOP32;

	if (len & ~huge_page_mask(h))
		return -EINVAL;
	if (len > task_size)
		return -ENOMEM;

	if (flags & MAP_FIXED) {
		if (prepare_hugepage_range(file, addr, len))
			return -EINVAL;
		return addr;
	}

	if (addr) {
		addr = ALIGN(addr, huge_page_size(h));
		vma = find_vma(mm, addr);
		if (task_size - len >= addr &&
		    (!vma || addr + len <= vm_start_gap(vma)))
			return addr;
	}
	if (mm->get_unmapped_area == arch_get_unmapped_area)
		return hugetlb_get_unmapped_area_bottomup(file, addr, len,
				pgoff, flags);
	else
		return hugetlb_get_unmapped_area_topdown(file, addr, len,
				pgoff, flags);
}

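/* sun4u TTEs already carry their size bits at this point, so the
 * entry is returned unchanged.
 */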
static pte_t sun4u_hugepage_shift_to_tte(pte_t entry, unsigned int shift)
{
	return entry;
}

static pte_t sun4v_hugepage_shift_to_tte(pte_t entry, unsigned int shift)
{
	unsigned long hugepage_size = _PAGE_SZ4MB_4V;

	pte_val(entry) = pte_val(entry) & ~_PAGE_SZALL_4V;

	switch (shift) {
	case HPAGE_16GB_SHIFT:
		hugepage_size = _PAGE_SZ16GB_4V;
		pte_val(entry) |= _PAGE_PUD_HUGE;
		break;
	case HPAGE_2GB_SHIFT:
		hugepage_size = _PAGE_SZ2GB_4V;
		pte_val(entry) |= _PAGE_PMD_HUGE;
		break;
	case HPAGE_256MB_SHIFT:
		hugepage_size = _PAGE_SZ256MB_4V;
		pte_val(entry) |= _PAGE_PMD_HUGE;
		break;
	case HPAGE_SHIFT:
		pte_val(entry) |= _PAGE_PMD_HUGE;
		break;
	case HPAGE_64K_SHIFT:
		hugepage_size = _PAGE_SZ64K_4V;
		break;
	default:
		WARN_ONCE(1, "unsupported hugepage shift=%u\n", shift);
	}

	pte_val(entry) = pte_val(entry) | hugepage_size;
	return entry;
}

static pte_t hugepage_shift_to_tte(pte_t entry, unsigned int shift)
{
	if (tlb_type == hypervisor)
		return sun4v_hugepage_shift_to_tte(entry, shift);
	else
		return sun4u_hugepage_shift_to_tte(entry, shift);
}

pte_t arch_make_huge_pte(pte_t entry, unsigned int shift, vm_flags_t flags)
{
	pte_t pte;

	entry = pte_mkhuge(entry);
	pte = hugepage_shift_to_tte(entry, shift);

#ifdef CONFIG_SPARC64
	/* If this vma has ADI enabled on it, turn on TTE.mcd */
	if (flags & VM_SPARC_ADI)
		return pte_mkmcd(pte);
	else
		return pte_mknotmcd(pte);
#else
	return pte;
#endif
}

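/* Decode the size field of a sun4v TTE back into a page shift;
 * PAGE_SHIFT is returned for an unrecognised (non-huge) encoding.
 */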
static unsigned int sun4v_huge_tte_to_shift(pte_t entry)
{
	unsigned long tte_szbits = pte_val(entry) & _PAGE_SZALL_4V;
	unsigned int shift;

	switch (tte_szbits) {
	case _PAGE_SZ16GB_4V:
		shift = HPAGE_16GB_SHIFT;
		break;
	case _PAGE_SZ2GB_4V:
		shift = HPAGE_2GB_SHIFT;
		break;
	case _PAGE_SZ256MB_4V:
		shift = HPAGE_256MB_SHIFT;
		break;
	case _PAGE_SZ4MB_4V:
		shift = REAL_HPAGE_SHIFT;
		break;
	case _PAGE_SZ64K_4V:
		shift = HPAGE_64K_SHIFT;
		break;
	default:
		shift = PAGE_SHIFT;
		break;
	}
	return shift;
}

static unsigned int sun4u_huge_tte_to_shift(pte_t entry)
{
	unsigned long tte_szbits = pte_val(entry) & _PAGE_SZALL_4U;
	unsigned int shift;

	switch (tte_szbits) {
	case _PAGE_SZ256MB_4U:
		shift = HPAGE_256MB_SHIFT;
		break;
	case _PAGE_SZ4MB_4U:
		shift = REAL_HPAGE_SHIFT;
		break;
	case _PAGE_SZ64K_4U:
		shift = HPAGE_64K_SHIFT;
		break;
	default:
		shift = PAGE_SHIFT;
		break;
	}
	return shift;
}

static unsigned long tte_to_shift(pte_t entry)
{
	if (tlb_type == hypervisor)
		return sun4v_huge_tte_to_shift(entry);

	return sun4u_huge_tte_to_shift(entry);
}

static unsigned int huge_tte_to_shift(pte_t entry)
{
	unsigned long shift = tte_to_shift(entry);

	if (shift == PAGE_SHIFT)
		WARN_ONCE(1, "huge_tte_to_shift: invalid hugepage tte=0x%lx\n",
			  pte_val(entry));

	return shift;
}

static unsigned long huge_tte_to_size(pte_t pte)
{
	unsigned long size = 1UL << huge_tte_to_shift(pte);

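	/* An HPAGE_SIZE mapping is backed by two REAL_HPAGE_SIZE TTEs;
	 * report the full HPAGE_SIZE to generic code.
	 */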
	if (size == REAL_HPAGE_SIZE)
		size = HPAGE_SIZE;
	return size;
}

unsigned long pud_leaf_size(pud_t pud) { return 1UL << tte_to_shift(*(pte_t *)&pud); }
unsigned long pmd_leaf_size(pmd_t pmd) { return 1UL << tte_to_shift(*(pte_t *)&pmd); }
unsigned long pte_leaf_size(pte_t pte) { return 1UL << tte_to_shift(pte); }

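/* Huge mappings may live at the PUD, PMD or PTE level; allocate page
 * tables down to the level implied by sz and return that entry as a
 * pte_t *.
 */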
pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
			unsigned long addr, unsigned long sz)
{
	pgd_t *pgd;
	p4d_t *p4d;
	pud_t *pud;
	pmd_t *pmd;

	pgd = pgd_offset(mm, addr);
	p4d = p4d_offset(pgd, addr);
	pud = pud_alloc(mm, p4d, addr);
	if (!pud)
		return NULL;
	if (sz >= PUD_SIZE)
		return (pte_t *)pud;
	pmd = pmd_alloc(mm, pud, addr);
	if (!pmd)
		return NULL;
	if (sz >= PMD_SIZE)
		return (pte_t *)pmd;
	return pte_alloc_huge(mm, pmd, addr);
}

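/* Non-allocating counterpart of huge_pte_alloc(): walk the page tables
 * and return the entry at whatever level the huge mapping is installed,
 * or NULL if nothing is mapped.
 */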
pte_t *huge_pte_offset(struct mm_struct *mm,
		       unsigned long addr, unsigned long sz)
{
	pgd_t *pgd;
	p4d_t *p4d;
	pud_t *pud;
	pmd_t *pmd;

	pgd = pgd_offset(mm, addr);
	if (pgd_none(*pgd))
		return NULL;
	p4d = p4d_offset(pgd, addr);
	if (p4d_none(*p4d))
		return NULL;
	pud = pud_offset(p4d, addr);
	if (pud_none(*pud))
		return NULL;
	if (is_hugetlb_pud(*pud))
		return (pte_t *)pud;
	pmd = pmd_offset(pud, addr);
	if (pmd_none(*pmd))
		return NULL;
	if (is_hugetlb_pmd(*pmd))
		return (pte_t *)pmd;
	return pte_offset_huge(pmd, addr);
}

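/* Install a huge mapping: replicate the TTE across all nptes entries
 * that back it at this level, advancing the physical address in each,
 * and queue the necessary TLB flushes.
 */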
void __set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
		       pte_t *ptep, pte_t entry)
{
	unsigned int nptes, orig_shift, shift;
	unsigned long i, size;
	pte_t orig;

	size = huge_tte_to_size(entry);

	if (size >= PUD_SIZE)
		shift = PUD_SHIFT;
	else if (size >= PMD_SIZE)
		shift = PMD_SHIFT;
	else
		shift = PAGE_SHIFT;

	nptes = size >> shift;

	if (!pte_present(*ptep) && pte_present(entry))
		mm->context.hugetlb_pte_count += nptes;

	addr &= ~(size - 1);
	orig = *ptep;
	orig_shift = pte_none(orig) ? PAGE_SHIFT : huge_tte_to_shift(orig);

	for (i = 0; i < nptes; i++)
		ptep[i] = __pte(pte_val(entry) + (i << shift));

	maybe_tlb_batch_add(mm, addr, ptep, orig, 0, orig_shift);
	/* An HPAGE_SIZE'ed page is composed of two REAL_HPAGE_SIZE'ed pages */
	if (size == HPAGE_SIZE)
		maybe_tlb_batch_add(mm, addr + REAL_HPAGE_SIZE, ptep, orig, 0,
				    orig_shift);
}

void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
		     pte_t *ptep, pte_t entry, unsigned long sz)
{
	__set_huge_pte_at(mm, addr, ptep, entry);
}

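/* Tear down a huge mapping: clear all nptes entries that back it,
 * update the hugetlb PTE count, and queue TLB flushes. Returns the
 * original TTE.
 */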
pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
			      pte_t *ptep)
{
	unsigned int i, nptes, orig_shift, shift;
	unsigned long size;
	pte_t entry;

	entry = *ptep;
	size = huge_tte_to_size(entry);

	if (size >= PUD_SIZE)
		shift = PUD_SHIFT;
	else if (size >= PMD_SIZE)
		shift = PMD_SHIFT;
	else
		shift = PAGE_SHIFT;

	nptes = size >> shift;
	orig_shift = pte_none(entry) ? PAGE_SHIFT : huge_tte_to_shift(entry);

	if (pte_present(entry))
		mm->context.hugetlb_pte_count -= nptes;

	addr &= ~(size - 1);
	for (i = 0; i < nptes; i++)
		ptep[i] = __pte(0UL);

	maybe_tlb_batch_add(mm, addr, ptep, entry, 0, orig_shift);
	/* An HPAGE_SIZE'ed page is composed of two REAL_HPAGE_SIZE'ed pages */
	if (size == HPAGE_SIZE)
		maybe_tlb_batch_add(mm, addr + REAL_HPAGE_SIZE, ptep, entry, 0,
				    orig_shift);

	return entry;
}

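/* A huge PMD/PUD is anything that is not none and not a valid pointer
 * to a lower-level table, which also covers non-present hugetlb
 * entries such as migration entries.
 */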
int pmd_huge(pmd_t pmd)
{
	return !pmd_none(pmd) &&
		(pmd_val(pmd) & (_PAGE_VALID|_PAGE_PMD_HUGE)) != _PAGE_VALID;
}

int pud_huge(pud_t pud)
{
	return !pud_none(pud) &&
		(pud_val(pud) & (_PAGE_VALID|_PAGE_PUD_HUGE)) != _PAGE_VALID;
}

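/* Free the PTE page referenced by a PMD entry and clear the entry.
 * The page is handed to the mmu_gather so the TLB is flushed before
 * it can be reused.
 */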
static void hugetlb_free_pte_range(struct mmu_gather *tlb, pmd_t *pmd,
				   unsigned long addr)
{
	pgtable_t token = pmd_pgtable(*pmd);

	pmd_clear(pmd);
	pte_free_tlb(tlb, token, addr);
	mm_dec_nr_ptes(tlb->mm);
}

static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
				   unsigned long addr, unsigned long end,
				   unsigned long floor, unsigned long ceiling)
{
	pmd_t *pmd;
	unsigned long next;
	unsigned long start;

	start = addr;
	pmd = pmd_offset(pud, addr);
	do {
		next = pmd_addr_end(addr, end);
		if (pmd_none(*pmd))
			continue;
		if (is_hugetlb_pmd(*pmd))
			pmd_clear(pmd);
		else
			hugetlb_free_pte_range(tlb, pmd, addr);
	} while (pmd++, addr = next, addr != end);

	start &= PUD_MASK;
	if (start < floor)
		return;
	if (ceiling) {
		ceiling &= PUD_MASK;
		if (!ceiling)
			return;
	}
	if (end - 1 > ceiling - 1)
		return;

	pmd = pmd_offset(pud, start);
	pud_clear(pud);
	pmd_free_tlb(tlb, pmd, start);
	mm_dec_nr_pmds(tlb->mm);
}

static void hugetlb_free_pud_range(struct mmu_gather *tlb, p4d_t *p4d,
				   unsigned long addr, unsigned long end,
				   unsigned long floor, unsigned long ceiling)
{
	pud_t *pud;
	unsigned long next;
	unsigned long start;

	start = addr;
	pud = pud_offset(p4d, addr);
	do {
		next = pud_addr_end(addr, end);
		if (pud_none_or_clear_bad(pud))
			continue;
		if (is_hugetlb_pud(*pud))
			pud_clear(pud);
		else
			hugetlb_free_pmd_range(tlb, pud, addr, next, floor,
					       ceiling);
	} while (pud++, addr = next, addr != end);

	start &= PGDIR_MASK;
	if (start < floor)
		return;
	if (ceiling) {
		ceiling &= PGDIR_MASK;
		if (!ceiling)
			return;
	}
	if (end - 1 > ceiling - 1)
		return;

	pud = pud_offset(p4d, start);
	p4d_clear(p4d);
	pud_free_tlb(tlb, pud, start);
	mm_dec_nr_puds(tlb->mm);
}

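/* Free the page-table pages backing an unmapped hugetlb range, subject
 * to the floor/ceiling limits imposed by neighbouring mappings.
 */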
void hugetlb_free_pgd_range(struct mmu_gather *tlb,
			    unsigned long addr, unsigned long end,
			    unsigned long floor, unsigned long ceiling)
{
	pgd_t *pgd;
	p4d_t *p4d;
	unsigned long next;

	addr &= PMD_MASK;
	if (addr < floor) {
		addr += PMD_SIZE;
		if (!addr)
			return;
	}
	if (ceiling) {
		ceiling &= PMD_MASK;
		if (!ceiling)
			return;
	}
	if (end - 1 > ceiling - 1)
		end -= PMD_SIZE;
	if (addr > end - 1)
		return;

	pgd = pgd_offset(tlb->mm, addr);
	p4d = p4d_offset(pgd, addr);
	do {
		next = p4d_addr_end(addr, end);
		if (p4d_none_or_clear_bad(p4d))
			continue;
		hugetlb_free_pud_range(tlb, p4d, addr, next, floor, ceiling);
	} while (p4d++, addr = next, addr != end);
}