// SPDX-License-Identifier: GPL-2.0-only
/*
 * arch/arm64/mm/hugetlbpage.c
 *
 * Copyright (C) 2013 Linaro Ltd.
 *
 * Based on arch/x86/mm/hugetlbpage.c.
 */

#include <linux/init.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/pagemap.h>
#include <linux/err.h>
#include <linux/sysctl.h>
#include <asm/mman.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>

/*
 * HugeTLB Support Matrix
 *
 * ---------------------------------------------------
 * | Page Size | CONT PTE |  PMD  | CONT PMD |  PUD  |
 * ---------------------------------------------------
 * |     4K    |    64K   |   2M  |    32M   |   1G  |
 * |    16K    |     2M   |  32M  |     1G   |       |
 * |    64K    |     2M   | 512M  |    16G   |       |
 * ---------------------------------------------------
 */
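
/*
 * The CONT columns above follow from the number of contiguous entries
 * per granule: e.g. with 4K pages, CONT_PTES = 16 (16 * 4K = 64K) and
 * CONT_PMDS = 16 (16 * 2M = 32M). PUD (1G) block mappings are only
 * available with the 4K granule, where pud_sect_supported() is true.
 */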

/*
 * Reserve CMA areas for the largest supported gigantic
 * huge page when requested. Any other smaller gigantic
 * huge pages could still be served from those areas.
 */
#ifdef CONFIG_CMA
void __init arm64_hugetlb_cma_reserve(void)
{
	int order;

	if (pud_sect_supported())
		order = PUD_SHIFT - PAGE_SHIFT;
	else
		order = CONT_PMD_SHIFT - PAGE_SHIFT;
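
	/*
	 * For example, with a 4K base page size and PUD sections
	 * supported: order = PUD_SHIFT - PAGE_SHIFT = 30 - 12 = 18,
	 * i.e. the CMA area is sized for 1G gigantic pages.
	 */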

	hugetlb_cma_reserve(order);
}
#endif /* CONFIG_CMA */

static bool __hugetlb_valid_size(unsigned long size)
{
	switch (size) {
#ifndef __PAGETABLE_PMD_FOLDED
	case PUD_SIZE:
		return pud_sect_supported();
#endif
	case CONT_PMD_SIZE:
	case PMD_SIZE:
	case CONT_PTE_SIZE:
		return true;
	}

	return false;
}

#ifdef CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION
bool arch_hugetlb_migration_supported(struct hstate *h)
{
	size_t pagesize = huge_page_size(h);

	if (!__hugetlb_valid_size(pagesize)) {
		pr_warn("%s: unrecognized huge page size 0x%lx\n",
			__func__, pagesize);
		return false;
	}
	return true;
}
#endif

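/*
 * In the Armv8 translation table format, bits[1:0] of a valid
 * descriptor distinguish table entries (0b11) from block entries
 * (0b01), so a non-zero entry with the table bit clear is a huge
 * (block) mapping.
 */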
int pmd_huge(pmd_t pmd)
{
	return pmd_val(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT);
}

int pud_huge(pud_t pud)
{
#ifndef __PAGETABLE_PMD_FOLDED
	return pud_val(pud) && !(pud_val(pud) & PUD_TABLE_BIT);
#else
	return 0;
#endif
}

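/*
 * Work out how many contiguous entries a pte that is already known
 * to be part of a contiguous range covers. Walking the page table
 * down to @ptep tells us the level: if the walk lands on @ptep at
 * the PMD level it is a contiguous PMD set, otherwise a contiguous
 * PTE set.
 */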
static int find_num_contig(struct mm_struct *mm, unsigned long addr,
			   pte_t *ptep, size_t *pgsize)
{
	pgd_t *pgdp = pgd_offset(mm, addr);
	p4d_t *p4dp;
	pud_t *pudp;
	pmd_t *pmdp;

	*pgsize = PAGE_SIZE;
	p4dp = p4d_offset(pgdp, addr);
	pudp = pud_offset(p4dp, addr);
	pmdp = pmd_offset(pudp, addr);
	if ((pte_t *)pmdp == ptep) {
		*pgsize = PMD_SIZE;
		return CONT_PMDS;
	}
	return CONT_PTES;
}

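/*
 * Map a huge page size onto (number of entries, size per entry).
 * Illustrative example, assuming a 4K base page size where
 * CONT_PTES == 16 and CONT_PMDS == 16:
 *
 *	num_contig_ptes(PMD_SIZE, &pgsize);      returns 1,  pgsize = 2M
 *	num_contig_ptes(CONT_PTE_SIZE, &pgsize); returns 16, pgsize = 4K
 *	num_contig_ptes(CONT_PMD_SIZE, &pgsize); returns 16, pgsize = 2M
 *
 * An unsupported size returns 0.
 */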
static inline int num_contig_ptes(unsigned long size, size_t *pgsize)
{
	int contig_ptes = 0;

	*pgsize = size;

	switch (size) {
#ifndef __PAGETABLE_PMD_FOLDED
	case PUD_SIZE:
		if (pud_sect_supported())
			contig_ptes = 1;
		break;
#endif
	case PMD_SIZE:
		contig_ptes = 1;
		break;
	case CONT_PMD_SIZE:
		*pgsize = PMD_SIZE;
		contig_ptes = CONT_PMDS;
		break;
	case CONT_PTE_SIZE:
		*pgsize = PAGE_SIZE;
		contig_ptes = CONT_PTES;
		break;
	}

	return contig_ptes;
}

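/*
 * For a contiguous set, report the pte as dirty or young if any
 * entry in the set is, since hardware may have updated those bits
 * on any of the constituent ptes.
 */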
pte_t huge_ptep_get(pte_t *ptep)
{
	int ncontig, i;
	size_t pgsize;
	pte_t orig_pte = __ptep_get(ptep);

	if (!pte_present(orig_pte) || !pte_cont(orig_pte))
		return orig_pte;

	ncontig = num_contig_ptes(page_size(pte_page(orig_pte)), &pgsize);
	for (i = 0; i < ncontig; i++, ptep++) {
		pte_t pte = __ptep_get(ptep);

		if (pte_dirty(pte))
			orig_pte = pte_mkdirty(orig_pte);

		if (pte_young(pte))
			orig_pte = pte_mkyoung(orig_pte);
	}
	return orig_pte;
}

/*
 * Changing some bits of contiguous entries requires us to follow a
 * Break-Before-Make approach, breaking the whole contiguous set
 * before we can change any entries. See ARM DDI 0487A.k_iss10775,
 * "Misprogramming of the Contiguous bit", page D4-1762.
 *
 * This helper performs the break step.
 */
static pte_t get_clear_contig(struct mm_struct *mm,
			      unsigned long addr,
			      pte_t *ptep,
			      unsigned long pgsize,
			      unsigned long ncontig)
{
	pte_t orig_pte = __ptep_get(ptep);
	unsigned long i;

	for (i = 0; i < ncontig; i++, addr += pgsize, ptep++) {
		pte_t pte = __ptep_get_and_clear(mm, addr, ptep);

		/*
		 * If HW_AFDBM is enabled, then the HW could turn on
		 * the dirty or accessed bit for any page in the set,
		 * so check them all.
		 */
		if (pte_dirty(pte))
			orig_pte = pte_mkdirty(orig_pte);

		if (pte_young(pte))
			orig_pte = pte_mkyoung(orig_pte);
	}
	return orig_pte;
}

static pte_t get_clear_contig_flush(struct mm_struct *mm,
				    unsigned long addr,
				    pte_t *ptep,
				    unsigned long pgsize,
				    unsigned long ncontig)
{
	pte_t orig_pte = get_clear_contig(mm, addr, ptep, pgsize, ncontig);
	struct vm_area_struct vma = TLB_FLUSH_VMA(mm, 0);

	flush_tlb_range(&vma, addr, addr + (pgsize * ncontig));
	return orig_pte;
}

/*
 * Changing some bits of contiguous entries requires us to follow a
 * Break-Before-Make approach, breaking the whole contiguous set
 * before we can change any entries. See ARM DDI 0487A.k_iss10775,
 * "Misprogramming of the Contiguous bit", page D4-1762.
 *
 * This helper performs the break step for use cases where the
 * original pte is not needed.
 */
static void clear_flush(struct mm_struct *mm,
			unsigned long addr,
			pte_t *ptep,
			unsigned long pgsize,
			unsigned long ncontig)
{
	struct vm_area_struct vma = TLB_FLUSH_VMA(mm, 0);
	unsigned long i, saddr = addr;

	for (i = 0; i < ncontig; i++, addr += pgsize, ptep++)
		__ptep_get_and_clear(mm, addr, ptep);

	flush_tlb_range(&vma, saddr, addr);
}

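/*
 * Three cases: a non-present pte (e.g. a swap entry) is written to
 * every slot without any flushing; a non-contiguous pte is a single
 * entry and is written directly; a contiguous pte requires
 * break-before-make, so the old set is cleared and flushed before
 * the new entries are written with a stride of pgsize in both
 * address and pfn.
 */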
void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
		     pte_t *ptep, pte_t pte, unsigned long sz)
{
	size_t pgsize;
	int i;
	int ncontig;
	unsigned long pfn, dpfn;
	pgprot_t hugeprot;

	ncontig = num_contig_ptes(sz, &pgsize);

	if (!pte_present(pte)) {
		for (i = 0; i < ncontig; i++, ptep++, addr += pgsize)
			__set_ptes(mm, addr, ptep, pte, 1);
		return;
	}

	if (!pte_cont(pte)) {
		__set_ptes(mm, addr, ptep, pte, 1);
		return;
	}

	pfn = pte_pfn(pte);
	dpfn = pgsize >> PAGE_SHIFT;
	hugeprot = pte_pgprot(pte);

	clear_flush(mm, addr, ptep, pgsize, ncontig);

	for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn)
		__set_ptes(mm, addr, ptep, pfn_pte(pfn, hugeprot), 1);
}

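/*
 * Allocate page table entries down to the level implied by @sz:
 * the pud itself for PUD_SIZE, a pmd for PMD_SIZE and CONT_PMD_SIZE
 * (possibly shared for PMD_SIZE), and a pte within a pmd for
 * CONT_PTE_SIZE. Returns NULL if an intermediate level cannot be
 * allocated.
 */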
pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
		      unsigned long addr, unsigned long sz)
{
	pgd_t *pgdp;
	p4d_t *p4dp;
	pud_t *pudp;
	pmd_t *pmdp;
	pte_t *ptep = NULL;

	pgdp = pgd_offset(mm, addr);
	p4dp = p4d_alloc(mm, pgdp, addr);
	if (!p4dp)
		return NULL;

	pudp = pud_alloc(mm, p4dp, addr);
	if (!pudp)
		return NULL;

	if (sz == PUD_SIZE) {
		ptep = (pte_t *)pudp;
	} else if (sz == (CONT_PTE_SIZE)) {
		pmdp = pmd_alloc(mm, pudp, addr);
		if (!pmdp)
			return NULL;

		WARN_ON(addr & (sz - 1));
		ptep = pte_alloc_huge(mm, pmdp, addr);
	} else if (sz == PMD_SIZE) {
		if (want_pmd_share(vma, addr) && pud_none(READ_ONCE(*pudp)))
			ptep = huge_pmd_share(mm, vma, addr, pudp);
		else
			ptep = (pte_t *)pmd_alloc(mm, pudp, addr);
	} else if (sz == (CONT_PMD_SIZE)) {
		pmdp = pmd_alloc(mm, pudp, addr);
		WARN_ON(addr & (sz - 1));
		return (pte_t *)pmdp;
	}

	return ptep;
}

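/*
 * Look up the pte for @addr without allocating. A pointer is also
 * returned for non-present (e.g. swap or migration) entries at the
 * pud/pmd level so callers can inspect them; NULL means there is
 * nothing to see at the level implied by @sz.
 */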
pte_t *huge_pte_offset(struct mm_struct *mm,
		       unsigned long addr, unsigned long sz)
{
	pgd_t *pgdp;
	p4d_t *p4dp;
	pud_t *pudp, pud;
	pmd_t *pmdp, pmd;

	pgdp = pgd_offset(mm, addr);
	if (!pgd_present(READ_ONCE(*pgdp)))
		return NULL;

	p4dp = p4d_offset(pgdp, addr);
	if (!p4d_present(READ_ONCE(*p4dp)))
		return NULL;

	pudp = pud_offset(p4dp, addr);
	pud = READ_ONCE(*pudp);
	if (sz != PUD_SIZE && pud_none(pud))
		return NULL;
	/* hugepage or swap? */
	if (pud_huge(pud) || !pud_present(pud))
		return (pte_t *)pudp;
	/* table; check the next level */

	if (sz == CONT_PMD_SIZE)
		addr &= CONT_PMD_MASK;

	pmdp = pmd_offset(pudp, addr);
	pmd = READ_ONCE(*pmdp);
	if (!(sz == PMD_SIZE || sz == CONT_PMD_SIZE) &&
	    pmd_none(pmd))
		return NULL;
	if (pmd_huge(pmd) || !pmd_present(pmd))
		return (pte_t *)pmdp;

	if (sz == CONT_PTE_SIZE)
		return pte_offset_huge(pmdp, (addr & CONT_PTE_MASK));

	return NULL;
}

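/*
 * Return a mask that can be used to update an address to the last
 * huge page in a page table page mapping size; the generic code
 * uses it to skip runs of non-present entries when linearly
 * scanning an address range. E.g. for a PMD_SIZE hstate the mask
 * covers the rest of the pud entry: PUD_SIZE - PMD_SIZE.
 */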
unsigned long hugetlb_mask_last_page(struct hstate *h)
{
	unsigned long hp_size = huge_page_size(h);

	switch (hp_size) {
#ifndef __PAGETABLE_PMD_FOLDED
	case PUD_SIZE:
		return PGDIR_SIZE - PUD_SIZE;
#endif
	case CONT_PMD_SIZE:
		return PUD_SIZE - CONT_PMD_SIZE;
	case PMD_SIZE:
		return PUD_SIZE - PMD_SIZE;
	case CONT_PTE_SIZE:
		return PMD_SIZE - CONT_PTE_SIZE;
	default:
		break;
	}

	return 0UL;
}

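/*
 * Mark the pte huge and, for the contiguous sizes, set the
 * contiguous bit (PTE_CONT) so the hardware can coalesce the TLB
 * entries for the whole set.
 */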
pte_t arch_make_huge_pte(pte_t entry, unsigned int shift, vm_flags_t flags)
{
	size_t pagesize = 1UL << shift;

	entry = pte_mkhuge(entry);
	if (pagesize == CONT_PTE_SIZE) {
		entry = pte_mkcont(entry);
	} else if (pagesize == CONT_PMD_SIZE) {
		entry = pmd_pte(pmd_mkcont(pte_pmd(entry)));
	} else if (pagesize != PUD_SIZE && pagesize != PMD_SIZE) {
		pr_warn("%s: unrecognized huge page size 0x%lx\n",
			__func__, pagesize);
	}
	return entry;
}

void huge_pte_clear(struct mm_struct *mm, unsigned long addr,
		    pte_t *ptep, unsigned long sz)
{
	int i, ncontig;
	size_t pgsize;

	ncontig = num_contig_ptes(sz, &pgsize);

	for (i = 0; i < ncontig; i++, addr += pgsize, ptep++)
		__pte_clear(mm, addr, ptep);
}

pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
			      unsigned long addr, pte_t *ptep)
{
	int ncontig;
	size_t pgsize;
	pte_t orig_pte = __ptep_get(ptep);

	if (!pte_cont(orig_pte))
		return __ptep_get_and_clear(mm, addr, ptep);

	ncontig = find_num_contig(mm, addr, ptep, &pgsize);

	return get_clear_contig(mm, addr, ptep, pgsize, ncontig);
}

/*
 * huge_ptep_set_access_flags will update access flags (dirty, accessed)
 * and write permission.
 *
 * For a contiguous huge pte range, write permission need only be
 * checked on the first pte in the set. Then, for all the contiguous
 * ptes, we need to check whether the dirty or young state differs.
 */
static int __cont_access_flags_changed(pte_t *ptep, pte_t pte, int ncontig)
{
	int i;

	if (pte_write(pte) != pte_write(__ptep_get(ptep)))
		return 1;

	for (i = 0; i < ncontig; i++) {
		pte_t orig_pte = __ptep_get(ptep + i);

		if (pte_dirty(pte) != pte_dirty(orig_pte))
			return 1;

		if (pte_young(pte) != pte_young(orig_pte))
			return 1;
	}

	return 0;
}

int huge_ptep_set_access_flags(struct vm_area_struct *vma,
			       unsigned long addr, pte_t *ptep,
			       pte_t pte, int dirty)
{
	int ncontig, i;
	size_t pgsize = 0;
	unsigned long pfn = pte_pfn(pte), dpfn;
	struct mm_struct *mm = vma->vm_mm;
	pgprot_t hugeprot;
	pte_t orig_pte;

	if (!pte_cont(pte))
		return __ptep_set_access_flags(vma, addr, ptep, pte, dirty);

	ncontig = find_num_contig(mm, addr, ptep, &pgsize);
	dpfn = pgsize >> PAGE_SHIFT;

	if (!__cont_access_flags_changed(ptep, pte, ncontig))
		return 0;

	orig_pte = get_clear_contig_flush(mm, addr, ptep, pgsize, ncontig);

	/* Make sure we don't lose the dirty or young state */
	if (pte_dirty(orig_pte))
		pte = pte_mkdirty(pte);

	if (pte_young(orig_pte))
		pte = pte_mkyoung(pte);

	hugeprot = pte_pgprot(pte);
	for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn)
		__set_ptes(mm, addr, ptep, pfn_pte(pfn, hugeprot), 1);

	return 1;
}

void huge_ptep_set_wrprotect(struct mm_struct *mm,
			     unsigned long addr, pte_t *ptep)
{
	unsigned long pfn, dpfn;
	pgprot_t hugeprot;
	int ncontig, i;
	size_t pgsize;
	pte_t pte;

	if (!pte_cont(__ptep_get(ptep))) {
		__ptep_set_wrprotect(mm, addr, ptep);
		return;
	}

	ncontig = find_num_contig(mm, addr, ptep, &pgsize);
	dpfn = pgsize >> PAGE_SHIFT;

	pte = get_clear_contig_flush(mm, addr, ptep, pgsize, ncontig);
	pte = pte_wrprotect(pte);

	hugeprot = pte_pgprot(pte);
	pfn = pte_pfn(pte);

	for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn)
		__set_ptes(mm, addr, ptep, pfn_pte(pfn, hugeprot), 1);
}

pte_t huge_ptep_clear_flush(struct vm_area_struct *vma,
			    unsigned long addr, pte_t *ptep)
{
	struct mm_struct *mm = vma->vm_mm;
	size_t pgsize;
	int ncontig;

	if (!pte_cont(__ptep_get(ptep)))
		return ptep_clear_flush(vma, addr, ptep);

	ncontig = find_num_contig(mm, addr, ptep, &pgsize);
	return get_clear_contig_flush(mm, addr, ptep, pgsize, ncontig);
}

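/*
 * Register an hstate for each size in the support matrix above.
 * E.g. with a 4K base page size this adds 1G, 32M, 2M and 64K
 * huge pages.
 */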
static int __init hugetlbpage_init(void)
{
	if (pud_sect_supported())
		hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT);

	hugetlb_add_hstate(CONT_PMD_SHIFT - PAGE_SHIFT);
	hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT);
	hugetlb_add_hstate(CONT_PTE_SHIFT - PAGE_SHIFT);

	return 0;
}
arch_initcall(hugetlbpage_init);

bool __init arch_hugetlb_valid_size(unsigned long size)
{
	return __hugetlb_valid_size(size);
}

pte_t huge_ptep_modify_prot_start(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep)
{
	if (alternative_has_cap_unlikely(ARM64_WORKAROUND_2645198)) {
		/*
		 * Break-before-make (BBM) is required for all user space
		 * mappings when the permission changes from executable to
		 * non-executable on CPUs affected by erratum #2645198.
		 */
		if (pte_user_exec(__ptep_get(ptep)))
			return huge_ptep_clear_flush(vma, addr, ptep);
	}
	return huge_ptep_get_and_clear(vma->vm_mm, addr, ptep);
}

void huge_ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep,
				  pte_t old_pte, pte_t pte)
{
	unsigned long psize = huge_page_size(hstate_vma(vma));

	set_huge_pte_at(vma->vm_mm, addr, ptep, pte, psize);
}
