// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright 2007-2008 Paul Mackerras, IBM Corp.
 */

#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/gfp.h>
#include <linux/types.h>
#include <linux/pagewalk.h>
#include <linux/hugetlb.h>
#include <linux/syscalls.h>

#include <linux/pgtable.h>
#include <linux/uaccess.h>

/*
 * Free all pages allocated for subpage protection maps and pointers.
 * Also makes sure that the subpage_prot_table structure is
 * reinitialized for the next user.
 */
void subpage_prot_free(struct mm_struct *mm)
{
	struct subpage_prot_table *spt = mm_ctx_subpage_prot(&mm->context);
	unsigned long i, j, addr;
	u32 **p;

	if (!spt)
		return;

	for (i = 0; i < 4; ++i) {
		if (spt->low_prot[i]) {
			free_page((unsigned long)spt->low_prot[i]);
			spt->low_prot[i] = NULL;
		}
	}
	addr = 0;
	for (i = 0; i < (TASK_SIZE_USER64 >> 43); ++i) {
		p = spt->protptrs[i];
		if (!p)
			continue;
		spt->protptrs[i] = NULL;
		for (j = 0; j < SBP_L2_COUNT && addr < spt->maxaddr;
		     ++j, addr += PAGE_SIZE)
			if (p[j])
				free_page((unsigned long)p[j]);
		free_page((unsigned long)p);
	}
	spt->maxaddr = 0;
	kfree(spt);
}
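/*
 * Layout sketch (illustrative): the protection words form a three
 * level tree.  For an address above 4GB, the u32 protection word for
 * a 64k page is reached roughly as:
 *
 *	spm  = spt->protptrs[addr >> SBP_L3_SHIFT];
 *	spp  = spm[(addr >> SBP_L2_SHIFT) & (SBP_L2_COUNT - 1)];
 *	word = spp[(addr >> PAGE_SHIFT) & (SBP_L1_COUNT - 1)];
 *
 * Addresses below 4GB skip the top level and use spt->low_prot as
 * the array of second-level pointers.
 */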

static void hpte_flush_range(struct mm_struct *mm, unsigned long addr,
			     int npages)
{
	pgd_t *pgd;
	p4d_t *p4d;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;
	spinlock_t *ptl;

	pgd = pgd_offset(mm, addr);
	p4d = p4d_offset(pgd, addr);
	if (p4d_none(*p4d))
		return;
	pud = pud_offset(p4d, addr);
	if (pud_none(*pud))
		return;
	pmd = pmd_offset(pud, addr);
	if (pmd_none(*pmd))
		return;
	pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
	if (!pte)
		return;
	arch_enter_lazy_mmu_mode();
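	/*
	 * Clearing and setting no bits makes each pte_update() below a
	 * no-op change; its side effect is to flush any existing HPTE
	 * for the PTE, which is the point of this function.
	 */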
	for (; npages > 0; --npages) {
		pte_update(mm, addr, pte, 0, 0, 0);
		addr += PAGE_SIZE;
		++pte;
	}
	arch_leave_lazy_mmu_mode();
	pte_unmap_unlock(pte - 1, ptl);
}

/*
 * Clear the subpage protection map for an address range, allowing
 * all accesses that are allowed by the pte permissions.
 */
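/* Reached from sys_subpage_prot() when userspace passes a NULL map. */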
static void subpage_prot_clear(unsigned long addr, unsigned long len)
{
	struct mm_struct *mm = current->mm;
	struct subpage_prot_table *spt;
	u32 **spm, *spp;
	unsigned long i;
	size_t nw;
	unsigned long next, limit;

	mmap_write_lock(mm);

	spt = mm_ctx_subpage_prot(&mm->context);
	if (!spt)
		goto err_out;

	limit = addr + len;
	if (limit > spt->maxaddr)
		limit = spt->maxaddr;
	for (; addr < limit; addr = next) {
		next = pmd_addr_end(addr, limit);
		if (addr < 0x100000000UL) {
			spm = spt->low_prot;
		} else {
			spm = spt->protptrs[addr >> SBP_L3_SHIFT];
			if (!spm)
				continue;
		}
		spp = spm[(addr >> SBP_L2_SHIFT) & (SBP_L2_COUNT - 1)];
		if (!spp)
			continue;
		spp += (addr >> PAGE_SHIFT) & (SBP_L1_COUNT - 1);
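		/* nw = number of 64k pages (one u32 each) up to 'next' */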
		i = (addr >> PAGE_SHIFT) & (PTRS_PER_PTE - 1);
		nw = PTRS_PER_PTE - i;
		if (addr + (nw << PAGE_SHIFT) > next)
			nw = (next - addr) >> PAGE_SHIFT;

		memset(spp, 0, nw * sizeof(u32));

		/* now flush any existing HPTEs for the range */
		hpte_flush_range(mm, addr, nw);
	}

err_out:
	mmap_write_unlock(mm);
}

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static int subpage_walk_pmd_entry(pmd_t *pmd, unsigned long addr,
				  unsigned long end, struct mm_walk *walk)
{
	struct vm_area_struct *vma = walk->vma;
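	/* split any huge PMD so the range is mapped with normal PTEs */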
	split_huge_pmd(vma, pmd, addr);
	return 0;
}

static const struct mm_walk_ops subpage_walk_ops = {
	.pmd_entry	= subpage_walk_pmd_entry,
	.walk_lock	= PGWALK_WRLOCK_VERIFY,
};

static void subpage_mark_vma_nohuge(struct mm_struct *mm, unsigned long addr,
				    unsigned long len)
{
	struct vm_area_struct *vma;
	VMA_ITERATOR(vmi, mm, addr);

	/*
	 * We don't try too hard, we just mark all the VMAs in that range
	 * VM_NOHUGEPAGE and split them.
	 */
	for_each_vma_range(vmi, vma, addr + len) {
		vm_flags_set(vma, VM_NOHUGEPAGE);
		walk_page_vma(vma, &subpage_walk_ops, NULL);
	}
}
#else
static void subpage_mark_vma_nohuge(struct mm_struct *mm, unsigned long addr,
				    unsigned long len)
{
}
#endif

/*
 * Copy in a subpage protection map for an address range.
 * The map has 2 bits per 4k subpage, so 32 bits per 64k page.
 * Each 2-bit field is 0 to allow any access, 1 to prevent writes,
 * 2 or 3 to prevent all accesses.
 * Note that the normal page protections also apply; the subpage
 * protection mechanism is an additional constraint, so putting 0
 * in a 2-bit field won't allow writes to a page that is otherwise
 * write-protected.
 */
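/*
 * Usage sketch (illustrative): userspace supplies one u32 per 64k
 * page.  For example, to make every 4k subpage of a range read-only,
 * set every 2-bit field to 1, i.e. 0x55555555 per word; roughly:
 *
 *	size_t npages = len >> 16;	// 64k pages in the range
 *	u32 *map = calloc(npages, sizeof(u32));
 *	for (size_t n = 0; n < npages; n++)
 *		map[n] = 0x55555555;	// field value 1 = prevent writes
 *	syscall(__NR_subpage_prot, addr, len, map);
 *
 * Passing a NULL map instead clears the protections for the range.
 */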
SYSCALL_DEFINE3(subpage_prot, unsigned long, addr,
		unsigned long, len, u32 __user *, map)
{
	struct mm_struct *mm = current->mm;
	struct subpage_prot_table *spt;
	u32 **spm, *spp;
	unsigned long i;
	size_t nw;
	unsigned long next, limit;
	int err;

	if (radix_enabled())
		return -ENOENT;

	/* Check parameters */
	if ((addr & ~PAGE_MASK) || (len & ~PAGE_MASK) ||
	    addr >= mm->task_size || len >= mm->task_size ||
	    addr + len > mm->task_size)
		return -EINVAL;

	if (is_hugepage_only_range(mm, addr, len))
		return -EINVAL;

	if (!map) {
		/* Clear out the protection map for the address range */
		subpage_prot_clear(addr, len);
		return 0;
	}

	if (!access_ok(map, (len >> PAGE_SHIFT) * sizeof(u32)))
		return -EFAULT;

	mmap_write_lock(mm);

	spt = mm_ctx_subpage_prot(&mm->context);
	if (!spt) {
		/*
		 * Allocate subpage prot table if not already done.
		 * Do this with mmap_lock held
		 */
		spt = kzalloc(sizeof(struct subpage_prot_table), GFP_KERNEL);
		if (!spt) {
			err = -ENOMEM;
			goto out;
		}
		mm->context.hash_context->spt = spt;
	}

	subpage_mark_vma_nohuge(mm, addr, len);
	for (limit = addr + len; addr < limit; addr = next) {
		next = pmd_addr_end(addr, limit);
		err = -ENOMEM;
		if (addr < 0x100000000UL) {
			spm = spt->low_prot;
		} else {
			spm = spt->protptrs[addr >> SBP_L3_SHIFT];
			if (!spm) {
				spm = (u32 **)get_zeroed_page(GFP_KERNEL);
				if (!spm)
					goto out;
				spt->protptrs[addr >> SBP_L3_SHIFT] = spm;
			}
		}
		spm += (addr >> SBP_L2_SHIFT) & (SBP_L2_COUNT - 1);
		spp = *spm;
		if (!spp) {
			spp = (u32 *)get_zeroed_page(GFP_KERNEL);
			if (!spp)
				goto out;
			*spm = spp;
		}
		spp += (addr >> PAGE_SHIFT) & (SBP_L1_COUNT - 1);

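		/* subpage protection needs 4k HPTEs: demote the segment */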
		local_irq_disable();
		demote_segment_4k(mm, addr);
		local_irq_enable();

		i = (addr >> PAGE_SHIFT) & (PTRS_PER_PTE - 1);
		nw = PTRS_PER_PTE - i;
		if (addr + (nw << PAGE_SHIFT) > next)
			nw = (next - addr) >> PAGE_SHIFT;

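		/*
		 * Drop mmap_lock around the copy: the user access may
		 * fault, and the fault path itself takes mmap_lock.
		 */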
		mmap_write_unlock(mm);
		if (__copy_from_user(spp, map, nw * sizeof(u32)))
			return -EFAULT;
		map += nw;
		mmap_write_lock(mm);

		/* now flush any existing HPTEs for the range */
		hpte_flush_range(mm, addr, nw);
	}
	if (limit > spt->maxaddr)
		spt->maxaddr = limit;
	err = 0;
out:
	mmap_write_unlock(mm);
	return err;
}