1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* |
3 | * Copyright IBM Corporation, 2021 |
4 | * |
5 | * Author: Mike Rapoport <rppt@linux.ibm.com> |
6 | */ |
7 | |
8 | #include <linux/mm.h> |
9 | #include <linux/fs.h> |
10 | #include <linux/swap.h> |
11 | #include <linux/mount.h> |
12 | #include <linux/memfd.h> |
13 | #include <linux/bitops.h> |
14 | #include <linux/printk.h> |
15 | #include <linux/pagemap.h> |
16 | #include <linux/syscalls.h> |
17 | #include <linux/pseudo_fs.h> |
18 | #include <linux/secretmem.h> |
19 | #include <linux/set_memory.h> |
20 | #include <linux/sched/signal.h> |
21 | |
22 | #include <uapi/linux/magic.h> |
23 | |
24 | #include <asm/tlbflush.h> |
25 | |
26 | #include "internal.h" |
27 | |
28 | #undef pr_fmt |
29 | #define pr_fmt(fmt) "secretmem: " fmt |
30 | |
31 | /* |
32 | * Define mode and flag masks to allow validation of the system call |
33 | * parameters. |
34 | */ |
35 | #define SECRETMEM_MODE_MASK (0x0) |
36 | #define SECRETMEM_FLAGS_MASK SECRETMEM_MODE_MASK |
37 | |
38 | static bool secretmem_enable __ro_after_init = 1; |
39 | module_param_named(enable, secretmem_enable, bool, 0400); |
40 | MODULE_PARM_DESC(secretmem_enable, |
41 | "Enable secretmem and memfd_secret(2) system call" ); |
42 | |
43 | static atomic_t secretmem_users; |
44 | |
45 | bool secretmem_active(void) |
46 | { |
47 | return !!atomic_read(v: &secretmem_users); |
48 | } |
49 | |
50 | static vm_fault_t secretmem_fault(struct vm_fault *vmf) |
51 | { |
52 | struct address_space *mapping = vmf->vma->vm_file->f_mapping; |
53 | struct inode *inode = file_inode(f: vmf->vma->vm_file); |
54 | pgoff_t offset = vmf->pgoff; |
55 | gfp_t gfp = vmf->gfp_mask; |
56 | unsigned long addr; |
57 | struct page *page; |
58 | struct folio *folio; |
59 | vm_fault_t ret; |
60 | int err; |
61 | |
62 | if (((loff_t)vmf->pgoff << PAGE_SHIFT) >= i_size_read(inode)) |
63 | return vmf_error(err: -EINVAL); |
64 | |
65 | filemap_invalidate_lock_shared(mapping); |
66 | |
67 | retry: |
68 | page = find_lock_page(mapping, index: offset); |
69 | if (!page) { |
70 | folio = folio_alloc(gfp: gfp | __GFP_ZERO, order: 0); |
71 | if (!folio) { |
72 | ret = VM_FAULT_OOM; |
73 | goto out; |
74 | } |
75 | |
76 | page = &folio->page; |
77 | err = set_direct_map_invalid_noflush(page); |
78 | if (err) { |
79 | folio_put(folio); |
80 | ret = vmf_error(err); |
81 | goto out; |
82 | } |
83 | |
84 | __folio_mark_uptodate(folio); |
85 | err = filemap_add_folio(mapping, folio, index: offset, gfp); |
86 | if (unlikely(err)) { |
87 | folio_put(folio); |
88 | /* |
89 | * If a split of large page was required, it |
90 | * already happened when we marked the page invalid |
91 | * which guarantees that this call won't fail |
92 | */ |
93 | set_direct_map_default_noflush(page); |
94 | if (err == -EEXIST) |
95 | goto retry; |
96 | |
97 | ret = vmf_error(err); |
98 | goto out; |
99 | } |
100 | |
101 | addr = (unsigned long)page_address(page); |
102 | flush_tlb_kernel_range(start: addr, end: addr + PAGE_SIZE); |
103 | } |
104 | |
105 | vmf->page = page; |
106 | ret = VM_FAULT_LOCKED; |
107 | |
108 | out: |
109 | filemap_invalidate_unlock_shared(mapping); |
110 | return ret; |
111 | } |
112 | |
113 | static const struct vm_operations_struct secretmem_vm_ops = { |
114 | .fault = secretmem_fault, |
115 | }; |
116 | |
117 | static int secretmem_release(struct inode *inode, struct file *file) |
118 | { |
119 | atomic_dec(v: &secretmem_users); |
120 | return 0; |
121 | } |
122 | |
123 | static int secretmem_mmap(struct file *file, struct vm_area_struct *vma) |
124 | { |
125 | unsigned long len = vma->vm_end - vma->vm_start; |
126 | |
127 | if ((vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) == 0) |
128 | return -EINVAL; |
129 | |
130 | if (!mlock_future_ok(mm: vma->vm_mm, flags: vma->vm_flags | VM_LOCKED, bytes: len)) |
131 | return -EAGAIN; |
132 | |
133 | vm_flags_set(vma, VM_LOCKED | VM_DONTDUMP); |
134 | vma->vm_ops = &secretmem_vm_ops; |
135 | |
136 | return 0; |
137 | } |
138 | |
139 | bool vma_is_secretmem(struct vm_area_struct *vma) |
140 | { |
141 | return vma->vm_ops == &secretmem_vm_ops; |
142 | } |
143 | |
144 | static const struct file_operations secretmem_fops = { |
145 | .release = secretmem_release, |
146 | .mmap = secretmem_mmap, |
147 | }; |
148 | |
149 | static int secretmem_migrate_folio(struct address_space *mapping, |
150 | struct folio *dst, struct folio *src, enum migrate_mode mode) |
151 | { |
152 | return -EBUSY; |
153 | } |
154 | |
155 | static void secretmem_free_folio(struct folio *folio) |
156 | { |
157 | set_direct_map_default_noflush(&folio->page); |
158 | folio_zero_segment(folio, start: 0, xend: folio_size(folio)); |
159 | } |
160 | |
161 | const struct address_space_operations secretmem_aops = { |
162 | .dirty_folio = noop_dirty_folio, |
163 | .free_folio = secretmem_free_folio, |
164 | .migrate_folio = secretmem_migrate_folio, |
165 | }; |
166 | |
167 | static int secretmem_setattr(struct mnt_idmap *idmap, |
168 | struct dentry *dentry, struct iattr *iattr) |
169 | { |
170 | struct inode *inode = d_inode(dentry); |
171 | struct address_space *mapping = inode->i_mapping; |
172 | unsigned int ia_valid = iattr->ia_valid; |
173 | int ret; |
174 | |
175 | filemap_invalidate_lock(mapping); |
176 | |
177 | if ((ia_valid & ATTR_SIZE) && inode->i_size) |
178 | ret = -EINVAL; |
179 | else |
180 | ret = simple_setattr(idmap, dentry, iattr); |
181 | |
182 | filemap_invalidate_unlock(mapping); |
183 | |
184 | return ret; |
185 | } |
186 | |
187 | static const struct inode_operations secretmem_iops = { |
188 | .setattr = secretmem_setattr, |
189 | }; |
190 | |
/*
 * Internal mount of the secretmem filesystem; assigned by secretmem_init()
 * and used by secretmem_file_create() for inode and file allocation.
 */
static struct vfsmount *secretmem_mnt;
192 | |
193 | static struct file *secretmem_file_create(unsigned long flags) |
194 | { |
195 | struct file *file; |
196 | struct inode *inode; |
197 | const char *anon_name = "[secretmem]" ; |
198 | const struct qstr qname = QSTR_INIT(anon_name, strlen(anon_name)); |
199 | int err; |
200 | |
201 | inode = alloc_anon_inode(secretmem_mnt->mnt_sb); |
202 | if (IS_ERR(ptr: inode)) |
203 | return ERR_CAST(ptr: inode); |
204 | |
205 | err = security_inode_init_security_anon(inode, name: &qname, NULL); |
206 | if (err) { |
207 | file = ERR_PTR(error: err); |
208 | goto err_free_inode; |
209 | } |
210 | |
211 | file = alloc_file_pseudo(inode, secretmem_mnt, "secretmem" , |
212 | O_RDWR, &secretmem_fops); |
213 | if (IS_ERR(ptr: file)) |
214 | goto err_free_inode; |
215 | |
216 | mapping_set_gfp_mask(m: inode->i_mapping, GFP_HIGHUSER); |
217 | mapping_set_unevictable(mapping: inode->i_mapping); |
218 | |
219 | inode->i_op = &secretmem_iops; |
220 | inode->i_mapping->a_ops = &secretmem_aops; |
221 | |
222 | /* pretend we are a normal file with zero size */ |
223 | inode->i_mode |= S_IFREG; |
224 | inode->i_size = 0; |
225 | |
226 | return file; |
227 | |
228 | err_free_inode: |
229 | iput(inode); |
230 | return file; |
231 | } |
232 | |
233 | SYSCALL_DEFINE1(memfd_secret, unsigned int, flags) |
234 | { |
235 | struct file *file; |
236 | int fd, err; |
237 | |
238 | /* make sure local flags do not confict with global fcntl.h */ |
239 | BUILD_BUG_ON(SECRETMEM_FLAGS_MASK & O_CLOEXEC); |
240 | |
241 | if (!secretmem_enable) |
242 | return -ENOSYS; |
243 | |
244 | if (flags & ~(SECRETMEM_FLAGS_MASK | O_CLOEXEC)) |
245 | return -EINVAL; |
246 | if (atomic_read(v: &secretmem_users) < 0) |
247 | return -ENFILE; |
248 | |
249 | fd = get_unused_fd_flags(flags: flags & O_CLOEXEC); |
250 | if (fd < 0) |
251 | return fd; |
252 | |
253 | file = secretmem_file_create(flags); |
254 | if (IS_ERR(ptr: file)) { |
255 | err = PTR_ERR(ptr: file); |
256 | goto err_put_fd; |
257 | } |
258 | |
259 | file->f_flags |= O_LARGEFILE; |
260 | |
261 | atomic_inc(v: &secretmem_users); |
262 | fd_install(fd, file); |
263 | return fd; |
264 | |
265 | err_put_fd: |
266 | put_unused_fd(fd); |
267 | return err; |
268 | } |
269 | |
270 | static int secretmem_init_fs_context(struct fs_context *fc) |
271 | { |
272 | return init_pseudo(fc, SECRETMEM_MAGIC) ? 0 : -ENOMEM; |
273 | } |
274 | |
275 | static struct file_system_type secretmem_fs = { |
276 | .name = "secretmem" , |
277 | .init_fs_context = secretmem_init_fs_context, |
278 | .kill_sb = kill_anon_super, |
279 | }; |
280 | |
281 | static int __init secretmem_init(void) |
282 | { |
283 | if (!secretmem_enable) |
284 | return 0; |
285 | |
286 | secretmem_mnt = kern_mount(&secretmem_fs); |
287 | if (IS_ERR(ptr: secretmem_mnt)) |
288 | return PTR_ERR(ptr: secretmem_mnt); |
289 | |
290 | /* prevent secretmem mappings from ever getting PROT_EXEC */ |
291 | secretmem_mnt->mnt_flags |= MNT_NOEXEC; |
292 | |
293 | return 0; |
294 | } |
295 | fs_initcall(secretmem_init); |
296 | |