1 | // SPDX-License-Identifier: GPL-2.0 |
---|---|
2 | #include <linux/mount.h> |
3 | #include <linux/pseudo_fs.h> |
4 | #include <linux/file.h> |
5 | #include <linux/fs.h> |
6 | #include <linux/proc_fs.h> |
7 | #include <linux/proc_ns.h> |
8 | #include <linux/magic.h> |
9 | #include <linux/ktime.h> |
10 | #include <linux/seq_file.h> |
11 | #include <linux/pid_namespace.h> |
12 | #include <linux/user_namespace.h> |
13 | #include <linux/nsfs.h> |
14 | #include <linux/uaccess.h> |
15 | #include <linux/mnt_namespace.h> |
16 | |
17 | #include "mount.h" |
18 | #include "internal.h" |
19 | |
20 | static struct vfsmount *nsfs_mnt; |
21 | |
22 | static long ns_ioctl(struct file *filp, unsigned int ioctl, |
23 | unsigned long arg); |
24 | static const struct file_operations ns_file_operations = { |
25 | .unlocked_ioctl = ns_ioctl, |
26 | .compat_ioctl = compat_ptr_ioctl, |
27 | }; |
28 | |
29 | static char *ns_dname(struct dentry *dentry, char *buffer, int buflen) |
30 | { |
31 | struct inode *inode = d_inode(dentry); |
32 | struct ns_common *ns = inode->i_private; |
33 | const struct proc_ns_operations *ns_ops = ns->ops; |
34 | |
35 | return dynamic_dname(buffer, buflen, "%s:[%lu]", |
36 | ns_ops->name, inode->i_ino); |
37 | } |
38 | |
39 | const struct dentry_operations ns_dentry_operations = { |
40 | .d_dname = ns_dname, |
41 | .d_prune = stashed_dentry_prune, |
42 | }; |
43 | |
44 | static void nsfs_evict(struct inode *inode) |
45 | { |
46 | struct ns_common *ns = inode->i_private; |
47 | clear_inode(inode); |
48 | ns->ops->put(ns); |
49 | } |
50 | |
51 | int ns_get_path_cb(struct path *path, ns_get_path_helper_t *ns_get_cb, |
52 | void *private_data) |
53 | { |
54 | struct ns_common *ns; |
55 | |
56 | ns = ns_get_cb(private_data); |
57 | if (!ns) |
58 | return -ENOENT; |
59 | |
60 | return path_from_stashed(stashed: &ns->stashed, mnt: nsfs_mnt, data: ns, path); |
61 | } |
62 | |
/* Argument bundle passed from ns_get_path() to ns_get_path_task(). */
struct ns_get_path_task_args {
	/* Operations table whose ->get() performs the lookup. */
	const struct proc_ns_operations *ns_ops;
	/* Task handed to ->get(). */
	struct task_struct *task;
};
67 | |
68 | static struct ns_common *ns_get_path_task(void *private_data) |
69 | { |
70 | struct ns_get_path_task_args *args = private_data; |
71 | |
72 | return args->ns_ops->get(args->task); |
73 | } |
74 | |
75 | int ns_get_path(struct path *path, struct task_struct *task, |
76 | const struct proc_ns_operations *ns_ops) |
77 | { |
78 | struct ns_get_path_task_args args = { |
79 | .ns_ops = ns_ops, |
80 | .task = task, |
81 | }; |
82 | |
83 | return ns_get_path_cb(path, ns_get_cb: ns_get_path_task, private_data: &args); |
84 | } |
85 | |
86 | /** |
87 | * open_namespace - open a namespace |
88 | * @ns: the namespace to open |
89 | * |
90 | * This will consume a reference to @ns indendent of success or failure. |
91 | * |
92 | * Return: A file descriptor on success or a negative error code on failure. |
93 | */ |
94 | int open_namespace(struct ns_common *ns) |
95 | { |
96 | struct path path __free(path_put) = {}; |
97 | struct file *f; |
98 | int err; |
99 | |
100 | /* call first to consume reference */ |
101 | err = path_from_stashed(stashed: &ns->stashed, mnt: nsfs_mnt, data: ns, path: &path); |
102 | if (err < 0) |
103 | return err; |
104 | |
105 | CLASS(get_unused_fd, fd)(O_CLOEXEC); |
106 | if (fd < 0) |
107 | return fd; |
108 | |
109 | f = dentry_open(path: &path, O_RDONLY, current_cred()); |
110 | if (IS_ERR(ptr: f)) |
111 | return PTR_ERR(ptr: f); |
112 | |
113 | fd_install(fd, file: f); |
114 | return take_fd(fd); |
115 | } |
116 | |
/**
 * open_related_ns - open a namespace derived from another namespace
 * @ns: the namespace to start from
 * @get_ns: callback returning the related namespace or an ERR_PTR()
 *
 * Return: the error encoded in the pointer returned by @get_ns if it is an
 * error pointer, otherwise the result of open_namespace() on the related
 * namespace.
 */
int open_related_ns(struct ns_common *ns,
		    struct ns_common *(*get_ns)(struct ns_common *ns))
{
	struct ns_common *relative;

	relative = get_ns(ns);
	if (IS_ERR(relative))
		return PTR_ERR(relative);

	return open_namespace(relative);
}
EXPORT_SYMBOL_GPL(open_related_ns);
129 | |
130 | static int copy_ns_info_to_user(const struct mnt_namespace *mnt_ns, |
131 | struct mnt_ns_info __user *uinfo, size_t usize, |
132 | struct mnt_ns_info *kinfo) |
133 | { |
134 | /* |
135 | * If userspace and the kernel have the same struct size it can just |
136 | * be copied. If userspace provides an older struct, only the bits that |
137 | * userspace knows about will be copied. If userspace provides a new |
138 | * struct, only the bits that the kernel knows aobut will be copied and |
139 | * the size value will be set to the size the kernel knows about. |
140 | */ |
141 | kinfo->size = min(usize, sizeof(*kinfo)); |
142 | kinfo->mnt_ns_id = mnt_ns->seq; |
143 | kinfo->nr_mounts = READ_ONCE(mnt_ns->nr_mounts); |
144 | /* Subtract the root mount of the mount namespace. */ |
145 | if (kinfo->nr_mounts) |
146 | kinfo->nr_mounts--; |
147 | |
148 | if (copy_to_user(to: uinfo, from: kinfo, n: kinfo->size)) |
149 | return -EFAULT; |
150 | |
151 | return 0; |
152 | } |
153 | |
154 | static bool nsfs_ioctl_valid(unsigned int cmd) |
155 | { |
156 | switch (cmd) { |
157 | case NS_GET_USERNS: |
158 | case NS_GET_PARENT: |
159 | case NS_GET_NSTYPE: |
160 | case NS_GET_OWNER_UID: |
161 | case NS_GET_MNTNS_ID: |
162 | case NS_GET_PID_FROM_PIDNS: |
163 | case NS_GET_TGID_FROM_PIDNS: |
164 | case NS_GET_PID_IN_PIDNS: |
165 | case NS_GET_TGID_IN_PIDNS: |
166 | return (_IOC_TYPE(cmd) == _IOC_TYPE(cmd)); |
167 | } |
168 | |
169 | /* Extensible ioctls require some extra handling. */ |
170 | switch (_IOC_NR(cmd)) { |
171 | case _IOC_NR(NS_MNT_GET_INFO): |
172 | case _IOC_NR(NS_MNT_GET_NEXT): |
173 | case _IOC_NR(NS_MNT_GET_PREV): |
174 | return (_IOC_TYPE(cmd) == _IOC_TYPE(cmd)); |
175 | } |
176 | |
177 | return false; |
178 | } |
179 | |
180 | static long ns_ioctl(struct file *filp, unsigned int ioctl, |
181 | unsigned long arg) |
182 | { |
183 | struct user_namespace *user_ns; |
184 | struct pid_namespace *pid_ns; |
185 | struct task_struct *tsk; |
186 | struct ns_common *ns; |
187 | struct mnt_namespace *mnt_ns; |
188 | bool previous = false; |
189 | uid_t __user *argp; |
190 | uid_t uid; |
191 | int ret; |
192 | |
193 | if (!nsfs_ioctl_valid(cmd: ioctl)) |
194 | return -ENOIOCTLCMD; |
195 | |
196 | ns = get_proc_ns(file_inode(filp)); |
197 | switch (ioctl) { |
198 | case NS_GET_USERNS: |
199 | return open_related_ns(ns, ns_get_owner); |
200 | case NS_GET_PARENT: |
201 | if (!ns->ops->get_parent) |
202 | return -EINVAL; |
203 | return open_related_ns(ns, ns->ops->get_parent); |
204 | case NS_GET_NSTYPE: |
205 | return ns->ops->type; |
206 | case NS_GET_OWNER_UID: |
207 | if (ns->ops->type != CLONE_NEWUSER) |
208 | return -EINVAL; |
209 | user_ns = container_of(ns, struct user_namespace, ns); |
210 | argp = (uid_t __user *) arg; |
211 | uid = from_kuid_munged(current_user_ns(), uid: user_ns->owner); |
212 | return put_user(uid, argp); |
213 | case NS_GET_MNTNS_ID: { |
214 | __u64 __user *idp; |
215 | __u64 id; |
216 | |
217 | if (ns->ops->type != CLONE_NEWNS) |
218 | return -EINVAL; |
219 | |
220 | mnt_ns = container_of(ns, struct mnt_namespace, ns); |
221 | idp = (__u64 __user *)arg; |
222 | id = mnt_ns->seq; |
223 | return put_user(id, idp); |
224 | } |
225 | case NS_GET_PID_FROM_PIDNS: |
226 | fallthrough; |
227 | case NS_GET_TGID_FROM_PIDNS: |
228 | fallthrough; |
229 | case NS_GET_PID_IN_PIDNS: |
230 | fallthrough; |
231 | case NS_GET_TGID_IN_PIDNS: { |
232 | if (ns->ops->type != CLONE_NEWPID) |
233 | return -EINVAL; |
234 | |
235 | ret = -ESRCH; |
236 | pid_ns = container_of(ns, struct pid_namespace, ns); |
237 | |
238 | guard(rcu)(); |
239 | |
240 | if (ioctl == NS_GET_PID_IN_PIDNS || |
241 | ioctl == NS_GET_TGID_IN_PIDNS) |
242 | tsk = find_task_by_vpid(nr: arg); |
243 | else |
244 | tsk = find_task_by_pid_ns(nr: arg, ns: pid_ns); |
245 | if (!tsk) |
246 | break; |
247 | |
248 | switch (ioctl) { |
249 | case NS_GET_PID_FROM_PIDNS: |
250 | ret = task_pid_vnr(tsk); |
251 | break; |
252 | case NS_GET_TGID_FROM_PIDNS: |
253 | ret = task_tgid_vnr(tsk); |
254 | break; |
255 | case NS_GET_PID_IN_PIDNS: |
256 | ret = task_pid_nr_ns(tsk, ns: pid_ns); |
257 | break; |
258 | case NS_GET_TGID_IN_PIDNS: |
259 | ret = task_tgid_nr_ns(tsk, ns: pid_ns); |
260 | break; |
261 | default: |
262 | ret = 0; |
263 | break; |
264 | } |
265 | |
266 | if (!ret) |
267 | ret = -ESRCH; |
268 | return ret; |
269 | } |
270 | } |
271 | |
272 | /* extensible ioctls */ |
273 | switch (_IOC_NR(ioctl)) { |
274 | case _IOC_NR(NS_MNT_GET_INFO): { |
275 | struct mnt_ns_info kinfo = {}; |
276 | struct mnt_ns_info __user *uinfo = (struct mnt_ns_info __user *)arg; |
277 | size_t usize = _IOC_SIZE(ioctl); |
278 | |
279 | if (ns->ops->type != CLONE_NEWNS) |
280 | return -EINVAL; |
281 | |
282 | if (!uinfo) |
283 | return -EINVAL; |
284 | |
285 | if (usize < MNT_NS_INFO_SIZE_VER0) |
286 | return -EINVAL; |
287 | |
288 | return copy_ns_info_to_user(mnt_ns: to_mnt_ns(ns), uinfo, usize, kinfo: &kinfo); |
289 | } |
290 | case _IOC_NR(NS_MNT_GET_PREV): |
291 | previous = true; |
292 | fallthrough; |
293 | case _IOC_NR(NS_MNT_GET_NEXT): { |
294 | struct mnt_ns_info kinfo = {}; |
295 | struct mnt_ns_info __user *uinfo = (struct mnt_ns_info __user *)arg; |
296 | struct path path __free(path_put) = {}; |
297 | struct file *f __free(fput) = NULL; |
298 | size_t usize = _IOC_SIZE(ioctl); |
299 | |
300 | if (ns->ops->type != CLONE_NEWNS) |
301 | return -EINVAL; |
302 | |
303 | if (usize < MNT_NS_INFO_SIZE_VER0) |
304 | return -EINVAL; |
305 | |
306 | mnt_ns = get_sequential_mnt_ns(mnt_ns: to_mnt_ns(ns), previous); |
307 | if (IS_ERR(ptr: mnt_ns)) |
308 | return PTR_ERR(ptr: mnt_ns); |
309 | |
310 | ns = to_ns_common(mnt_ns); |
311 | /* Transfer ownership of @mnt_ns reference to @path. */ |
312 | ret = path_from_stashed(stashed: &ns->stashed, mnt: nsfs_mnt, data: ns, path: &path); |
313 | if (ret) |
314 | return ret; |
315 | |
316 | CLASS(get_unused_fd, fd)(O_CLOEXEC); |
317 | if (fd < 0) |
318 | return fd; |
319 | |
320 | f = dentry_open(path: &path, O_RDONLY, current_cred()); |
321 | if (IS_ERR(ptr: f)) |
322 | return PTR_ERR(ptr: f); |
323 | |
324 | if (uinfo) { |
325 | /* |
326 | * If @uinfo is passed return all information about the |
327 | * mount namespace as well. |
328 | */ |
329 | ret = copy_ns_info_to_user(mnt_ns: to_mnt_ns(ns), uinfo, usize, kinfo: &kinfo); |
330 | if (ret) |
331 | return ret; |
332 | } |
333 | |
334 | /* Transfer reference of @f to caller's fdtable. */ |
335 | fd_install(fd, no_free_ptr(f)); |
336 | /* File descriptor is live so hand it off to the caller. */ |
337 | return take_fd(fd); |
338 | } |
339 | default: |
340 | ret = -ENOTTY; |
341 | } |
342 | |
343 | return ret; |
344 | } |
345 | |
346 | int ns_get_name(char *buf, size_t size, struct task_struct *task, |
347 | const struct proc_ns_operations *ns_ops) |
348 | { |
349 | struct ns_common *ns; |
350 | int res = -ENOENT; |
351 | const char *name; |
352 | ns = ns_ops->get(task); |
353 | if (ns) { |
354 | name = ns_ops->real_ns_name ? : ns_ops->name; |
355 | res = snprintf(buf, size, fmt: "%s:[%u]", name, ns->inum); |
356 | ns_ops->put(ns); |
357 | } |
358 | return res; |
359 | } |
360 | |
361 | bool proc_ns_file(const struct file *file) |
362 | { |
363 | return file->f_op == &ns_file_operations; |
364 | } |
365 | |
366 | /** |
367 | * ns_match() - Returns true if current namespace matches dev/ino provided. |
368 | * @ns: current namespace |
369 | * @dev: dev_t from nsfs that will be matched against current nsfs |
370 | * @ino: ino_t from nsfs that will be matched against current nsfs |
371 | * |
372 | * Return: true if dev and ino matches the current nsfs. |
373 | */ |
374 | bool ns_match(const struct ns_common *ns, dev_t dev, ino_t ino) |
375 | { |
376 | return (ns->inum == ino) && (nsfs_mnt->mnt_sb->s_dev == dev); |
377 | } |
378 | |
379 | |
380 | static int nsfs_show_path(struct seq_file *seq, struct dentry *dentry) |
381 | { |
382 | struct inode *inode = d_inode(dentry); |
383 | const struct ns_common *ns = inode->i_private; |
384 | const struct proc_ns_operations *ns_ops = ns->ops; |
385 | |
386 | seq_printf(m: seq, fmt: "%s:[%lu]", ns_ops->name, inode->i_ino); |
387 | return 0; |
388 | } |
389 | |
390 | static const struct super_operations nsfs_ops = { |
391 | .statfs = simple_statfs, |
392 | .evict_inode = nsfs_evict, |
393 | .show_path = nsfs_show_path, |
394 | }; |
395 | |
396 | static int nsfs_init_inode(struct inode *inode, void *data) |
397 | { |
398 | struct ns_common *ns = data; |
399 | |
400 | inode->i_private = data; |
401 | inode->i_mode |= S_IRUGO; |
402 | inode->i_fop = &ns_file_operations; |
403 | inode->i_ino = ns->inum; |
404 | return 0; |
405 | } |
406 | |
407 | static void nsfs_put_data(void *data) |
408 | { |
409 | struct ns_common *ns = data; |
410 | ns->ops->put(ns); |
411 | } |
412 | |
413 | static const struct stashed_operations nsfs_stashed_ops = { |
414 | .init_inode = nsfs_init_inode, |
415 | .put_data = nsfs_put_data, |
416 | }; |
417 | |
418 | static int nsfs_init_fs_context(struct fs_context *fc) |
419 | { |
420 | struct pseudo_fs_context *ctx = init_pseudo(fc, NSFS_MAGIC); |
421 | if (!ctx) |
422 | return -ENOMEM; |
423 | ctx->ops = &nsfs_ops; |
424 | ctx->dops = &ns_dentry_operations; |
425 | fc->s_fs_info = (void *)&nsfs_stashed_ops; |
426 | return 0; |
427 | } |
428 | |
429 | static struct file_system_type nsfs = { |
430 | .name = "nsfs", |
431 | .init_fs_context = nsfs_init_fs_context, |
432 | .kill_sb = kill_anon_super, |
433 | }; |
434 | |
435 | void __init nsfs_init(void) |
436 | { |
437 | nsfs_mnt = kern_mount(&nsfs); |
438 | if (IS_ERR(ptr: nsfs_mnt)) |
439 | panic(fmt: "can't set nsfs up\n"); |
440 | nsfs_mnt->mnt_sb->s_flags &= ~SB_NOUSER; |
441 | } |
442 |
Definitions
- nsfs_mnt
- ns_file_operations
- ns_dname
- ns_dentry_operations
- nsfs_evict
- ns_get_path_cb
- ns_get_path_task_args
- ns_get_path_task
- ns_get_path
- open_namespace
- open_related_ns
- copy_ns_info_to_user
- nsfs_ioctl_valid
- ns_ioctl
- ns_get_name
- proc_ns_file
- ns_match
- nsfs_show_path
- nsfs_ops
- nsfs_init_inode
- nsfs_put_data
- nsfs_stashed_ops
- nsfs_init_fs_context
- nsfs
Improve your Profiling and Debugging skills
Find out more