| 1 | // SPDX-License-Identifier: GPL-2.0 |
| 2 | #include <linux/fanotify.h> |
| 3 | #include <linux/fcntl.h> |
| 4 | #include <linux/file.h> |
| 5 | #include <linux/fs.h> |
| 6 | #include <linux/anon_inodes.h> |
| 7 | #include <linux/fsnotify_backend.h> |
| 8 | #include <linux/init.h> |
| 9 | #include <linux/mount.h> |
| 10 | #include <linux/namei.h> |
| 11 | #include <linux/poll.h> |
| 12 | #include <linux/security.h> |
| 13 | #include <linux/syscalls.h> |
| 14 | #include <linux/slab.h> |
| 15 | #include <linux/types.h> |
| 16 | #include <linux/uaccess.h> |
| 17 | #include <linux/compat.h> |
| 18 | #include <linux/sched/signal.h> |
| 19 | #include <linux/memcontrol.h> |
| 20 | #include <linux/statfs.h> |
| 21 | #include <linux/exportfs.h> |
| 22 | |
| 23 | #include <asm/ioctls.h> |
| 24 | |
| 25 | #include "../fsnotify.h" |
| 26 | #include "../fdinfo.h" |
| 27 | #include "fanotify.h" |
| 28 | |
/*
 * Built-in default values. The code that applies them is not part of this
 * section of the file.
 */
#define FANOTIFY_DEFAULT_MAX_EVENTS	16384
#define FANOTIFY_OLD_DEFAULT_MAX_MARKS	8192
#define FANOTIFY_DEFAULT_MAX_GROUPS	128
#define FANOTIFY_DEFAULT_FEE_POOL_SIZE	32
| 33 | |
/*
 * The legacy fanotify mark limit (8192) applied per group. It was replaced
 * by a tunable per-user limit, similar to inotify. The effective legacy
 * per-user limit was <max marks per group> * <max groups per user>, so that
 * product is used as the default. The resulting value (1M) also happens to
 * match the increased inotify max_user_watches limit since v5.10.
 */
#define FANOTIFY_DEFAULT_MAX_USER_MARKS	\
	(FANOTIFY_OLD_DEFAULT_MAX_MARKS * FANOTIFY_DEFAULT_MAX_GROUPS)
| 43 | |
/*
 * Pinning the marked inode accounts for most of the memory cost of adding an
 * inode mark. Filesystem inode structs are not of uniform size, so twice the
 * size of a VFS inode is used as a conservative approximation.
 */
#define INODE_MARK_COST	(2 * sizeof(struct inode))
| 50 | |
| 51 | /* configurable via /proc/sys/fs/fanotify/ */ |
| 52 | static int fanotify_max_queued_events __read_mostly; |
| 53 | |
#ifdef CONFIG_SYSCTL

#include <linux/sysctl.h>

/* Range bounds referenced by .extra1/.extra2 of the per-user limits below. */
static long ft_zero = 0;
static long ft_int_max = INT_MAX;

/* Tunables registered under "fs/fanotify" by fanotify_sysctls_init(). */
static const struct ctl_table fanotify_table[] = {
	{
		.procname	= "max_user_groups",
		.data		= &init_user_ns.ucount_max[UCOUNT_FANOTIFY_GROUPS],
		.maxlen		= sizeof(long),
		.mode		= 0644,
		.proc_handler	= proc_doulongvec_minmax,
		.extra1		= &ft_zero,
		.extra2		= &ft_int_max,
	},
	{
		.procname	= "max_user_marks",
		.data		= &init_user_ns.ucount_max[UCOUNT_FANOTIFY_MARKS],
		.maxlen		= sizeof(long),
		.mode		= 0644,
		.proc_handler	= proc_doulongvec_minmax,
		.extra1		= &ft_zero,
		.extra2		= &ft_int_max,
	},
	{
		.procname	= "max_queued_events",
		.data		= &fanotify_max_queued_events,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_ZERO
	},
};

/* Register the fanotify tunables with the sysctl core. */
static void __init fanotify_sysctls_init(void)
{
	register_sysctl("fs/fanotify", fanotify_table);
}
#else
/* Without CONFIG_SYSCTL there is nothing to register. */
#define fanotify_sysctls_init() do { } while (0)
#endif /* CONFIG_SYSCTL */
| 97 | |
/*
 * Every flag that may be passed in the event_f_flags parameter of
 * fanotify_init.
 *
 * Field f_flags of struct file holds internal and external open flags
 * together. Only external open flags are allowed in event_f_flags; internal
 * flags such as FMODE_EXEC are excluded.
 */
#define FANOTIFY_INIT_ALL_EVENT_F_BITS	(	\
		O_ACCMODE	|		\
		O_APPEND	|		\
		O_NONBLOCK	|		\
		__O_SYNC	|		\
		O_DSYNC		|		\
		O_CLOEXEC	|		\
		O_LARGEFILE	|		\
		O_NOATIME	)
| 109 | |
| 110 | extern const struct fsnotify_ops fanotify_fsnotify_ops; |
| 111 | |
| 112 | struct kmem_cache *fanotify_mark_cache __ro_after_init; |
| 113 | struct kmem_cache *fanotify_fid_event_cachep __ro_after_init; |
| 114 | struct kmem_cache *fanotify_path_event_cachep __ro_after_init; |
| 115 | struct kmem_cache *fanotify_perm_event_cachep __ro_after_init; |
| 116 | struct kmem_cache *fanotify_mnt_event_cachep __ro_after_init; |
| 117 | |
/* fid info record lengths are rounded up to a multiple of this many bytes. */
#define FANOTIFY_EVENT_ALIGN 4

/*
 * Sizes of the info records that follow the event metadata. Every expansion
 * is fully parenthesized so the macros can be used safely inside any
 * expression.
 */
#define FANOTIFY_FID_INFO_HDR_LEN \
	(sizeof(struct fanotify_event_info_fid) + sizeof(struct file_handle))
#define FANOTIFY_PIDFD_INFO_LEN \
	(sizeof(struct fanotify_event_info_pidfd))
#define FANOTIFY_ERROR_INFO_LEN \
	(sizeof(struct fanotify_event_info_error))
#define FANOTIFY_RANGE_INFO_LEN \
	(sizeof(struct fanotify_event_info_range))
#define FANOTIFY_MNT_INFO_LEN \
	(sizeof(struct fanotify_event_info_mnt))
| 129 | |
| 130 | static int fanotify_fid_info_len(int fh_len, int name_len) |
| 131 | { |
| 132 | int info_len = fh_len; |
| 133 | |
| 134 | if (name_len) |
| 135 | info_len += name_len + 1; |
| 136 | |
| 137 | return roundup(FANOTIFY_FID_INFO_HDR_LEN + info_len, |
| 138 | FANOTIFY_EVENT_ALIGN); |
| 139 | } |
| 140 | |
| 141 | /* FAN_RENAME may have one or two dir+name info records */ |
| 142 | static int fanotify_dir_name_info_len(struct fanotify_event *event) |
| 143 | { |
| 144 | struct fanotify_info *info = fanotify_event_info(event); |
| 145 | int dir_fh_len = fanotify_event_dir_fh_len(event); |
| 146 | int dir2_fh_len = fanotify_event_dir2_fh_len(event); |
| 147 | int info_len = 0; |
| 148 | |
| 149 | if (dir_fh_len) |
| 150 | info_len += fanotify_fid_info_len(fh_len: dir_fh_len, |
| 151 | name_len: info->name_len); |
| 152 | if (dir2_fh_len) |
| 153 | info_len += fanotify_fid_info_len(fh_len: dir2_fh_len, |
| 154 | name_len: info->name2_len); |
| 155 | |
| 156 | return info_len; |
| 157 | } |
| 158 | |
| 159 | static size_t fanotify_event_len(unsigned int info_mode, |
| 160 | struct fanotify_event *event) |
| 161 | { |
| 162 | size_t event_len = FAN_EVENT_METADATA_LEN; |
| 163 | int fh_len; |
| 164 | int dot_len = 0; |
| 165 | |
| 166 | if (fanotify_is_error_event(mask: event->mask)) |
| 167 | event_len += FANOTIFY_ERROR_INFO_LEN; |
| 168 | |
| 169 | if (fanotify_event_has_any_dir_fh(event)) { |
| 170 | event_len += fanotify_dir_name_info_len(event); |
| 171 | } else if ((info_mode & FAN_REPORT_NAME) && |
| 172 | (event->mask & FAN_ONDIR)) { |
| 173 | /* |
| 174 | * With group flag FAN_REPORT_NAME, if name was not recorded in |
| 175 | * event on a directory, we will report the name ".". |
| 176 | */ |
| 177 | dot_len = 1; |
| 178 | } |
| 179 | |
| 180 | if (fanotify_event_has_object_fh(event)) { |
| 181 | fh_len = fanotify_event_object_fh_len(event); |
| 182 | event_len += fanotify_fid_info_len(fh_len, name_len: dot_len); |
| 183 | } |
| 184 | if (fanotify_is_mnt_event(mask: event->mask)) |
| 185 | event_len += FANOTIFY_MNT_INFO_LEN; |
| 186 | |
| 187 | if (info_mode & FAN_REPORT_PIDFD) |
| 188 | event_len += FANOTIFY_PIDFD_INFO_LEN; |
| 189 | |
| 190 | if (fanotify_event_has_access_range(event)) |
| 191 | event_len += FANOTIFY_RANGE_INFO_LEN; |
| 192 | |
| 193 | return event_len; |
| 194 | } |
| 195 | |
| 196 | /* |
| 197 | * Remove an hashed event from merge hash table. |
| 198 | */ |
| 199 | static void fanotify_unhash_event(struct fsnotify_group *group, |
| 200 | struct fanotify_event *event) |
| 201 | { |
| 202 | assert_spin_locked(&group->notification_lock); |
| 203 | |
| 204 | pr_debug("%s: group=%p event=%p bucket=%u\n" , __func__, |
| 205 | group, event, fanotify_event_hash_bucket(group, event)); |
| 206 | |
| 207 | if (WARN_ON_ONCE(hlist_unhashed(&event->merge_list))) |
| 208 | return; |
| 209 | |
| 210 | hlist_del_init(n: &event->merge_list); |
| 211 | } |
| 212 | |
| 213 | /* |
| 214 | * Get an fanotify notification event if one exists and is small |
| 215 | * enough to fit in "count". Return an error pointer if the count |
| 216 | * is not large enough. When permission event is dequeued, its state is |
| 217 | * updated accordingly. |
| 218 | */ |
| 219 | static struct fanotify_event *get_one_event(struct fsnotify_group *group, |
| 220 | size_t count) |
| 221 | { |
| 222 | size_t event_size; |
| 223 | struct fanotify_event *event = NULL; |
| 224 | struct fsnotify_event *fsn_event; |
| 225 | unsigned int info_mode = FAN_GROUP_FLAG(group, FANOTIFY_INFO_MODES); |
| 226 | |
| 227 | pr_debug("%s: group=%p count=%zd\n" , __func__, group, count); |
| 228 | |
| 229 | spin_lock(lock: &group->notification_lock); |
| 230 | fsn_event = fsnotify_peek_first_event(group); |
| 231 | if (!fsn_event) |
| 232 | goto out; |
| 233 | |
| 234 | event = FANOTIFY_E(fse: fsn_event); |
| 235 | event_size = fanotify_event_len(info_mode, event); |
| 236 | |
| 237 | if (event_size > count) { |
| 238 | event = ERR_PTR(error: -EINVAL); |
| 239 | goto out; |
| 240 | } |
| 241 | |
| 242 | /* |
| 243 | * Held the notification_lock the whole time, so this is the |
| 244 | * same event we peeked above. |
| 245 | */ |
| 246 | fsnotify_remove_first_event(group); |
| 247 | if (fanotify_is_perm_event(mask: event->mask)) |
| 248 | FANOTIFY_PERM(event)->state = FAN_EVENT_REPORTED; |
| 249 | if (fanotify_is_hashed_event(mask: event->mask)) |
| 250 | fanotify_unhash_event(group, event); |
| 251 | out: |
| 252 | spin_unlock(lock: &group->notification_lock); |
| 253 | return event; |
| 254 | } |
| 255 | |
| 256 | static int create_fd(struct fsnotify_group *group, const struct path *path, |
| 257 | struct file **file) |
| 258 | { |
| 259 | int client_fd; |
| 260 | struct file *new_file; |
| 261 | |
| 262 | client_fd = get_unused_fd_flags(flags: group->fanotify_data.f_flags); |
| 263 | if (client_fd < 0) |
| 264 | return client_fd; |
| 265 | |
| 266 | /* |
| 267 | * We provide an fd for the userspace program, so it could access the |
| 268 | * file without generating fanotify events itself. |
| 269 | */ |
| 270 | new_file = dentry_open_nonotify(path, flags: group->fanotify_data.f_flags, |
| 271 | current_cred()); |
| 272 | if (IS_ERR(ptr: new_file)) { |
| 273 | put_unused_fd(fd: client_fd); |
| 274 | client_fd = PTR_ERR(ptr: new_file); |
| 275 | } else { |
| 276 | *file = new_file; |
| 277 | } |
| 278 | |
| 279 | return client_fd; |
| 280 | } |
| 281 | |
| 282 | static int process_access_response_info(const char __user *info, |
| 283 | size_t info_len, |
| 284 | struct fanotify_response_info_audit_rule *friar) |
| 285 | { |
| 286 | if (info_len != sizeof(*friar)) |
| 287 | return -EINVAL; |
| 288 | |
| 289 | if (copy_from_user(to: friar, from: info, n: sizeof(*friar))) |
| 290 | return -EFAULT; |
| 291 | |
| 292 | if (friar->hdr.type != FAN_RESPONSE_INFO_AUDIT_RULE) |
| 293 | return -EINVAL; |
| 294 | if (friar->hdr.pad != 0) |
| 295 | return -EINVAL; |
| 296 | if (friar->hdr.len != sizeof(*friar)) |
| 297 | return -EINVAL; |
| 298 | |
| 299 | return info_len; |
| 300 | } |
| 301 | |
| 302 | /* |
| 303 | * Finish processing of permission event by setting it to ANSWERED state and |
| 304 | * drop group->notification_lock. |
| 305 | */ |
| 306 | static void finish_permission_event(struct fsnotify_group *group, |
| 307 | struct fanotify_perm_event *event, u32 response, |
| 308 | struct fanotify_response_info_audit_rule *friar) |
| 309 | __releases(&group->notification_lock) |
| 310 | { |
| 311 | bool destroy = false; |
| 312 | |
| 313 | assert_spin_locked(&group->notification_lock); |
| 314 | event->response = response & ~FAN_INFO; |
| 315 | if (response & FAN_INFO) |
| 316 | memcpy(&event->audit_rule, friar, sizeof(*friar)); |
| 317 | |
| 318 | if (event->state == FAN_EVENT_CANCELED) |
| 319 | destroy = true; |
| 320 | else |
| 321 | event->state = FAN_EVENT_ANSWERED; |
| 322 | spin_unlock(lock: &group->notification_lock); |
| 323 | if (destroy) |
| 324 | fsnotify_destroy_event(group, event: &event->fae.fse); |
| 325 | } |
| 326 | |
| 327 | static int process_access_response(struct fsnotify_group *group, |
| 328 | struct fanotify_response *response_struct, |
| 329 | const char __user *info, |
| 330 | size_t info_len) |
| 331 | { |
| 332 | struct fanotify_perm_event *event; |
| 333 | int fd = response_struct->fd; |
| 334 | u32 response = response_struct->response; |
| 335 | int errno = fanotify_get_response_errno(res: response); |
| 336 | int ret = info_len; |
| 337 | struct fanotify_response_info_audit_rule friar; |
| 338 | |
| 339 | pr_debug("%s: group=%p fd=%d response=%x errno=%d buf=%p size=%zu\n" , |
| 340 | __func__, group, fd, response, errno, info, info_len); |
| 341 | /* |
| 342 | * make sure the response is valid, if invalid we do nothing and either |
| 343 | * userspace can send a valid response or we will clean it up after the |
| 344 | * timeout |
| 345 | */ |
| 346 | if (response & ~FANOTIFY_RESPONSE_VALID_MASK) |
| 347 | return -EINVAL; |
| 348 | |
| 349 | switch (response & FANOTIFY_RESPONSE_ACCESS) { |
| 350 | case FAN_ALLOW: |
| 351 | if (errno) |
| 352 | return -EINVAL; |
| 353 | break; |
| 354 | case FAN_DENY: |
| 355 | /* Custom errno is supported only for pre-content groups */ |
| 356 | if (errno && group->priority != FSNOTIFY_PRIO_PRE_CONTENT) |
| 357 | return -EINVAL; |
| 358 | |
| 359 | /* |
| 360 | * Limit errno to values expected on open(2)/read(2)/write(2) |
| 361 | * of regular files. |
| 362 | */ |
| 363 | switch (errno) { |
| 364 | case 0: |
| 365 | case EIO: |
| 366 | case EPERM: |
| 367 | case EBUSY: |
| 368 | case ETXTBSY: |
| 369 | case EAGAIN: |
| 370 | case ENOSPC: |
| 371 | case EDQUOT: |
| 372 | break; |
| 373 | default: |
| 374 | return -EINVAL; |
| 375 | } |
| 376 | break; |
| 377 | default: |
| 378 | return -EINVAL; |
| 379 | } |
| 380 | |
| 381 | if ((response & FAN_AUDIT) && !FAN_GROUP_FLAG(group, FAN_ENABLE_AUDIT)) |
| 382 | return -EINVAL; |
| 383 | |
| 384 | if (response & FAN_INFO) { |
| 385 | ret = process_access_response_info(info, info_len, friar: &friar); |
| 386 | if (ret < 0) |
| 387 | return ret; |
| 388 | if (fd == FAN_NOFD) |
| 389 | return ret; |
| 390 | } else { |
| 391 | ret = 0; |
| 392 | } |
| 393 | |
| 394 | if (fd < 0) |
| 395 | return -EINVAL; |
| 396 | |
| 397 | spin_lock(lock: &group->notification_lock); |
| 398 | list_for_each_entry(event, &group->fanotify_data.access_list, |
| 399 | fae.fse.list) { |
| 400 | if (event->fd != fd) |
| 401 | continue; |
| 402 | |
| 403 | list_del_init(entry: &event->fae.fse.list); |
| 404 | finish_permission_event(group, event, response, friar: &friar); |
| 405 | wake_up(&group->fanotify_data.access_waitq); |
| 406 | return ret; |
| 407 | } |
| 408 | spin_unlock(lock: &group->notification_lock); |
| 409 | |
| 410 | return -ENOENT; |
| 411 | } |
| 412 | |
| 413 | static size_t copy_mnt_info_to_user(struct fanotify_event *event, |
| 414 | char __user *buf, int count) |
| 415 | { |
| 416 | struct fanotify_event_info_mnt info = { }; |
| 417 | |
| 418 | info.hdr.info_type = FAN_EVENT_INFO_TYPE_MNT; |
| 419 | info.hdr.len = FANOTIFY_MNT_INFO_LEN; |
| 420 | |
| 421 | if (WARN_ON(count < info.hdr.len)) |
| 422 | return -EFAULT; |
| 423 | |
| 424 | info.mnt_id = FANOTIFY_ME(event)->mnt_id; |
| 425 | |
| 426 | if (copy_to_user(to: buf, from: &info, n: sizeof(info))) |
| 427 | return -EFAULT; |
| 428 | |
| 429 | return info.hdr.len; |
| 430 | } |
| 431 | |
| 432 | static size_t copy_error_info_to_user(struct fanotify_event *event, |
| 433 | char __user *buf, int count) |
| 434 | { |
| 435 | struct fanotify_event_info_error info = { }; |
| 436 | struct fanotify_error_event *fee = FANOTIFY_EE(event); |
| 437 | |
| 438 | info.hdr.info_type = FAN_EVENT_INFO_TYPE_ERROR; |
| 439 | info.hdr.len = FANOTIFY_ERROR_INFO_LEN; |
| 440 | |
| 441 | if (WARN_ON(count < info.hdr.len)) |
| 442 | return -EFAULT; |
| 443 | |
| 444 | info.error = fee->error; |
| 445 | info.error_count = fee->err_count; |
| 446 | |
| 447 | if (copy_to_user(to: buf, from: &info, n: sizeof(info))) |
| 448 | return -EFAULT; |
| 449 | |
| 450 | return info.hdr.len; |
| 451 | } |
| 452 | |
| 453 | static int copy_fid_info_to_user(__kernel_fsid_t *fsid, struct fanotify_fh *fh, |
| 454 | int info_type, const char *name, |
| 455 | size_t name_len, |
| 456 | char __user *buf, size_t count) |
| 457 | { |
| 458 | struct fanotify_event_info_fid info = { }; |
| 459 | struct file_handle handle = { }; |
| 460 | unsigned char bounce[FANOTIFY_INLINE_FH_LEN], *fh_buf; |
| 461 | size_t fh_len = fh ? fh->len : 0; |
| 462 | size_t info_len = fanotify_fid_info_len(fh_len, name_len); |
| 463 | size_t len = info_len; |
| 464 | |
| 465 | pr_debug("%s: fh_len=%zu name_len=%zu, info_len=%zu, count=%zu\n" , |
| 466 | __func__, fh_len, name_len, info_len, count); |
| 467 | |
| 468 | if (WARN_ON_ONCE(len < sizeof(info) || len > count)) |
| 469 | return -EFAULT; |
| 470 | |
| 471 | /* |
| 472 | * Copy event info fid header followed by variable sized file handle |
| 473 | * and optionally followed by variable sized filename. |
| 474 | */ |
| 475 | switch (info_type) { |
| 476 | case FAN_EVENT_INFO_TYPE_FID: |
| 477 | case FAN_EVENT_INFO_TYPE_DFID: |
| 478 | if (WARN_ON_ONCE(name_len)) |
| 479 | return -EFAULT; |
| 480 | break; |
| 481 | case FAN_EVENT_INFO_TYPE_DFID_NAME: |
| 482 | case FAN_EVENT_INFO_TYPE_OLD_DFID_NAME: |
| 483 | case FAN_EVENT_INFO_TYPE_NEW_DFID_NAME: |
| 484 | if (WARN_ON_ONCE(!name || !name_len)) |
| 485 | return -EFAULT; |
| 486 | break; |
| 487 | default: |
| 488 | return -EFAULT; |
| 489 | } |
| 490 | |
| 491 | info.hdr.info_type = info_type; |
| 492 | info.hdr.len = len; |
| 493 | info.fsid = *fsid; |
| 494 | if (copy_to_user(to: buf, from: &info, n: sizeof(info))) |
| 495 | return -EFAULT; |
| 496 | |
| 497 | buf += sizeof(info); |
| 498 | len -= sizeof(info); |
| 499 | if (WARN_ON_ONCE(len < sizeof(handle))) |
| 500 | return -EFAULT; |
| 501 | |
| 502 | handle.handle_type = fh->type; |
| 503 | handle.handle_bytes = fh_len; |
| 504 | |
| 505 | /* Mangle handle_type for bad file_handle */ |
| 506 | if (!fh_len) |
| 507 | handle.handle_type = FILEID_INVALID; |
| 508 | |
| 509 | if (copy_to_user(to: buf, from: &handle, n: sizeof(handle))) |
| 510 | return -EFAULT; |
| 511 | |
| 512 | buf += sizeof(handle); |
| 513 | len -= sizeof(handle); |
| 514 | if (WARN_ON_ONCE(len < fh_len)) |
| 515 | return -EFAULT; |
| 516 | |
| 517 | /* |
| 518 | * For an inline fh and inline file name, copy through stack to exclude |
| 519 | * the copy from usercopy hardening protections. |
| 520 | */ |
| 521 | fh_buf = fanotify_fh_buf(fh); |
| 522 | if (fh_len <= FANOTIFY_INLINE_FH_LEN) { |
| 523 | memcpy(bounce, fh_buf, fh_len); |
| 524 | fh_buf = bounce; |
| 525 | } |
| 526 | if (copy_to_user(to: buf, from: fh_buf, n: fh_len)) |
| 527 | return -EFAULT; |
| 528 | |
| 529 | buf += fh_len; |
| 530 | len -= fh_len; |
| 531 | |
| 532 | if (name_len) { |
| 533 | /* Copy the filename with terminating null */ |
| 534 | name_len++; |
| 535 | if (WARN_ON_ONCE(len < name_len)) |
| 536 | return -EFAULT; |
| 537 | |
| 538 | if (copy_to_user(to: buf, from: name, n: name_len)) |
| 539 | return -EFAULT; |
| 540 | |
| 541 | buf += name_len; |
| 542 | len -= name_len; |
| 543 | } |
| 544 | |
| 545 | /* Pad with 0's */ |
| 546 | WARN_ON_ONCE(len < 0 || len >= FANOTIFY_EVENT_ALIGN); |
| 547 | if (len > 0 && clear_user(to: buf, n: len)) |
| 548 | return -EFAULT; |
| 549 | |
| 550 | return info_len; |
| 551 | } |
| 552 | |
| 553 | static int copy_pidfd_info_to_user(int pidfd, |
| 554 | char __user *buf, |
| 555 | size_t count) |
| 556 | { |
| 557 | struct fanotify_event_info_pidfd info = { }; |
| 558 | size_t info_len = FANOTIFY_PIDFD_INFO_LEN; |
| 559 | |
| 560 | if (WARN_ON_ONCE(info_len > count)) |
| 561 | return -EFAULT; |
| 562 | |
| 563 | info.hdr.info_type = FAN_EVENT_INFO_TYPE_PIDFD; |
| 564 | info.hdr.len = info_len; |
| 565 | info.pidfd = pidfd; |
| 566 | |
| 567 | if (copy_to_user(to: buf, from: &info, n: info_len)) |
| 568 | return -EFAULT; |
| 569 | |
| 570 | return info_len; |
| 571 | } |
| 572 | |
| 573 | static size_t copy_range_info_to_user(struct fanotify_event *event, |
| 574 | char __user *buf, int count) |
| 575 | { |
| 576 | struct fanotify_perm_event *pevent = FANOTIFY_PERM(event); |
| 577 | struct fanotify_event_info_range info = { }; |
| 578 | size_t info_len = FANOTIFY_RANGE_INFO_LEN; |
| 579 | |
| 580 | if (WARN_ON_ONCE(info_len > count)) |
| 581 | return -EFAULT; |
| 582 | |
| 583 | if (WARN_ON_ONCE(!pevent->ppos)) |
| 584 | return -EINVAL; |
| 585 | |
| 586 | info.hdr.info_type = FAN_EVENT_INFO_TYPE_RANGE; |
| 587 | info.hdr.len = info_len; |
| 588 | info.offset = *(pevent->ppos); |
| 589 | info.count = pevent->count; |
| 590 | |
| 591 | if (copy_to_user(to: buf, from: &info, n: info_len)) |
| 592 | return -EFAULT; |
| 593 | |
| 594 | return info_len; |
| 595 | } |
| 596 | |
| 597 | static int copy_info_records_to_user(struct fanotify_event *event, |
| 598 | struct fanotify_info *info, |
| 599 | unsigned int info_mode, int pidfd, |
| 600 | char __user *buf, size_t count) |
| 601 | { |
| 602 | int ret, total_bytes = 0, info_type = 0; |
| 603 | unsigned int fid_mode = info_mode & FANOTIFY_FID_BITS; |
| 604 | unsigned int pidfd_mode = info_mode & FAN_REPORT_PIDFD; |
| 605 | |
| 606 | /* |
| 607 | * Event info records order is as follows: |
| 608 | * 1. dir fid + name |
| 609 | * 2. (optional) new dir fid + new name |
| 610 | * 3. (optional) child fid |
| 611 | */ |
| 612 | if (fanotify_event_has_dir_fh(event)) { |
| 613 | info_type = info->name_len ? FAN_EVENT_INFO_TYPE_DFID_NAME : |
| 614 | FAN_EVENT_INFO_TYPE_DFID; |
| 615 | |
| 616 | /* FAN_RENAME uses special info types */ |
| 617 | if (event->mask & FAN_RENAME) |
| 618 | info_type = FAN_EVENT_INFO_TYPE_OLD_DFID_NAME; |
| 619 | |
| 620 | ret = copy_fid_info_to_user(fsid: fanotify_event_fsid(event), |
| 621 | fh: fanotify_info_dir_fh(info), |
| 622 | info_type, |
| 623 | name: fanotify_info_name(info), |
| 624 | name_len: info->name_len, buf, count); |
| 625 | if (ret < 0) |
| 626 | return ret; |
| 627 | |
| 628 | buf += ret; |
| 629 | count -= ret; |
| 630 | total_bytes += ret; |
| 631 | } |
| 632 | |
| 633 | /* New dir fid+name may be reported in addition to old dir fid+name */ |
| 634 | if (fanotify_event_has_dir2_fh(event)) { |
| 635 | info_type = FAN_EVENT_INFO_TYPE_NEW_DFID_NAME; |
| 636 | ret = copy_fid_info_to_user(fsid: fanotify_event_fsid(event), |
| 637 | fh: fanotify_info_dir2_fh(info), |
| 638 | info_type, |
| 639 | name: fanotify_info_name2(info), |
| 640 | name_len: info->name2_len, buf, count); |
| 641 | if (ret < 0) |
| 642 | return ret; |
| 643 | |
| 644 | buf += ret; |
| 645 | count -= ret; |
| 646 | total_bytes += ret; |
| 647 | } |
| 648 | |
| 649 | if (fanotify_event_has_object_fh(event)) { |
| 650 | const char *dot = NULL; |
| 651 | int dot_len = 0; |
| 652 | |
| 653 | if (fid_mode == FAN_REPORT_FID || info_type) { |
| 654 | /* |
| 655 | * With only group flag FAN_REPORT_FID only type FID is |
| 656 | * reported. Second info record type is always FID. |
| 657 | */ |
| 658 | info_type = FAN_EVENT_INFO_TYPE_FID; |
| 659 | } else if ((fid_mode & FAN_REPORT_NAME) && |
| 660 | (event->mask & FAN_ONDIR)) { |
| 661 | /* |
| 662 | * With group flag FAN_REPORT_NAME, if name was not |
| 663 | * recorded in an event on a directory, report the name |
| 664 | * "." with info type DFID_NAME. |
| 665 | */ |
| 666 | info_type = FAN_EVENT_INFO_TYPE_DFID_NAME; |
| 667 | dot = "." ; |
| 668 | dot_len = 1; |
| 669 | } else if ((event->mask & ALL_FSNOTIFY_DIRENT_EVENTS) || |
| 670 | (event->mask & FAN_ONDIR)) { |
| 671 | /* |
| 672 | * With group flag FAN_REPORT_DIR_FID, a single info |
| 673 | * record has type DFID for directory entry modification |
| 674 | * event and for event on a directory. |
| 675 | */ |
| 676 | info_type = FAN_EVENT_INFO_TYPE_DFID; |
| 677 | } else { |
| 678 | /* |
| 679 | * With group flags FAN_REPORT_DIR_FID|FAN_REPORT_FID, |
| 680 | * a single info record has type FID for event on a |
| 681 | * non-directory, when there is no directory to report. |
| 682 | * For example, on FAN_DELETE_SELF event. |
| 683 | */ |
| 684 | info_type = FAN_EVENT_INFO_TYPE_FID; |
| 685 | } |
| 686 | |
| 687 | ret = copy_fid_info_to_user(fsid: fanotify_event_fsid(event), |
| 688 | fh: fanotify_event_object_fh(event), |
| 689 | info_type, name: dot, name_len: dot_len, |
| 690 | buf, count); |
| 691 | if (ret < 0) |
| 692 | return ret; |
| 693 | |
| 694 | buf += ret; |
| 695 | count -= ret; |
| 696 | total_bytes += ret; |
| 697 | } |
| 698 | |
| 699 | if (pidfd_mode) { |
| 700 | ret = copy_pidfd_info_to_user(pidfd, buf, count); |
| 701 | if (ret < 0) |
| 702 | return ret; |
| 703 | |
| 704 | buf += ret; |
| 705 | count -= ret; |
| 706 | total_bytes += ret; |
| 707 | } |
| 708 | |
| 709 | if (fanotify_is_error_event(mask: event->mask)) { |
| 710 | ret = copy_error_info_to_user(event, buf, count); |
| 711 | if (ret < 0) |
| 712 | return ret; |
| 713 | buf += ret; |
| 714 | count -= ret; |
| 715 | total_bytes += ret; |
| 716 | } |
| 717 | |
| 718 | if (fanotify_event_has_access_range(event)) { |
| 719 | ret = copy_range_info_to_user(event, buf, count); |
| 720 | if (ret < 0) |
| 721 | return ret; |
| 722 | buf += ret; |
| 723 | count -= ret; |
| 724 | total_bytes += ret; |
| 725 | } |
| 726 | |
| 727 | if (fanotify_is_mnt_event(mask: event->mask)) { |
| 728 | ret = copy_mnt_info_to_user(event, buf, count); |
| 729 | if (ret < 0) |
| 730 | return ret; |
| 731 | buf += ret; |
| 732 | count -= ret; |
| 733 | total_bytes += ret; |
| 734 | } |
| 735 | |
| 736 | return total_bytes; |
| 737 | } |
| 738 | |
| 739 | static ssize_t copy_event_to_user(struct fsnotify_group *group, |
| 740 | struct fanotify_event *event, |
| 741 | char __user *buf, size_t count) |
| 742 | { |
| 743 | struct fanotify_event_metadata metadata; |
| 744 | const struct path *path = fanotify_event_path(event); |
| 745 | struct fanotify_info *info = fanotify_event_info(event); |
| 746 | unsigned int info_mode = FAN_GROUP_FLAG(group, FANOTIFY_INFO_MODES); |
| 747 | unsigned int pidfd_mode = info_mode & FAN_REPORT_PIDFD; |
| 748 | struct file *f = NULL, *pidfd_file = NULL; |
| 749 | int ret, pidfd = -ESRCH, fd = -EBADF; |
| 750 | |
| 751 | pr_debug("%s: group=%p event=%p\n" , __func__, group, event); |
| 752 | |
| 753 | metadata.event_len = fanotify_event_len(info_mode, event); |
| 754 | metadata.metadata_len = FAN_EVENT_METADATA_LEN; |
| 755 | metadata.vers = FANOTIFY_METADATA_VERSION; |
| 756 | metadata.reserved = 0; |
| 757 | metadata.mask = event->mask & FANOTIFY_OUTGOING_EVENTS; |
| 758 | metadata.pid = pid_vnr(pid: event->pid); |
| 759 | /* |
| 760 | * For an unprivileged listener, event->pid can be used to identify the |
| 761 | * events generated by the listener process itself, without disclosing |
| 762 | * the pids of other processes. |
| 763 | */ |
| 764 | if (FAN_GROUP_FLAG(group, FANOTIFY_UNPRIV) && |
| 765 | task_tgid(current) != event->pid) |
| 766 | metadata.pid = 0; |
| 767 | |
| 768 | /* |
| 769 | * For now, fid mode is required for an unprivileged listener and |
| 770 | * fid mode does not report fd in events. Keep this check anyway |
| 771 | * for safety in case fid mode requirement is relaxed in the future |
| 772 | * to allow unprivileged listener to get events with no fd and no fid. |
| 773 | */ |
| 774 | if (!FAN_GROUP_FLAG(group, FANOTIFY_UNPRIV) && |
| 775 | path && path->mnt && path->dentry) { |
| 776 | fd = create_fd(group, path, file: &f); |
| 777 | /* |
| 778 | * Opening an fd from dentry can fail for several reasons. |
| 779 | * For example, when tasks are gone and we try to open their |
| 780 | * /proc files or we try to open a WRONLY file like in sysfs |
| 781 | * or when trying to open a file that was deleted on the |
| 782 | * remote network server. |
| 783 | * |
| 784 | * For a group with FAN_REPORT_FD_ERROR, we will send the |
| 785 | * event with the error instead of the open fd, otherwise |
| 786 | * Userspace may not get the error at all. |
| 787 | * In any case, userspace will not know which file failed to |
| 788 | * open, so add a debug print for further investigation. |
| 789 | */ |
| 790 | if (fd < 0) { |
| 791 | pr_debug("fanotify: create_fd(%pd2) failed err=%d\n" , |
| 792 | path->dentry, fd); |
| 793 | if (!FAN_GROUP_FLAG(group, FAN_REPORT_FD_ERROR)) { |
| 794 | /* |
| 795 | * Historically, we've handled EOPENSTALE in a |
| 796 | * special way and silently dropped such |
| 797 | * events. Now we have to keep it to maintain |
| 798 | * backward compatibility... |
| 799 | */ |
| 800 | if (fd == -EOPENSTALE) |
| 801 | fd = 0; |
| 802 | return fd; |
| 803 | } |
| 804 | } |
| 805 | } |
| 806 | if (FAN_GROUP_FLAG(group, FAN_REPORT_FD_ERROR)) |
| 807 | metadata.fd = fd; |
| 808 | else |
| 809 | metadata.fd = fd >= 0 ? fd : FAN_NOFD; |
| 810 | |
| 811 | if (pidfd_mode) { |
| 812 | /* |
| 813 | * Complain if the FAN_REPORT_PIDFD and FAN_REPORT_TID mutual |
| 814 | * exclusion is ever lifted. At the time of incoporating pidfd |
| 815 | * support within fanotify, the pidfd API only supported the |
| 816 | * creation of pidfds for thread-group leaders. |
| 817 | */ |
| 818 | WARN_ON_ONCE(FAN_GROUP_FLAG(group, FAN_REPORT_TID)); |
| 819 | |
| 820 | /* |
| 821 | * The PIDTYPE_TGID check for an event->pid is performed |
| 822 | * preemptively in an attempt to catch out cases where the event |
| 823 | * listener reads events after the event generating process has |
| 824 | * already terminated. Depending on flag FAN_REPORT_FD_ERROR, |
| 825 | * report either -ESRCH or FAN_NOPIDFD to the event listener in |
| 826 | * those cases with all other pidfd creation errors reported as |
| 827 | * the error code itself or as FAN_EPIDFD. |
| 828 | */ |
| 829 | if (metadata.pid && pid_has_task(pid: event->pid, type: PIDTYPE_TGID)) |
| 830 | pidfd = pidfd_prepare(pid: event->pid, flags: 0, ret_file: &pidfd_file); |
| 831 | |
| 832 | if (!FAN_GROUP_FLAG(group, FAN_REPORT_FD_ERROR) && pidfd < 0) |
| 833 | pidfd = pidfd == -ESRCH ? FAN_NOPIDFD : FAN_EPIDFD; |
| 834 | } |
| 835 | |
| 836 | ret = -EFAULT; |
| 837 | /* |
| 838 | * Sanity check copy size in case get_one_event() and |
| 839 | * event_len sizes ever get out of sync. |
| 840 | */ |
| 841 | if (WARN_ON_ONCE(metadata.event_len > count)) |
| 842 | goto out_close_fd; |
| 843 | |
| 844 | if (copy_to_user(to: buf, from: &metadata, FAN_EVENT_METADATA_LEN)) |
| 845 | goto out_close_fd; |
| 846 | |
| 847 | buf += FAN_EVENT_METADATA_LEN; |
| 848 | count -= FAN_EVENT_METADATA_LEN; |
| 849 | |
| 850 | ret = copy_info_records_to_user(event, info, info_mode, pidfd, |
| 851 | buf, count); |
| 852 | if (ret < 0) |
| 853 | goto out_close_fd; |
| 854 | |
| 855 | if (f) |
| 856 | fd_install(fd, file: f); |
| 857 | |
| 858 | if (pidfd_file) |
| 859 | fd_install(fd: pidfd, file: pidfd_file); |
| 860 | |
| 861 | if (fanotify_is_perm_event(mask: event->mask)) |
| 862 | FANOTIFY_PERM(event)->fd = fd; |
| 863 | |
| 864 | return metadata.event_len; |
| 865 | |
| 866 | out_close_fd: |
| 867 | if (f) { |
| 868 | put_unused_fd(fd); |
| 869 | fput(f); |
| 870 | } |
| 871 | |
| 872 | if (pidfd_file) { |
| 873 | put_unused_fd(fd: pidfd); |
| 874 | fput(pidfd_file); |
| 875 | } |
| 876 | |
| 877 | return ret; |
| 878 | } |
| 879 | |
| 880 | /* fanotify userspace file descriptor functions */ |
| 881 | static __poll_t fanotify_poll(struct file *file, poll_table *wait) |
| 882 | { |
| 883 | struct fsnotify_group *group = file->private_data; |
| 884 | __poll_t ret = 0; |
| 885 | |
| 886 | poll_wait(filp: file, wait_address: &group->notification_waitq, p: wait); |
| 887 | spin_lock(lock: &group->notification_lock); |
| 888 | if (!fsnotify_notify_queue_is_empty(group)) |
| 889 | ret = EPOLLIN | EPOLLRDNORM; |
| 890 | spin_unlock(lock: &group->notification_lock); |
| 891 | |
| 892 | return ret; |
| 893 | } |
| 894 | |
| 895 | static ssize_t fanotify_read(struct file *file, char __user *buf, |
| 896 | size_t count, loff_t *pos) |
| 897 | { |
| 898 | struct fsnotify_group *group; |
| 899 | struct fanotify_event *event; |
| 900 | char __user *start; |
| 901 | int ret; |
| 902 | DEFINE_WAIT_FUNC(wait, woken_wake_function); |
| 903 | |
| 904 | start = buf; |
| 905 | group = file->private_data; |
| 906 | |
| 907 | pr_debug("%s: group=%p\n" , __func__, group); |
| 908 | |
| 909 | add_wait_queue(wq_head: &group->notification_waitq, wq_entry: &wait); |
| 910 | while (1) { |
| 911 | /* |
| 912 | * User can supply arbitrarily large buffer. Avoid softlockups |
| 913 | * in case there are lots of available events. |
| 914 | */ |
| 915 | cond_resched(); |
| 916 | event = get_one_event(group, count); |
| 917 | if (IS_ERR(ptr: event)) { |
| 918 | ret = PTR_ERR(ptr: event); |
| 919 | break; |
| 920 | } |
| 921 | |
| 922 | if (!event) { |
| 923 | ret = -EAGAIN; |
| 924 | if (file->f_flags & O_NONBLOCK) |
| 925 | break; |
| 926 | |
| 927 | ret = -ERESTARTSYS; |
| 928 | if (signal_pending(current)) |
| 929 | break; |
| 930 | |
| 931 | if (start != buf) |
| 932 | break; |
| 933 | |
| 934 | wait_woken(wq_entry: &wait, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT); |
| 935 | continue; |
| 936 | } |
| 937 | |
| 938 | ret = copy_event_to_user(group, event, buf, count); |
| 939 | |
| 940 | /* |
| 941 | * Permission events get queued to wait for response. Other |
| 942 | * events can be destroyed now. |
| 943 | */ |
| 944 | if (!fanotify_is_perm_event(mask: event->mask)) { |
| 945 | fsnotify_destroy_event(group, event: &event->fse); |
| 946 | } else { |
| 947 | if (ret <= 0 || FANOTIFY_PERM(event)->fd < 0) { |
| 948 | spin_lock(lock: &group->notification_lock); |
| 949 | finish_permission_event(group, |
| 950 | event: FANOTIFY_PERM(event), FAN_DENY, NULL); |
| 951 | wake_up(&group->fanotify_data.access_waitq); |
| 952 | } else { |
| 953 | spin_lock(lock: &group->notification_lock); |
| 954 | list_add_tail(new: &event->fse.list, |
| 955 | head: &group->fanotify_data.access_list); |
| 956 | spin_unlock(lock: &group->notification_lock); |
| 957 | } |
| 958 | } |
| 959 | if (ret < 0) |
| 960 | break; |
| 961 | buf += ret; |
| 962 | count -= ret; |
| 963 | } |
| 964 | remove_wait_queue(wq_head: &group->notification_waitq, wq_entry: &wait); |
| 965 | |
| 966 | if (start != buf && ret != -EFAULT) |
| 967 | ret = buf - start; |
| 968 | return ret; |
| 969 | } |
| 970 | |
| 971 | static ssize_t fanotify_write(struct file *file, const char __user *buf, size_t count, loff_t *pos) |
| 972 | { |
| 973 | struct fanotify_response response; |
| 974 | struct fsnotify_group *group; |
| 975 | int ret; |
| 976 | const char __user *info_buf = buf + sizeof(struct fanotify_response); |
| 977 | size_t info_len; |
| 978 | |
| 979 | if (!IS_ENABLED(CONFIG_FANOTIFY_ACCESS_PERMISSIONS)) |
| 980 | return -EINVAL; |
| 981 | |
| 982 | group = file->private_data; |
| 983 | |
| 984 | pr_debug("%s: group=%p count=%zu\n" , __func__, group, count); |
| 985 | |
| 986 | if (count < sizeof(response)) |
| 987 | return -EINVAL; |
| 988 | |
| 989 | if (copy_from_user(to: &response, from: buf, n: sizeof(response))) |
| 990 | return -EFAULT; |
| 991 | |
| 992 | info_len = count - sizeof(response); |
| 993 | |
| 994 | ret = process_access_response(group, response_struct: &response, info: info_buf, info_len); |
| 995 | if (ret < 0) |
| 996 | count = ret; |
| 997 | else |
| 998 | count = sizeof(response) + ret; |
| 999 | |
| 1000 | return count; |
| 1001 | } |
| 1002 | |
| 1003 | static int fanotify_release(struct inode *ignored, struct file *file) |
| 1004 | { |
| 1005 | struct fsnotify_group *group = file->private_data; |
| 1006 | struct fsnotify_event *fsn_event; |
| 1007 | |
| 1008 | /* |
| 1009 | * Stop new events from arriving in the notification queue. since |
| 1010 | * userspace cannot use fanotify fd anymore, no event can enter or |
| 1011 | * leave access_list by now either. |
| 1012 | */ |
| 1013 | fsnotify_group_stop_queueing(group); |
| 1014 | |
| 1015 | /* |
| 1016 | * Process all permission events on access_list and notification queue |
| 1017 | * and simulate reply from userspace. |
| 1018 | */ |
| 1019 | spin_lock(lock: &group->notification_lock); |
| 1020 | while (!list_empty(head: &group->fanotify_data.access_list)) { |
| 1021 | struct fanotify_perm_event *event; |
| 1022 | |
| 1023 | event = list_first_entry(&group->fanotify_data.access_list, |
| 1024 | struct fanotify_perm_event, fae.fse.list); |
| 1025 | list_del_init(entry: &event->fae.fse.list); |
| 1026 | finish_permission_event(group, event, FAN_ALLOW, NULL); |
| 1027 | spin_lock(lock: &group->notification_lock); |
| 1028 | } |
| 1029 | |
| 1030 | /* |
| 1031 | * Destroy all non-permission events. For permission events just |
| 1032 | * dequeue them and set the response. They will be freed once the |
| 1033 | * response is consumed and fanotify_get_response() returns. |
| 1034 | */ |
| 1035 | while ((fsn_event = fsnotify_remove_first_event(group))) { |
| 1036 | struct fanotify_event *event = FANOTIFY_E(fse: fsn_event); |
| 1037 | |
| 1038 | if (!(event->mask & FANOTIFY_PERM_EVENTS)) { |
| 1039 | spin_unlock(lock: &group->notification_lock); |
| 1040 | fsnotify_destroy_event(group, event: fsn_event); |
| 1041 | } else { |
| 1042 | finish_permission_event(group, event: FANOTIFY_PERM(event), |
| 1043 | FAN_ALLOW, NULL); |
| 1044 | } |
| 1045 | spin_lock(lock: &group->notification_lock); |
| 1046 | } |
| 1047 | spin_unlock(lock: &group->notification_lock); |
| 1048 | |
| 1049 | /* Response for all permission events it set, wakeup waiters */ |
| 1050 | wake_up(&group->fanotify_data.access_waitq); |
| 1051 | |
| 1052 | /* matches the fanotify_init->fsnotify_alloc_group */ |
| 1053 | fsnotify_destroy_group(group); |
| 1054 | |
| 1055 | return 0; |
| 1056 | } |
| 1057 | |
| 1058 | static long fanotify_ioctl(struct file *file, unsigned int cmd, unsigned long arg) |
| 1059 | { |
| 1060 | struct fsnotify_group *group; |
| 1061 | struct fsnotify_event *fsn_event; |
| 1062 | void __user *p; |
| 1063 | int ret = -ENOTTY; |
| 1064 | size_t send_len = 0; |
| 1065 | |
| 1066 | group = file->private_data; |
| 1067 | |
| 1068 | p = (void __user *) arg; |
| 1069 | |
| 1070 | switch (cmd) { |
| 1071 | case FIONREAD: |
| 1072 | spin_lock(lock: &group->notification_lock); |
| 1073 | list_for_each_entry(fsn_event, &group->notification_list, list) |
| 1074 | send_len += FAN_EVENT_METADATA_LEN; |
| 1075 | spin_unlock(lock: &group->notification_lock); |
| 1076 | ret = put_user(send_len, (int __user *) p); |
| 1077 | break; |
| 1078 | } |
| 1079 | |
| 1080 | return ret; |
| 1081 | } |
| 1082 | |
| 1083 | static const struct file_operations fanotify_fops = { |
| 1084 | .show_fdinfo = fanotify_show_fdinfo, |
| 1085 | .poll = fanotify_poll, |
| 1086 | .read = fanotify_read, |
| 1087 | .write = fanotify_write, |
| 1088 | .fasync = NULL, |
| 1089 | .release = fanotify_release, |
| 1090 | .unlocked_ioctl = fanotify_ioctl, |
| 1091 | .compat_ioctl = compat_ptr_ioctl, |
| 1092 | .llseek = noop_llseek, |
| 1093 | }; |
| 1094 | |
| 1095 | static int fanotify_find_path(int dfd, const char __user *filename, |
| 1096 | struct path *path, unsigned int flags, __u64 mask, |
| 1097 | unsigned int obj_type) |
| 1098 | { |
| 1099 | int ret; |
| 1100 | |
| 1101 | pr_debug("%s: dfd=%d filename=%p flags=%x\n" , __func__, |
| 1102 | dfd, filename, flags); |
| 1103 | |
| 1104 | if (filename == NULL) { |
| 1105 | CLASS(fd, f)(fd: dfd); |
| 1106 | |
| 1107 | if (fd_empty(f)) |
| 1108 | return -EBADF; |
| 1109 | |
| 1110 | if ((flags & FAN_MARK_ONLYDIR) && |
| 1111 | !(S_ISDIR(file_inode(fd_file(f))->i_mode))) |
| 1112 | return -ENOTDIR; |
| 1113 | |
| 1114 | *path = fd_file(f)->f_path; |
| 1115 | path_get(path); |
| 1116 | } else { |
| 1117 | unsigned int lookup_flags = 0; |
| 1118 | |
| 1119 | if (!(flags & FAN_MARK_DONT_FOLLOW)) |
| 1120 | lookup_flags |= LOOKUP_FOLLOW; |
| 1121 | if (flags & FAN_MARK_ONLYDIR) |
| 1122 | lookup_flags |= LOOKUP_DIRECTORY; |
| 1123 | |
| 1124 | ret = user_path_at(dfd, filename, lookup_flags, path); |
| 1125 | if (ret) |
| 1126 | goto out; |
| 1127 | } |
| 1128 | |
| 1129 | /* you can only watch an inode if you have read permissions on it */ |
| 1130 | ret = path_permission(path, MAY_READ); |
| 1131 | if (ret) { |
| 1132 | path_put(path); |
| 1133 | goto out; |
| 1134 | } |
| 1135 | |
| 1136 | ret = security_path_notify(path, mask, obj_type); |
| 1137 | if (ret) |
| 1138 | path_put(path); |
| 1139 | |
| 1140 | out: |
| 1141 | return ret; |
| 1142 | } |
| 1143 | |
| 1144 | static __u32 fanotify_mark_remove_from_mask(struct fsnotify_mark *fsn_mark, |
| 1145 | __u32 mask, unsigned int flags, |
| 1146 | __u32 umask, int *destroy) |
| 1147 | { |
| 1148 | __u32 oldmask, newmask; |
| 1149 | |
| 1150 | /* umask bits cannot be removed by user */ |
| 1151 | mask &= ~umask; |
| 1152 | spin_lock(lock: &fsn_mark->lock); |
| 1153 | oldmask = fsnotify_calc_mask(mark: fsn_mark); |
| 1154 | if (!(flags & FANOTIFY_MARK_IGNORE_BITS)) { |
| 1155 | fsn_mark->mask &= ~mask; |
| 1156 | } else { |
| 1157 | fsn_mark->ignore_mask &= ~mask; |
| 1158 | } |
| 1159 | newmask = fsnotify_calc_mask(mark: fsn_mark); |
| 1160 | /* |
| 1161 | * We need to keep the mark around even if remaining mask cannot |
| 1162 | * result in any events (e.g. mask == FAN_ONDIR) to support incremenal |
| 1163 | * changes to the mask. |
| 1164 | * Destroy mark when only umask bits remain. |
| 1165 | */ |
| 1166 | *destroy = !((fsn_mark->mask | fsn_mark->ignore_mask) & ~umask); |
| 1167 | spin_unlock(lock: &fsn_mark->lock); |
| 1168 | |
| 1169 | return oldmask & ~newmask; |
| 1170 | } |
| 1171 | |
| 1172 | static int fanotify_remove_mark(struct fsnotify_group *group, |
| 1173 | void *obj, unsigned int obj_type, __u32 mask, |
| 1174 | unsigned int flags, __u32 umask) |
| 1175 | { |
| 1176 | struct fsnotify_mark *fsn_mark = NULL; |
| 1177 | __u32 removed; |
| 1178 | int destroy_mark; |
| 1179 | |
| 1180 | fsnotify_group_lock(group); |
| 1181 | fsn_mark = fsnotify_find_mark(obj, obj_type, group); |
| 1182 | if (!fsn_mark) { |
| 1183 | fsnotify_group_unlock(group); |
| 1184 | return -ENOENT; |
| 1185 | } |
| 1186 | |
| 1187 | removed = fanotify_mark_remove_from_mask(fsn_mark, mask, flags, |
| 1188 | umask, destroy: &destroy_mark); |
| 1189 | if (removed & fsnotify_conn_mask(conn: fsn_mark->connector)) |
| 1190 | fsnotify_recalc_mask(conn: fsn_mark->connector); |
| 1191 | if (destroy_mark) |
| 1192 | fsnotify_detach_mark(mark: fsn_mark); |
| 1193 | fsnotify_group_unlock(group); |
| 1194 | if (destroy_mark) |
| 1195 | fsnotify_free_mark(mark: fsn_mark); |
| 1196 | |
| 1197 | /* matches the fsnotify_find_mark() */ |
| 1198 | fsnotify_put_mark(mark: fsn_mark); |
| 1199 | return 0; |
| 1200 | } |
| 1201 | |
| 1202 | static bool fanotify_mark_update_flags(struct fsnotify_mark *fsn_mark, |
| 1203 | unsigned int fan_flags) |
| 1204 | { |
| 1205 | bool want_iref = !(fan_flags & FAN_MARK_EVICTABLE); |
| 1206 | unsigned int ignore = fan_flags & FANOTIFY_MARK_IGNORE_BITS; |
| 1207 | bool recalc = false; |
| 1208 | |
| 1209 | /* |
| 1210 | * When using FAN_MARK_IGNORE for the first time, mark starts using |
| 1211 | * independent event flags in ignore mask. After that, trying to |
| 1212 | * update the ignore mask with the old FAN_MARK_IGNORED_MASK API |
| 1213 | * will result in EEXIST error. |
| 1214 | */ |
| 1215 | if (ignore == FAN_MARK_IGNORE) |
| 1216 | fsn_mark->flags |= FSNOTIFY_MARK_FLAG_HAS_IGNORE_FLAGS; |
| 1217 | |
| 1218 | /* |
| 1219 | * Setting FAN_MARK_IGNORED_SURV_MODIFY for the first time may lead to |
| 1220 | * the removal of the FS_MODIFY bit in calculated mask if it was set |
| 1221 | * because of an ignore mask that is now going to survive FS_MODIFY. |
| 1222 | */ |
| 1223 | if (ignore && (fan_flags & FAN_MARK_IGNORED_SURV_MODIFY) && |
| 1224 | !(fsn_mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY)) { |
| 1225 | fsn_mark->flags |= FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY; |
| 1226 | if (!(fsn_mark->mask & FS_MODIFY)) |
| 1227 | recalc = true; |
| 1228 | } |
| 1229 | |
| 1230 | if (fsn_mark->connector->type != FSNOTIFY_OBJ_TYPE_INODE || |
| 1231 | want_iref == !(fsn_mark->flags & FSNOTIFY_MARK_FLAG_NO_IREF)) |
| 1232 | return recalc; |
| 1233 | |
| 1234 | /* |
| 1235 | * NO_IREF may be removed from a mark, but not added. |
| 1236 | * When removed, fsnotify_recalc_mask() will take the inode ref. |
| 1237 | */ |
| 1238 | WARN_ON_ONCE(!want_iref); |
| 1239 | fsn_mark->flags &= ~FSNOTIFY_MARK_FLAG_NO_IREF; |
| 1240 | |
| 1241 | return true; |
| 1242 | } |
| 1243 | |
| 1244 | static bool fanotify_mark_add_to_mask(struct fsnotify_mark *fsn_mark, |
| 1245 | __u32 mask, unsigned int fan_flags) |
| 1246 | { |
| 1247 | bool recalc; |
| 1248 | |
| 1249 | spin_lock(lock: &fsn_mark->lock); |
| 1250 | if (!(fan_flags & FANOTIFY_MARK_IGNORE_BITS)) |
| 1251 | fsn_mark->mask |= mask; |
| 1252 | else |
| 1253 | fsn_mark->ignore_mask |= mask; |
| 1254 | |
| 1255 | recalc = fsnotify_calc_mask(mark: fsn_mark) & |
| 1256 | ~fsnotify_conn_mask(conn: fsn_mark->connector); |
| 1257 | |
| 1258 | recalc |= fanotify_mark_update_flags(fsn_mark, fan_flags); |
| 1259 | spin_unlock(lock: &fsn_mark->lock); |
| 1260 | |
| 1261 | return recalc; |
| 1262 | } |
| 1263 | |
/*
 * Filesystem id of the object being marked, as filled in by
 * fanotify_test_fsid() and cached in the mark by fanotify_set_mark_fsid().
 */
struct fan_fsid {
	/* Superblock of the dentry the fsid was read from */
	struct super_block *sb;
	/* fsid value obtained with vfs_get_fsid() */
	__kernel_fsid_t id;
	/* Set when the fsid is zero or differs from the fsid of the sb root */
	bool weak;
};
| 1269 | |
| 1270 | static int fanotify_set_mark_fsid(struct fsnotify_group *group, |
| 1271 | struct fsnotify_mark *mark, |
| 1272 | struct fan_fsid *fsid) |
| 1273 | { |
| 1274 | struct fsnotify_mark_connector *conn; |
| 1275 | struct fsnotify_mark *old; |
| 1276 | struct super_block *old_sb = NULL; |
| 1277 | |
| 1278 | FANOTIFY_MARK(mark)->fsid = fsid->id; |
| 1279 | mark->flags |= FSNOTIFY_MARK_FLAG_HAS_FSID; |
| 1280 | if (fsid->weak) |
| 1281 | mark->flags |= FSNOTIFY_MARK_FLAG_WEAK_FSID; |
| 1282 | |
| 1283 | /* First mark added will determine if group is single or multi fsid */ |
| 1284 | if (list_empty(head: &group->marks_list)) |
| 1285 | return 0; |
| 1286 | |
| 1287 | /* Find sb of an existing mark */ |
| 1288 | list_for_each_entry(old, &group->marks_list, g_list) { |
| 1289 | conn = READ_ONCE(old->connector); |
| 1290 | if (!conn) |
| 1291 | continue; |
| 1292 | old_sb = fsnotify_connector_sb(conn); |
| 1293 | if (old_sb) |
| 1294 | break; |
| 1295 | } |
| 1296 | |
| 1297 | /* Only detached marks left? */ |
| 1298 | if (!old_sb) |
| 1299 | return 0; |
| 1300 | |
| 1301 | /* Do not allow mixing of marks with weak and strong fsid */ |
| 1302 | if ((mark->flags ^ old->flags) & FSNOTIFY_MARK_FLAG_WEAK_FSID) |
| 1303 | return -EXDEV; |
| 1304 | |
| 1305 | /* Allow mixing of marks with strong fsid from different fs */ |
| 1306 | if (!fsid->weak) |
| 1307 | return 0; |
| 1308 | |
| 1309 | /* Do not allow mixing marks with weak fsid from different fs */ |
| 1310 | if (old_sb != fsid->sb) |
| 1311 | return -EXDEV; |
| 1312 | |
| 1313 | /* Do not allow mixing marks from different btrfs sub-volumes */ |
| 1314 | if (!fanotify_fsid_equal(fsid1: &FANOTIFY_MARK(mark: old)->fsid, |
| 1315 | fsid2: &FANOTIFY_MARK(mark)->fsid)) |
| 1316 | return -EXDEV; |
| 1317 | |
| 1318 | return 0; |
| 1319 | } |
| 1320 | |
| 1321 | static struct fsnotify_mark *fanotify_add_new_mark(struct fsnotify_group *group, |
| 1322 | void *obj, |
| 1323 | unsigned int obj_type, |
| 1324 | unsigned int fan_flags, |
| 1325 | struct fan_fsid *fsid) |
| 1326 | { |
| 1327 | struct ucounts *ucounts = group->fanotify_data.ucounts; |
| 1328 | struct fanotify_mark *fan_mark; |
| 1329 | struct fsnotify_mark *mark; |
| 1330 | int ret; |
| 1331 | |
| 1332 | /* |
| 1333 | * Enforce per user marks limits per user in all containing user ns. |
| 1334 | * A group with FAN_UNLIMITED_MARKS does not contribute to mark count |
| 1335 | * in the limited groups account. |
| 1336 | */ |
| 1337 | BUILD_BUG_ON(!(FANOTIFY_ADMIN_INIT_FLAGS & FAN_UNLIMITED_MARKS)); |
| 1338 | if (!FAN_GROUP_FLAG(group, FAN_UNLIMITED_MARKS) && |
| 1339 | !inc_ucount(ns: ucounts->ns, uid: ucounts->uid, type: UCOUNT_FANOTIFY_MARKS)) |
| 1340 | return ERR_PTR(error: -ENOSPC); |
| 1341 | |
| 1342 | fan_mark = kmem_cache_alloc(fanotify_mark_cache, GFP_KERNEL); |
| 1343 | if (!fan_mark) { |
| 1344 | ret = -ENOMEM; |
| 1345 | goto out_dec_ucounts; |
| 1346 | } |
| 1347 | |
| 1348 | mark = &fan_mark->fsn_mark; |
| 1349 | fsnotify_init_mark(mark, group); |
| 1350 | if (fan_flags & FAN_MARK_EVICTABLE) |
| 1351 | mark->flags |= FSNOTIFY_MARK_FLAG_NO_IREF; |
| 1352 | |
| 1353 | /* Cache fsid of filesystem containing the marked object */ |
| 1354 | if (fsid) { |
| 1355 | ret = fanotify_set_mark_fsid(group, mark, fsid); |
| 1356 | if (ret) |
| 1357 | goto out_put_mark; |
| 1358 | } else { |
| 1359 | fan_mark->fsid.val[0] = fan_mark->fsid.val[1] = 0; |
| 1360 | } |
| 1361 | |
| 1362 | ret = fsnotify_add_mark_locked(mark, obj, obj_type, add_flags: 0); |
| 1363 | if (ret) |
| 1364 | goto out_put_mark; |
| 1365 | |
| 1366 | return mark; |
| 1367 | |
| 1368 | out_put_mark: |
| 1369 | fsnotify_put_mark(mark); |
| 1370 | out_dec_ucounts: |
| 1371 | if (!FAN_GROUP_FLAG(group, FAN_UNLIMITED_MARKS)) |
| 1372 | dec_ucount(ucounts, type: UCOUNT_FANOTIFY_MARKS); |
| 1373 | return ERR_PTR(error: ret); |
| 1374 | } |
| 1375 | |
| 1376 | static int fanotify_group_init_error_pool(struct fsnotify_group *group) |
| 1377 | { |
| 1378 | if (mempool_initialized(pool: &group->fanotify_data.error_events_pool)) |
| 1379 | return 0; |
| 1380 | |
| 1381 | return mempool_init_kmalloc_pool(&group->fanotify_data.error_events_pool, |
| 1382 | FANOTIFY_DEFAULT_FEE_POOL_SIZE, |
| 1383 | sizeof(struct fanotify_error_event)); |
| 1384 | } |
| 1385 | |
| 1386 | static int fanotify_may_update_existing_mark(struct fsnotify_mark *fsn_mark, |
| 1387 | __u32 mask, unsigned int fan_flags) |
| 1388 | { |
| 1389 | /* |
| 1390 | * Non evictable mark cannot be downgraded to evictable mark. |
| 1391 | */ |
| 1392 | if (fan_flags & FAN_MARK_EVICTABLE && |
| 1393 | !(fsn_mark->flags & FSNOTIFY_MARK_FLAG_NO_IREF)) |
| 1394 | return -EEXIST; |
| 1395 | |
| 1396 | /* |
| 1397 | * New ignore mask semantics cannot be downgraded to old semantics. |
| 1398 | */ |
| 1399 | if (fan_flags & FAN_MARK_IGNORED_MASK && |
| 1400 | fsn_mark->flags & FSNOTIFY_MARK_FLAG_HAS_IGNORE_FLAGS) |
| 1401 | return -EEXIST; |
| 1402 | |
| 1403 | /* |
| 1404 | * An ignore mask that survives modify could never be downgraded to not |
| 1405 | * survive modify. With new FAN_MARK_IGNORE semantics we make that rule |
| 1406 | * explicit and return an error when trying to update the ignore mask |
| 1407 | * without the original FAN_MARK_IGNORED_SURV_MODIFY value. |
| 1408 | */ |
| 1409 | if (fan_flags & FAN_MARK_IGNORE && |
| 1410 | !(fan_flags & FAN_MARK_IGNORED_SURV_MODIFY) && |
| 1411 | fsn_mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY) |
| 1412 | return -EEXIST; |
| 1413 | |
| 1414 | /* For now pre-content events are not generated for directories */ |
| 1415 | mask |= fsn_mark->mask; |
| 1416 | if (mask & FANOTIFY_PRE_CONTENT_EVENTS && mask & FAN_ONDIR) |
| 1417 | return -EEXIST; |
| 1418 | |
| 1419 | return 0; |
| 1420 | } |
| 1421 | |
| 1422 | static int fanotify_add_mark(struct fsnotify_group *group, |
| 1423 | void *obj, unsigned int obj_type, |
| 1424 | __u32 mask, unsigned int fan_flags, |
| 1425 | struct fan_fsid *fsid) |
| 1426 | { |
| 1427 | struct fsnotify_mark *fsn_mark; |
| 1428 | bool recalc; |
| 1429 | int ret = 0; |
| 1430 | |
| 1431 | fsnotify_group_lock(group); |
| 1432 | fsn_mark = fsnotify_find_mark(obj, obj_type, group); |
| 1433 | if (!fsn_mark) { |
| 1434 | fsn_mark = fanotify_add_new_mark(group, obj, obj_type, |
| 1435 | fan_flags, fsid); |
| 1436 | if (IS_ERR(ptr: fsn_mark)) { |
| 1437 | fsnotify_group_unlock(group); |
| 1438 | return PTR_ERR(ptr: fsn_mark); |
| 1439 | } |
| 1440 | } |
| 1441 | |
| 1442 | /* |
| 1443 | * Check if requested mark flags conflict with an existing mark flags. |
| 1444 | */ |
| 1445 | ret = fanotify_may_update_existing_mark(fsn_mark, mask, fan_flags); |
| 1446 | if (ret) |
| 1447 | goto out; |
| 1448 | |
| 1449 | /* |
| 1450 | * Error events are pre-allocated per group, only if strictly |
| 1451 | * needed (i.e. FAN_FS_ERROR was requested). |
| 1452 | */ |
| 1453 | if (!(fan_flags & FANOTIFY_MARK_IGNORE_BITS) && |
| 1454 | (mask & FAN_FS_ERROR)) { |
| 1455 | ret = fanotify_group_init_error_pool(group); |
| 1456 | if (ret) |
| 1457 | goto out; |
| 1458 | } |
| 1459 | |
| 1460 | recalc = fanotify_mark_add_to_mask(fsn_mark, mask, fan_flags); |
| 1461 | if (recalc) |
| 1462 | fsnotify_recalc_mask(conn: fsn_mark->connector); |
| 1463 | |
| 1464 | out: |
| 1465 | fsnotify_group_unlock(group); |
| 1466 | |
| 1467 | fsnotify_put_mark(mark: fsn_mark); |
| 1468 | return ret; |
| 1469 | } |
| 1470 | |
| 1471 | static struct fsnotify_event *fanotify_alloc_overflow_event(void) |
| 1472 | { |
| 1473 | struct fanotify_event *oevent; |
| 1474 | |
| 1475 | oevent = kmalloc(sizeof(*oevent), GFP_KERNEL_ACCOUNT); |
| 1476 | if (!oevent) |
| 1477 | return NULL; |
| 1478 | |
| 1479 | fanotify_init_event(event: oevent, hash: 0, FS_Q_OVERFLOW); |
| 1480 | oevent->type = FANOTIFY_EVENT_TYPE_OVERFLOW; |
| 1481 | |
| 1482 | return &oevent->fse; |
| 1483 | } |
| 1484 | |
| 1485 | static struct hlist_head *fanotify_alloc_merge_hash(void) |
| 1486 | { |
| 1487 | struct hlist_head *hash; |
| 1488 | |
| 1489 | hash = kmalloc(sizeof(struct hlist_head) << FANOTIFY_HTABLE_BITS, |
| 1490 | GFP_KERNEL_ACCOUNT); |
| 1491 | if (!hash) |
| 1492 | return NULL; |
| 1493 | |
| 1494 | __hash_init(ht: hash, FANOTIFY_HTABLE_SIZE); |
| 1495 | |
| 1496 | return hash; |
| 1497 | } |
| 1498 | |
| 1499 | /* fanotify syscalls */ |
| 1500 | SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) |
| 1501 | { |
| 1502 | struct user_namespace *user_ns = current_user_ns(); |
| 1503 | struct fsnotify_group *group; |
| 1504 | int f_flags, fd; |
| 1505 | unsigned int fid_mode = flags & FANOTIFY_FID_BITS; |
| 1506 | unsigned int class = flags & FANOTIFY_CLASS_BITS; |
| 1507 | unsigned int internal_flags = 0; |
| 1508 | struct file *file; |
| 1509 | |
| 1510 | pr_debug("%s: flags=%x event_f_flags=%x\n" , |
| 1511 | __func__, flags, event_f_flags); |
| 1512 | |
| 1513 | if (!capable(CAP_SYS_ADMIN)) { |
| 1514 | /* |
| 1515 | * An unprivileged user can setup an fanotify group with |
| 1516 | * limited functionality - an unprivileged group is limited to |
| 1517 | * notification events with file handles or mount ids and it |
| 1518 | * cannot use unlimited queue/marks. |
| 1519 | */ |
| 1520 | if ((flags & FANOTIFY_ADMIN_INIT_FLAGS) || |
| 1521 | !(flags & (FANOTIFY_FID_BITS | FAN_REPORT_MNT))) |
| 1522 | return -EPERM; |
| 1523 | |
| 1524 | /* |
| 1525 | * Setting the internal flag FANOTIFY_UNPRIV on the group |
| 1526 | * prevents setting mount/filesystem marks on this group and |
| 1527 | * prevents reporting pid and open fd in events. |
| 1528 | */ |
| 1529 | internal_flags |= FANOTIFY_UNPRIV; |
| 1530 | } |
| 1531 | |
| 1532 | #ifdef CONFIG_AUDITSYSCALL |
| 1533 | if (flags & ~(FANOTIFY_INIT_FLAGS | FAN_ENABLE_AUDIT)) |
| 1534 | #else |
| 1535 | if (flags & ~FANOTIFY_INIT_FLAGS) |
| 1536 | #endif |
| 1537 | return -EINVAL; |
| 1538 | |
| 1539 | /* |
| 1540 | * A pidfd can only be returned for a thread-group leader; thus |
| 1541 | * FAN_REPORT_PIDFD and FAN_REPORT_TID need to remain mutually |
| 1542 | * exclusive. |
| 1543 | */ |
| 1544 | if ((flags & FAN_REPORT_PIDFD) && (flags & FAN_REPORT_TID)) |
| 1545 | return -EINVAL; |
| 1546 | |
| 1547 | /* Don't allow mixing mnt events with inode events for now */ |
| 1548 | if (flags & FAN_REPORT_MNT) { |
| 1549 | if (class != FAN_CLASS_NOTIF) |
| 1550 | return -EINVAL; |
| 1551 | if (flags & (FANOTIFY_FID_BITS | FAN_REPORT_FD_ERROR)) |
| 1552 | return -EINVAL; |
| 1553 | } |
| 1554 | |
| 1555 | if (event_f_flags & ~FANOTIFY_INIT_ALL_EVENT_F_BITS) |
| 1556 | return -EINVAL; |
| 1557 | |
| 1558 | switch (event_f_flags & O_ACCMODE) { |
| 1559 | case O_RDONLY: |
| 1560 | case O_RDWR: |
| 1561 | case O_WRONLY: |
| 1562 | break; |
| 1563 | default: |
| 1564 | return -EINVAL; |
| 1565 | } |
| 1566 | |
| 1567 | if (fid_mode && class != FAN_CLASS_NOTIF) |
| 1568 | return -EINVAL; |
| 1569 | |
| 1570 | /* |
| 1571 | * Child name is reported with parent fid so requires dir fid. |
| 1572 | * We can report both child fid and dir fid with or without name. |
| 1573 | */ |
| 1574 | if ((fid_mode & FAN_REPORT_NAME) && !(fid_mode & FAN_REPORT_DIR_FID)) |
| 1575 | return -EINVAL; |
| 1576 | |
| 1577 | /* |
| 1578 | * FAN_REPORT_TARGET_FID requires FAN_REPORT_NAME and FAN_REPORT_FID |
| 1579 | * and is used as an indication to report both dir and child fid on all |
| 1580 | * dirent events. |
| 1581 | */ |
| 1582 | if ((fid_mode & FAN_REPORT_TARGET_FID) && |
| 1583 | (!(fid_mode & FAN_REPORT_NAME) || !(fid_mode & FAN_REPORT_FID))) |
| 1584 | return -EINVAL; |
| 1585 | |
| 1586 | f_flags = O_RDWR; |
| 1587 | if (flags & FAN_CLOEXEC) |
| 1588 | f_flags |= O_CLOEXEC; |
| 1589 | if (flags & FAN_NONBLOCK) |
| 1590 | f_flags |= O_NONBLOCK; |
| 1591 | |
| 1592 | /* fsnotify_alloc_group takes a ref. Dropped in fanotify_release */ |
| 1593 | group = fsnotify_alloc_group(ops: &fanotify_fsnotify_ops, |
| 1594 | FSNOTIFY_GROUP_USER); |
| 1595 | if (IS_ERR(ptr: group)) { |
| 1596 | return PTR_ERR(ptr: group); |
| 1597 | } |
| 1598 | |
| 1599 | /* Enforce groups limits per user in all containing user ns */ |
| 1600 | group->fanotify_data.ucounts = inc_ucount(ns: user_ns, current_euid(), |
| 1601 | type: UCOUNT_FANOTIFY_GROUPS); |
| 1602 | if (!group->fanotify_data.ucounts) { |
| 1603 | fd = -EMFILE; |
| 1604 | goto out_destroy_group; |
| 1605 | } |
| 1606 | |
| 1607 | group->fanotify_data.flags = flags | internal_flags; |
| 1608 | group->memcg = get_mem_cgroup_from_mm(current->mm); |
| 1609 | group->user_ns = get_user_ns(ns: user_ns); |
| 1610 | |
| 1611 | group->fanotify_data.merge_hash = fanotify_alloc_merge_hash(); |
| 1612 | if (!group->fanotify_data.merge_hash) { |
| 1613 | fd = -ENOMEM; |
| 1614 | goto out_destroy_group; |
| 1615 | } |
| 1616 | |
| 1617 | group->overflow_event = fanotify_alloc_overflow_event(); |
| 1618 | if (unlikely(!group->overflow_event)) { |
| 1619 | fd = -ENOMEM; |
| 1620 | goto out_destroy_group; |
| 1621 | } |
| 1622 | |
| 1623 | if (force_o_largefile()) |
| 1624 | event_f_flags |= O_LARGEFILE; |
| 1625 | group->fanotify_data.f_flags = event_f_flags; |
| 1626 | init_waitqueue_head(&group->fanotify_data.access_waitq); |
| 1627 | INIT_LIST_HEAD(list: &group->fanotify_data.access_list); |
| 1628 | switch (class) { |
| 1629 | case FAN_CLASS_NOTIF: |
| 1630 | group->priority = FSNOTIFY_PRIO_NORMAL; |
| 1631 | break; |
| 1632 | case FAN_CLASS_CONTENT: |
| 1633 | group->priority = FSNOTIFY_PRIO_CONTENT; |
| 1634 | break; |
| 1635 | case FAN_CLASS_PRE_CONTENT: |
| 1636 | group->priority = FSNOTIFY_PRIO_PRE_CONTENT; |
| 1637 | break; |
| 1638 | default: |
| 1639 | fd = -EINVAL; |
| 1640 | goto out_destroy_group; |
| 1641 | } |
| 1642 | |
| 1643 | BUILD_BUG_ON(!(FANOTIFY_ADMIN_INIT_FLAGS & FAN_UNLIMITED_QUEUE)); |
| 1644 | if (flags & FAN_UNLIMITED_QUEUE) { |
| 1645 | group->max_events = UINT_MAX; |
| 1646 | } else { |
| 1647 | group->max_events = fanotify_max_queued_events; |
| 1648 | } |
| 1649 | |
| 1650 | if (flags & FAN_ENABLE_AUDIT) { |
| 1651 | fd = -EPERM; |
| 1652 | if (!capable(CAP_AUDIT_WRITE)) |
| 1653 | goto out_destroy_group; |
| 1654 | } |
| 1655 | |
| 1656 | fd = get_unused_fd_flags(flags: f_flags); |
| 1657 | if (fd < 0) |
| 1658 | goto out_destroy_group; |
| 1659 | |
| 1660 | file = anon_inode_getfile_fmode(name: "[fanotify]" , fops: &fanotify_fops, priv: group, |
| 1661 | flags: f_flags, FMODE_NONOTIFY); |
| 1662 | if (IS_ERR(ptr: file)) { |
| 1663 | put_unused_fd(fd); |
| 1664 | fd = PTR_ERR(ptr: file); |
| 1665 | goto out_destroy_group; |
| 1666 | } |
| 1667 | fd_install(fd, file); |
| 1668 | return fd; |
| 1669 | |
| 1670 | out_destroy_group: |
| 1671 | fsnotify_destroy_group(group); |
| 1672 | return fd; |
| 1673 | } |
| 1674 | |
| 1675 | static int fanotify_test_fsid(struct dentry *dentry, unsigned int flags, |
| 1676 | struct fan_fsid *fsid) |
| 1677 | { |
| 1678 | unsigned int mark_type = flags & FANOTIFY_MARK_TYPE_BITS; |
| 1679 | __kernel_fsid_t root_fsid; |
| 1680 | int err; |
| 1681 | |
| 1682 | /* |
| 1683 | * Make sure dentry is not of a filesystem with zero fsid (e.g. fuse). |
| 1684 | */ |
| 1685 | err = vfs_get_fsid(dentry, fsid: &fsid->id); |
| 1686 | if (err) |
| 1687 | return err; |
| 1688 | |
| 1689 | fsid->sb = dentry->d_sb; |
| 1690 | if (!fsid->id.val[0] && !fsid->id.val[1]) { |
| 1691 | err = -ENODEV; |
| 1692 | goto weak; |
| 1693 | } |
| 1694 | |
| 1695 | /* |
| 1696 | * Make sure dentry is not of a filesystem subvolume (e.g. btrfs) |
| 1697 | * which uses a different fsid than sb root. |
| 1698 | */ |
| 1699 | err = vfs_get_fsid(dentry: dentry->d_sb->s_root, fsid: &root_fsid); |
| 1700 | if (err) |
| 1701 | return err; |
| 1702 | |
| 1703 | if (!fanotify_fsid_equal(fsid1: &root_fsid, fsid2: &fsid->id)) { |
| 1704 | err = -EXDEV; |
| 1705 | goto weak; |
| 1706 | } |
| 1707 | |
| 1708 | fsid->weak = false; |
| 1709 | return 0; |
| 1710 | |
| 1711 | weak: |
| 1712 | /* Allow weak fsid when marking inodes */ |
| 1713 | fsid->weak = true; |
| 1714 | return (mark_type == FAN_MARK_INODE) ? 0 : err; |
| 1715 | } |
| 1716 | |
| 1717 | /* Check if filesystem can encode a unique fid */ |
| 1718 | static int fanotify_test_fid(struct dentry *dentry, unsigned int flags) |
| 1719 | { |
| 1720 | unsigned int mark_type = flags & FANOTIFY_MARK_TYPE_BITS; |
| 1721 | const struct export_operations *nop = dentry->d_sb->s_export_op; |
| 1722 | |
| 1723 | /* |
| 1724 | * We need to make sure that the filesystem supports encoding of |
| 1725 | * file handles so user can use name_to_handle_at() to compare fids |
| 1726 | * reported with events to the file handle of watched objects. |
| 1727 | */ |
| 1728 | if (!exportfs_can_encode_fid(nop)) |
| 1729 | return -EOPNOTSUPP; |
| 1730 | |
| 1731 | /* |
| 1732 | * For sb/mount mark, we also need to make sure that the filesystem |
| 1733 | * supports decoding file handles, so user has a way to map back the |
| 1734 | * reported fids to filesystem objects. |
| 1735 | */ |
| 1736 | if (mark_type != FAN_MARK_INODE && !exportfs_can_decode_fh(nop)) |
| 1737 | return -EOPNOTSUPP; |
| 1738 | |
| 1739 | return 0; |
| 1740 | } |
| 1741 | |
| 1742 | static int fanotify_events_supported(struct fsnotify_group *group, |
| 1743 | const struct path *path, __u64 mask, |
| 1744 | unsigned int flags) |
| 1745 | { |
| 1746 | unsigned int mark_type = flags & FANOTIFY_MARK_TYPE_BITS; |
| 1747 | bool is_dir = d_is_dir(dentry: path->dentry); |
| 1748 | /* Strict validation of events in non-dir inode mask with v5.17+ APIs */ |
| 1749 | bool strict_dir_events = FAN_GROUP_FLAG(group, FAN_REPORT_TARGET_FID) || |
| 1750 | (mask & FAN_RENAME) || |
| 1751 | (flags & FAN_MARK_IGNORE); |
| 1752 | |
| 1753 | /* |
| 1754 | * Filesystems need to opt-into pre-content evnets (a.k.a HSM) |
| 1755 | * and they are only supported on regular files and directories. |
| 1756 | */ |
| 1757 | if (mask & FANOTIFY_PRE_CONTENT_EVENTS) { |
| 1758 | if (!(path->mnt->mnt_sb->s_iflags & SB_I_ALLOW_HSM)) |
| 1759 | return -EOPNOTSUPP; |
| 1760 | if (!is_dir && !d_is_reg(dentry: path->dentry)) |
| 1761 | return -EINVAL; |
| 1762 | } |
| 1763 | |
| 1764 | /* |
| 1765 | * Some filesystems such as 'proc' acquire unusual locks when opening |
| 1766 | * files. For them fanotify permission events have high chances of |
| 1767 | * deadlocking the system - open done when reporting fanotify event |
| 1768 | * blocks on this "unusual" lock while another process holding the lock |
| 1769 | * waits for fanotify permission event to be answered. Just disallow |
| 1770 | * permission events for such filesystems. |
| 1771 | */ |
| 1772 | if (mask & FANOTIFY_PERM_EVENTS && |
| 1773 | path->mnt->mnt_sb->s_type->fs_flags & FS_DISALLOW_NOTIFY_PERM) |
| 1774 | return -EINVAL; |
| 1775 | |
| 1776 | /* |
| 1777 | * mount and sb marks are not allowed on kernel internal pseudo fs, |
| 1778 | * like pipe_mnt, because that would subscribe to events on all the |
| 1779 | * anonynous pipes in the system. |
| 1780 | * |
| 1781 | * SB_NOUSER covers all of the internal pseudo fs whose objects are not |
| 1782 | * exposed to user's mount namespace, but there are other SB_KERNMOUNT |
| 1783 | * fs, like nsfs, debugfs, for which the value of allowing sb and mount |
| 1784 | * mark is questionable. For now we leave them alone. |
| 1785 | */ |
| 1786 | if (mark_type != FAN_MARK_INODE && |
| 1787 | path->mnt->mnt_sb->s_flags & SB_NOUSER) |
| 1788 | return -EINVAL; |
| 1789 | |
| 1790 | /* |
| 1791 | * We shouldn't have allowed setting dirent events and the directory |
| 1792 | * flags FAN_ONDIR and FAN_EVENT_ON_CHILD in mask of non-dir inode, |
| 1793 | * but because we always allowed it, error only when using new APIs. |
| 1794 | */ |
| 1795 | if (strict_dir_events && mark_type == FAN_MARK_INODE && |
| 1796 | !is_dir && (mask & FANOTIFY_DIRONLY_EVENT_BITS)) |
| 1797 | return -ENOTDIR; |
| 1798 | |
| 1799 | return 0; |
| 1800 | } |
| 1801 | |
| 1802 | static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, |
| 1803 | int dfd, const char __user *pathname) |
| 1804 | { |
| 1805 | struct inode *inode = NULL; |
| 1806 | struct fsnotify_group *group; |
| 1807 | struct path path; |
| 1808 | struct fan_fsid __fsid, *fsid = NULL; |
| 1809 | struct user_namespace *user_ns = NULL; |
| 1810 | struct mnt_namespace *mntns; |
| 1811 | u32 valid_mask = FANOTIFY_EVENTS | FANOTIFY_EVENT_FLAGS; |
| 1812 | unsigned int mark_type = flags & FANOTIFY_MARK_TYPE_BITS; |
| 1813 | unsigned int mark_cmd = flags & FANOTIFY_MARK_CMD_BITS; |
| 1814 | unsigned int ignore = flags & FANOTIFY_MARK_IGNORE_BITS; |
| 1815 | unsigned int obj_type, fid_mode; |
| 1816 | void *obj = NULL; |
| 1817 | u32 umask = 0; |
| 1818 | int ret; |
| 1819 | |
| 1820 | pr_debug("%s: fanotify_fd=%d flags=%x dfd=%d pathname=%p mask=%llx\n" , |
| 1821 | __func__, fanotify_fd, flags, dfd, pathname, mask); |
| 1822 | |
| 1823 | /* we only use the lower 32 bits as of right now. */ |
| 1824 | if (upper_32_bits(mask)) |
| 1825 | return -EINVAL; |
| 1826 | |
| 1827 | if (flags & ~FANOTIFY_MARK_FLAGS) |
| 1828 | return -EINVAL; |
| 1829 | |
| 1830 | switch (mark_type) { |
| 1831 | case FAN_MARK_INODE: |
| 1832 | obj_type = FSNOTIFY_OBJ_TYPE_INODE; |
| 1833 | break; |
| 1834 | case FAN_MARK_MOUNT: |
| 1835 | obj_type = FSNOTIFY_OBJ_TYPE_VFSMOUNT; |
| 1836 | break; |
| 1837 | case FAN_MARK_FILESYSTEM: |
| 1838 | obj_type = FSNOTIFY_OBJ_TYPE_SB; |
| 1839 | break; |
| 1840 | case FAN_MARK_MNTNS: |
| 1841 | obj_type = FSNOTIFY_OBJ_TYPE_MNTNS; |
| 1842 | break; |
| 1843 | default: |
| 1844 | return -EINVAL; |
| 1845 | } |
| 1846 | |
| 1847 | switch (mark_cmd) { |
| 1848 | case FAN_MARK_ADD: |
| 1849 | case FAN_MARK_REMOVE: |
| 1850 | if (!mask) |
| 1851 | return -EINVAL; |
| 1852 | break; |
| 1853 | case FAN_MARK_FLUSH: |
| 1854 | if (flags & ~(FANOTIFY_MARK_TYPE_BITS | FAN_MARK_FLUSH)) |
| 1855 | return -EINVAL; |
| 1856 | break; |
| 1857 | default: |
| 1858 | return -EINVAL; |
| 1859 | } |
| 1860 | |
| 1861 | if (IS_ENABLED(CONFIG_FANOTIFY_ACCESS_PERMISSIONS)) |
| 1862 | valid_mask |= FANOTIFY_PERM_EVENTS; |
| 1863 | |
| 1864 | if (mask & ~valid_mask) |
| 1865 | return -EINVAL; |
| 1866 | |
| 1867 | |
| 1868 | /* We don't allow FAN_MARK_IGNORE & FAN_MARK_IGNORED_MASK together */ |
| 1869 | if (ignore == (FAN_MARK_IGNORE | FAN_MARK_IGNORED_MASK)) |
| 1870 | return -EINVAL; |
| 1871 | |
| 1872 | /* |
| 1873 | * Event flags (FAN_ONDIR, FAN_EVENT_ON_CHILD) have no effect with |
| 1874 | * FAN_MARK_IGNORED_MASK. |
| 1875 | */ |
| 1876 | if (ignore == FAN_MARK_IGNORED_MASK) { |
| 1877 | mask &= ~FANOTIFY_EVENT_FLAGS; |
| 1878 | umask = FANOTIFY_EVENT_FLAGS; |
| 1879 | } |
| 1880 | |
| 1881 | CLASS(fd, f)(fd: fanotify_fd); |
| 1882 | if (fd_empty(f)) |
| 1883 | return -EBADF; |
| 1884 | |
| 1885 | /* verify that this is indeed an fanotify instance */ |
| 1886 | if (unlikely(fd_file(f)->f_op != &fanotify_fops)) |
| 1887 | return -EINVAL; |
| 1888 | group = fd_file(f)->private_data; |
| 1889 | |
| 1890 | /* Only report mount events on mnt namespace */ |
| 1891 | if (FAN_GROUP_FLAG(group, FAN_REPORT_MNT)) { |
| 1892 | if (mask & ~FANOTIFY_MOUNT_EVENTS) |
| 1893 | return -EINVAL; |
| 1894 | if (mark_type != FAN_MARK_MNTNS) |
| 1895 | return -EINVAL; |
| 1896 | } else { |
| 1897 | if (mask & FANOTIFY_MOUNT_EVENTS) |
| 1898 | return -EINVAL; |
| 1899 | if (mark_type == FAN_MARK_MNTNS) |
| 1900 | return -EINVAL; |
| 1901 | } |
| 1902 | |
| 1903 | /* |
| 1904 | * A user is allowed to setup sb/mount/mntns marks only if it is |
| 1905 | * capable in the user ns where the group was created. |
| 1906 | */ |
| 1907 | if (!ns_capable(ns: group->user_ns, CAP_SYS_ADMIN) && |
| 1908 | mark_type != FAN_MARK_INODE) |
| 1909 | return -EPERM; |
| 1910 | |
| 1911 | /* |
| 1912 | * Permission events are not allowed for FAN_CLASS_NOTIF. |
| 1913 | * Pre-content permission events are not allowed for FAN_CLASS_CONTENT. |
| 1914 | */ |
| 1915 | if (mask & FANOTIFY_PERM_EVENTS && |
| 1916 | group->priority == FSNOTIFY_PRIO_NORMAL) |
| 1917 | return -EINVAL; |
| 1918 | else if (mask & FANOTIFY_PRE_CONTENT_EVENTS && |
| 1919 | group->priority == FSNOTIFY_PRIO_CONTENT) |
| 1920 | return -EINVAL; |
| 1921 | |
| 1922 | if (mask & FAN_FS_ERROR && |
| 1923 | mark_type != FAN_MARK_FILESYSTEM) |
| 1924 | return -EINVAL; |
| 1925 | |
| 1926 | /* |
| 1927 | * Evictable is only relevant for inode marks, because only inode object |
| 1928 | * can be evicted on memory pressure. |
| 1929 | */ |
| 1930 | if (flags & FAN_MARK_EVICTABLE && |
| 1931 | mark_type != FAN_MARK_INODE) |
| 1932 | return -EINVAL; |
| 1933 | |
| 1934 | /* |
| 1935 | * Events that do not carry enough information to report |
| 1936 | * event->fd require a group that supports reporting fid. Those |
| 1937 | * events are not supported on a mount mark, because they do not |
| 1938 | * carry enough information (i.e. path) to be filtered by mount |
| 1939 | * point. |
| 1940 | */ |
| 1941 | fid_mode = FAN_GROUP_FLAG(group, FANOTIFY_FID_BITS); |
| 1942 | if (mask & ~(FANOTIFY_FD_EVENTS|FANOTIFY_MOUNT_EVENTS|FANOTIFY_EVENT_FLAGS) && |
| 1943 | (!fid_mode || mark_type == FAN_MARK_MOUNT)) |
| 1944 | return -EINVAL; |
| 1945 | |
| 1946 | /* |
| 1947 | * FAN_RENAME uses special info type records to report the old and |
| 1948 | * new parent+name. Reporting only old and new parent id is less |
| 1949 | * useful and was not implemented. |
| 1950 | */ |
| 1951 | if (mask & FAN_RENAME && !(fid_mode & FAN_REPORT_NAME)) |
| 1952 | return -EINVAL; |
| 1953 | |
| 1954 | /* Pre-content events are not currently generated for directories. */ |
| 1955 | if (mask & FANOTIFY_PRE_CONTENT_EVENTS && mask & FAN_ONDIR) |
| 1956 | return -EINVAL; |
| 1957 | |
| 1958 | if (mark_cmd == FAN_MARK_FLUSH) { |
| 1959 | fsnotify_clear_marks_by_group(group, obj_type); |
| 1960 | return 0; |
| 1961 | } |
| 1962 | |
| 1963 | ret = fanotify_find_path(dfd, filename: pathname, path: &path, flags, |
| 1964 | mask: (mask & ALL_FSNOTIFY_EVENTS), obj_type); |
| 1965 | if (ret) |
| 1966 | return ret; |
| 1967 | |
| 1968 | if (mark_cmd == FAN_MARK_ADD) { |
| 1969 | ret = fanotify_events_supported(group, path: &path, mask, flags); |
| 1970 | if (ret) |
| 1971 | goto path_put_and_out; |
| 1972 | } |
| 1973 | |
| 1974 | if (fid_mode) { |
| 1975 | ret = fanotify_test_fsid(dentry: path.dentry, flags, fsid: &__fsid); |
| 1976 | if (ret) |
| 1977 | goto path_put_and_out; |
| 1978 | |
| 1979 | ret = fanotify_test_fid(dentry: path.dentry, flags); |
| 1980 | if (ret) |
| 1981 | goto path_put_and_out; |
| 1982 | |
| 1983 | fsid = &__fsid; |
| 1984 | } |
| 1985 | |
| 1986 | /* |
| 1987 | * In addition to being capable in the user ns where group was created, |
| 1988 | * the user also needs to be capable in the user ns associated with |
| 1989 | * the filesystem or in the user ns associated with the mntns |
| 1990 | * (when marking mntns). |
| 1991 | */ |
| 1992 | if (obj_type == FSNOTIFY_OBJ_TYPE_INODE) { |
| 1993 | inode = path.dentry->d_inode; |
| 1994 | obj = inode; |
| 1995 | } else if (obj_type == FSNOTIFY_OBJ_TYPE_VFSMOUNT) { |
| 1996 | user_ns = path.mnt->mnt_sb->s_user_ns; |
| 1997 | obj = path.mnt; |
| 1998 | } else if (obj_type == FSNOTIFY_OBJ_TYPE_SB) { |
| 1999 | user_ns = path.mnt->mnt_sb->s_user_ns; |
| 2000 | obj = path.mnt->mnt_sb; |
| 2001 | } else if (obj_type == FSNOTIFY_OBJ_TYPE_MNTNS) { |
| 2002 | mntns = mnt_ns_from_dentry(dentry: path.dentry); |
| 2003 | user_ns = mntns->user_ns; |
| 2004 | obj = mntns; |
| 2005 | } |
| 2006 | |
| 2007 | ret = -EPERM; |
| 2008 | if (user_ns && !ns_capable(ns: user_ns, CAP_SYS_ADMIN)) |
| 2009 | goto path_put_and_out; |
| 2010 | |
| 2011 | ret = -EINVAL; |
| 2012 | if (!obj) |
| 2013 | goto path_put_and_out; |
| 2014 | |
| 2015 | /* |
| 2016 | * If some other task has this inode open for write we should not add |
| 2017 | * an ignore mask, unless that ignore mask is supposed to survive |
| 2018 | * modification changes anyway. |
| 2019 | */ |
| 2020 | if (mark_cmd == FAN_MARK_ADD && (flags & FANOTIFY_MARK_IGNORE_BITS) && |
| 2021 | !(flags & FAN_MARK_IGNORED_SURV_MODIFY)) { |
| 2022 | ret = !inode ? -EINVAL : -EISDIR; |
| 2023 | /* FAN_MARK_IGNORE requires SURV_MODIFY for sb/mount/dir marks */ |
| 2024 | if (ignore == FAN_MARK_IGNORE && |
| 2025 | (!inode || S_ISDIR(inode->i_mode))) |
| 2026 | goto path_put_and_out; |
| 2027 | |
| 2028 | ret = 0; |
| 2029 | if (inode && inode_is_open_for_write(inode)) |
| 2030 | goto path_put_and_out; |
| 2031 | } |
| 2032 | |
| 2033 | /* Mask out FAN_EVENT_ON_CHILD flag for sb/mount/non-dir marks */ |
| 2034 | if (!inode || !S_ISDIR(inode->i_mode)) { |
| 2035 | mask &= ~FAN_EVENT_ON_CHILD; |
| 2036 | umask = FAN_EVENT_ON_CHILD; |
| 2037 | /* |
| 2038 | * If group needs to report parent fid, register for getting |
| 2039 | * events with parent/name info for non-directory. |
| 2040 | */ |
| 2041 | if ((fid_mode & FAN_REPORT_DIR_FID) && |
| 2042 | (flags & FAN_MARK_ADD) && !ignore) |
| 2043 | mask |= FAN_EVENT_ON_CHILD; |
| 2044 | } |
| 2045 | |
| 2046 | /* create/update an inode mark */ |
| 2047 | switch (mark_cmd) { |
| 2048 | case FAN_MARK_ADD: |
| 2049 | ret = fanotify_add_mark(group, obj, obj_type, mask, fan_flags: flags, |
| 2050 | fsid); |
| 2051 | break; |
| 2052 | case FAN_MARK_REMOVE: |
| 2053 | ret = fanotify_remove_mark(group, obj, obj_type, mask, flags, |
| 2054 | umask); |
| 2055 | break; |
| 2056 | default: |
| 2057 | ret = -EINVAL; |
| 2058 | } |
| 2059 | |
| 2060 | path_put_and_out: |
| 2061 | path_put(&path); |
| 2062 | return ret; |
| 2063 | } |
| 2064 | |
| 2065 | #ifndef CONFIG_ARCH_SPLIT_ARG64 |
| 2066 | SYSCALL_DEFINE5(fanotify_mark, int, fanotify_fd, unsigned int, flags, |
| 2067 | __u64, mask, int, dfd, |
| 2068 | const char __user *, pathname) |
| 2069 | { |
| 2070 | return do_fanotify_mark(fanotify_fd, flags, mask, dfd, pathname); |
| 2071 | } |
| 2072 | #endif |
| 2073 | |
#if defined(CONFIG_ARCH_SPLIT_ARG64) || defined(CONFIG_COMPAT)
/*
 * fanotify_mark() entry point for split-argument and compat callers.
 * NOTE(review): SC_ARG64()/SC_VAL64() presumably declare the mask as two
 * 32-bit halves and reassemble it - confirm against their definitions.
 */
SYSCALL32_DEFINE6(fanotify_mark,
		  int, fanotify_fd, unsigned int, flags,
		  SC_ARG64(mask), int, dfd,
		  const char __user *, pathname)
{
	__u64 full_mask = SC_VAL64(__u64, mask);

	return do_fanotify_mark(fanotify_fd, flags, full_mask, dfd, pathname);
}
#endif /* CONFIG_ARCH_SPLIT_ARG64 || CONFIG_COMPAT */
| 2084 | |
| 2085 | /* |
| 2086 | * fanotify_user_setup - Our initialization function. Note that we cannot return |
| 2087 | * error because we have compiled-in VFS hooks. So an (unlikely) failure here |
| 2088 | * must result in panic(). |
| 2089 | */ |
| 2090 | static int __init fanotify_user_setup(void) |
| 2091 | { |
| 2092 | struct sysinfo si; |
| 2093 | int max_marks; |
| 2094 | |
| 2095 | si_meminfo(val: &si); |
| 2096 | /* |
| 2097 | * Allow up to 1% of addressable memory to be accounted for per user |
| 2098 | * marks limited to the range [8192, 1048576]. mount and sb marks are |
| 2099 | * a lot cheaper than inode marks, but there is no reason for a user |
| 2100 | * to have many of those, so calculate by the cost of inode marks. |
| 2101 | */ |
| 2102 | max_marks = (((si.totalram - si.totalhigh) / 100) << PAGE_SHIFT) / |
| 2103 | INODE_MARK_COST; |
| 2104 | max_marks = clamp(max_marks, FANOTIFY_OLD_DEFAULT_MAX_MARKS, |
| 2105 | FANOTIFY_DEFAULT_MAX_USER_MARKS); |
| 2106 | |
| 2107 | BUILD_BUG_ON(FANOTIFY_INIT_FLAGS & FANOTIFY_INTERNAL_GROUP_FLAGS); |
| 2108 | BUILD_BUG_ON(HWEIGHT32(FANOTIFY_INIT_FLAGS) != 14); |
| 2109 | BUILD_BUG_ON(HWEIGHT32(FANOTIFY_MARK_FLAGS) != 11); |
| 2110 | |
| 2111 | fanotify_mark_cache = KMEM_CACHE(fanotify_mark, |
| 2112 | SLAB_PANIC|SLAB_ACCOUNT); |
| 2113 | fanotify_fid_event_cachep = KMEM_CACHE(fanotify_fid_event, |
| 2114 | SLAB_PANIC); |
| 2115 | fanotify_path_event_cachep = KMEM_CACHE(fanotify_path_event, |
| 2116 | SLAB_PANIC); |
| 2117 | if (IS_ENABLED(CONFIG_FANOTIFY_ACCESS_PERMISSIONS)) { |
| 2118 | fanotify_perm_event_cachep = |
| 2119 | KMEM_CACHE(fanotify_perm_event, SLAB_PANIC); |
| 2120 | } |
| 2121 | fanotify_mnt_event_cachep = KMEM_CACHE(fanotify_mnt_event, SLAB_PANIC); |
| 2122 | |
| 2123 | fanotify_max_queued_events = FANOTIFY_DEFAULT_MAX_EVENTS; |
| 2124 | init_user_ns.ucount_max[UCOUNT_FANOTIFY_GROUPS] = |
| 2125 | FANOTIFY_DEFAULT_MAX_GROUPS; |
| 2126 | init_user_ns.ucount_max[UCOUNT_FANOTIFY_MARKS] = max_marks; |
| 2127 | fanotify_sysctls_init(); |
| 2128 | |
| 2129 | return 0; |
| 2130 | } |
| 2131 | device_initcall(fanotify_user_setup); |
| 2132 | |