1 | // SPDX-License-Identifier: GPL-2.0 |
2 | #include <linux/fanotify.h> |
3 | #include <linux/fdtable.h> |
4 | #include <linux/fsnotify_backend.h> |
5 | #include <linux/init.h> |
6 | #include <linux/jiffies.h> |
7 | #include <linux/kernel.h> /* UINT_MAX */ |
8 | #include <linux/mount.h> |
9 | #include <linux/sched.h> |
10 | #include <linux/sched/user.h> |
11 | #include <linux/sched/signal.h> |
12 | #include <linux/types.h> |
13 | #include <linux/wait.h> |
14 | #include <linux/audit.h> |
15 | #include <linux/sched/mm.h> |
16 | #include <linux/statfs.h> |
17 | #include <linux/stringhash.h> |
18 | |
19 | #include "fanotify.h" |
20 | |
21 | static bool fanotify_path_equal(const struct path *p1, const struct path *p2) |
22 | { |
23 | return p1->mnt == p2->mnt && p1->dentry == p2->dentry; |
24 | } |
25 | |
26 | static unsigned int fanotify_hash_path(const struct path *path) |
27 | { |
28 | return hash_ptr(ptr: path->dentry, FANOTIFY_EVENT_HASH_BITS) ^ |
29 | hash_ptr(ptr: path->mnt, FANOTIFY_EVENT_HASH_BITS); |
30 | } |
31 | |
32 | static unsigned int fanotify_hash_fsid(__kernel_fsid_t *fsid) |
33 | { |
34 | return hash_32(val: fsid->val[0], FANOTIFY_EVENT_HASH_BITS) ^ |
35 | hash_32(val: fsid->val[1], FANOTIFY_EVENT_HASH_BITS); |
36 | } |
37 | |
38 | static bool fanotify_fh_equal(struct fanotify_fh *fh1, |
39 | struct fanotify_fh *fh2) |
40 | { |
41 | if (fh1->type != fh2->type || fh1->len != fh2->len) |
42 | return false; |
43 | |
44 | return !fh1->len || |
45 | !memcmp(p: fanotify_fh_buf(fh: fh1), q: fanotify_fh_buf(fh: fh2), size: fh1->len); |
46 | } |
47 | |
48 | static unsigned int fanotify_hash_fh(struct fanotify_fh *fh) |
49 | { |
50 | long salt = (long)fh->type | (long)fh->len << 8; |
51 | |
52 | /* |
53 | * full_name_hash() works long by long, so it handles fh buf optimally. |
54 | */ |
55 | return full_name_hash(salt: (void *)salt, fanotify_fh_buf(fh), fh->len); |
56 | } |
57 | |
58 | static bool fanotify_fid_event_equal(struct fanotify_fid_event *ffe1, |
59 | struct fanotify_fid_event *ffe2) |
60 | { |
61 | /* Do not merge fid events without object fh */ |
62 | if (!ffe1->object_fh.len) |
63 | return false; |
64 | |
65 | return fanotify_fsid_equal(fsid1: &ffe1->fsid, fsid2: &ffe2->fsid) && |
66 | fanotify_fh_equal(fh1: &ffe1->object_fh, fh2: &ffe2->object_fh); |
67 | } |
68 | |
69 | static bool fanotify_info_equal(struct fanotify_info *info1, |
70 | struct fanotify_info *info2) |
71 | { |
72 | if (info1->dir_fh_totlen != info2->dir_fh_totlen || |
73 | info1->dir2_fh_totlen != info2->dir2_fh_totlen || |
74 | info1->file_fh_totlen != info2->file_fh_totlen || |
75 | info1->name_len != info2->name_len || |
76 | info1->name2_len != info2->name2_len) |
77 | return false; |
78 | |
79 | if (info1->dir_fh_totlen && |
80 | !fanotify_fh_equal(fh1: fanotify_info_dir_fh(info: info1), |
81 | fh2: fanotify_info_dir_fh(info: info2))) |
82 | return false; |
83 | |
84 | if (info1->dir2_fh_totlen && |
85 | !fanotify_fh_equal(fh1: fanotify_info_dir2_fh(info: info1), |
86 | fh2: fanotify_info_dir2_fh(info: info2))) |
87 | return false; |
88 | |
89 | if (info1->file_fh_totlen && |
90 | !fanotify_fh_equal(fh1: fanotify_info_file_fh(info: info1), |
91 | fh2: fanotify_info_file_fh(info: info2))) |
92 | return false; |
93 | |
94 | if (info1->name_len && |
95 | memcmp(p: fanotify_info_name(info: info1), q: fanotify_info_name(info: info2), |
96 | size: info1->name_len)) |
97 | return false; |
98 | |
99 | return !info1->name2_len || |
100 | !memcmp(p: fanotify_info_name2(info: info1), q: fanotify_info_name2(info: info2), |
101 | size: info1->name2_len); |
102 | } |
103 | |
104 | static bool fanotify_name_event_equal(struct fanotify_name_event *fne1, |
105 | struct fanotify_name_event *fne2) |
106 | { |
107 | struct fanotify_info *info1 = &fne1->info; |
108 | struct fanotify_info *info2 = &fne2->info; |
109 | |
110 | /* Do not merge name events without dir fh */ |
111 | if (!info1->dir_fh_totlen) |
112 | return false; |
113 | |
114 | if (!fanotify_fsid_equal(fsid1: &fne1->fsid, fsid2: &fne2->fsid)) |
115 | return false; |
116 | |
117 | return fanotify_info_equal(info1, info2); |
118 | } |
119 | |
120 | static bool fanotify_error_event_equal(struct fanotify_error_event *fee1, |
121 | struct fanotify_error_event *fee2) |
122 | { |
123 | /* Error events against the same file system are always merged. */ |
124 | if (!fanotify_fsid_equal(fsid1: &fee1->fsid, fsid2: &fee2->fsid)) |
125 | return false; |
126 | |
127 | return true; |
128 | } |
129 | |
130 | static bool fanotify_should_merge(struct fanotify_event *old, |
131 | struct fanotify_event *new) |
132 | { |
133 | pr_debug("%s: old=%p new=%p\n" , __func__, old, new); |
134 | |
135 | if (old->hash != new->hash || |
136 | old->type != new->type || old->pid != new->pid) |
137 | return false; |
138 | |
139 | /* |
140 | * We want to merge many dirent events in the same dir (i.e. |
141 | * creates/unlinks/renames), but we do not want to merge dirent |
142 | * events referring to subdirs with dirent events referring to |
143 | * non subdirs, otherwise, user won't be able to tell from a |
144 | * mask FAN_CREATE|FAN_DELETE|FAN_ONDIR if it describes mkdir+ |
145 | * unlink pair or rmdir+create pair of events. |
146 | */ |
147 | if ((old->mask & FS_ISDIR) != (new->mask & FS_ISDIR)) |
148 | return false; |
149 | |
150 | /* |
151 | * FAN_RENAME event is reported with special info record types, |
152 | * so we cannot merge it with other events. |
153 | */ |
154 | if ((old->mask & FAN_RENAME) != (new->mask & FAN_RENAME)) |
155 | return false; |
156 | |
157 | switch (old->type) { |
158 | case FANOTIFY_EVENT_TYPE_PATH: |
159 | return fanotify_path_equal(p1: fanotify_event_path(event: old), |
160 | p2: fanotify_event_path(event: new)); |
161 | case FANOTIFY_EVENT_TYPE_FID: |
162 | return fanotify_fid_event_equal(ffe1: FANOTIFY_FE(event: old), |
163 | ffe2: FANOTIFY_FE(event: new)); |
164 | case FANOTIFY_EVENT_TYPE_FID_NAME: |
165 | return fanotify_name_event_equal(fne1: FANOTIFY_NE(event: old), |
166 | fne2: FANOTIFY_NE(event: new)); |
167 | case FANOTIFY_EVENT_TYPE_FS_ERROR: |
168 | return fanotify_error_event_equal(fee1: FANOTIFY_EE(event: old), |
169 | fee2: FANOTIFY_EE(event: new)); |
170 | default: |
171 | WARN_ON_ONCE(1); |
172 | } |
173 | |
174 | return false; |
175 | } |
176 | |
177 | /* Limit event merges to limit CPU overhead per event */ |
178 | #define FANOTIFY_MAX_MERGE_EVENTS 128 |
179 | |
180 | /* and the list better be locked by something too! */ |
181 | static int fanotify_merge(struct fsnotify_group *group, |
182 | struct fsnotify_event *event) |
183 | { |
184 | struct fanotify_event *old, *new = FANOTIFY_E(fse: event); |
185 | unsigned int bucket = fanotify_event_hash_bucket(group, event: new); |
186 | struct hlist_head *hlist = &group->fanotify_data.merge_hash[bucket]; |
187 | int i = 0; |
188 | |
189 | pr_debug("%s: group=%p event=%p bucket=%u\n" , __func__, |
190 | group, event, bucket); |
191 | |
192 | /* |
193 | * Don't merge a permission event with any other event so that we know |
194 | * the event structure we have created in fanotify_handle_event() is the |
195 | * one we should check for permission response. |
196 | */ |
197 | if (fanotify_is_perm_event(mask: new->mask)) |
198 | return 0; |
199 | |
200 | hlist_for_each_entry(old, hlist, merge_list) { |
201 | if (++i > FANOTIFY_MAX_MERGE_EVENTS) |
202 | break; |
203 | if (fanotify_should_merge(old, new)) { |
204 | old->mask |= new->mask; |
205 | |
206 | if (fanotify_is_error_event(mask: old->mask)) |
207 | FANOTIFY_EE(event: old)->err_count++; |
208 | |
209 | return 1; |
210 | } |
211 | } |
212 | |
213 | return 0; |
214 | } |
215 | |
216 | /* |
217 | * Wait for response to permission event. The function also takes care of |
218 | * freeing the permission event (or offloads that in case the wait is canceled |
219 | * by a signal). The function returns 0 in case access got allowed by userspace, |
220 | * -EPERM in case userspace disallowed the access, and -ERESTARTSYS in case |
221 | * the wait got interrupted by a signal. |
222 | */ |
223 | static int fanotify_get_response(struct fsnotify_group *group, |
224 | struct fanotify_perm_event *event, |
225 | struct fsnotify_iter_info *iter_info) |
226 | { |
227 | int ret; |
228 | |
229 | pr_debug("%s: group=%p event=%p\n" , __func__, group, event); |
230 | |
231 | ret = wait_event_state(group->fanotify_data.access_waitq, |
232 | event->state == FAN_EVENT_ANSWERED, |
233 | (TASK_KILLABLE|TASK_FREEZABLE)); |
234 | |
235 | /* Signal pending? */ |
236 | if (ret < 0) { |
237 | spin_lock(lock: &group->notification_lock); |
238 | /* Event reported to userspace and no answer yet? */ |
239 | if (event->state == FAN_EVENT_REPORTED) { |
240 | /* Event will get freed once userspace answers to it */ |
241 | event->state = FAN_EVENT_CANCELED; |
242 | spin_unlock(lock: &group->notification_lock); |
243 | return ret; |
244 | } |
245 | /* Event not yet reported? Just remove it. */ |
246 | if (event->state == FAN_EVENT_INIT) { |
247 | fsnotify_remove_queued_event(group, event: &event->fae.fse); |
248 | /* Permission events are not supposed to be hashed */ |
249 | WARN_ON_ONCE(!hlist_unhashed(&event->fae.merge_list)); |
250 | } |
251 | /* |
252 | * Event may be also answered in case signal delivery raced |
253 | * with wakeup. In that case we have nothing to do besides |
254 | * freeing the event and reporting error. |
255 | */ |
256 | spin_unlock(lock: &group->notification_lock); |
257 | goto out; |
258 | } |
259 | |
260 | /* userspace responded, convert to something usable */ |
261 | switch (event->response & FANOTIFY_RESPONSE_ACCESS) { |
262 | case FAN_ALLOW: |
263 | ret = 0; |
264 | break; |
265 | case FAN_DENY: |
266 | default: |
267 | ret = -EPERM; |
268 | } |
269 | |
270 | /* Check if the response should be audited */ |
271 | if (event->response & FAN_AUDIT) |
272 | audit_fanotify(response: event->response & ~FAN_AUDIT, |
273 | friar: &event->audit_rule); |
274 | |
275 | pr_debug("%s: group=%p event=%p about to return ret=%d\n" , __func__, |
276 | group, event, ret); |
277 | out: |
278 | fsnotify_destroy_event(group, event: &event->fae.fse); |
279 | |
280 | return ret; |
281 | } |
282 | |
283 | /* |
284 | * This function returns a mask for an event that only contains the flags |
285 | * that have been specifically requested by the user. Flags that may have |
286 | * been included within the event mask, but have not been explicitly |
287 | * requested by the user, will not be present in the returned mask. |
288 | */ |
289 | static u32 fanotify_group_event_mask(struct fsnotify_group *group, |
290 | struct fsnotify_iter_info *iter_info, |
291 | u32 *match_mask, u32 event_mask, |
292 | const void *data, int data_type, |
293 | struct inode *dir) |
294 | { |
295 | __u32 marks_mask = 0, marks_ignore_mask = 0; |
296 | __u32 test_mask, user_mask = FANOTIFY_OUTGOING_EVENTS | |
297 | FANOTIFY_EVENT_FLAGS; |
298 | const struct path *path = fsnotify_data_path(data, data_type); |
299 | unsigned int fid_mode = FAN_GROUP_FLAG(group, FANOTIFY_FID_BITS); |
300 | struct fsnotify_mark *mark; |
301 | bool ondir = event_mask & FAN_ONDIR; |
302 | int type; |
303 | |
304 | pr_debug("%s: report_mask=%x mask=%x data=%p data_type=%d\n" , |
305 | __func__, iter_info->report_mask, event_mask, data, data_type); |
306 | |
307 | if (!fid_mode) { |
308 | /* Do we have path to open a file descriptor? */ |
309 | if (!path) |
310 | return 0; |
311 | /* Path type events are only relevant for files and dirs */ |
312 | if (!d_is_reg(dentry: path->dentry) && !d_can_lookup(dentry: path->dentry)) |
313 | return 0; |
314 | } else if (!(fid_mode & FAN_REPORT_FID)) { |
315 | /* Do we have a directory inode to report? */ |
316 | if (!dir && !ondir) |
317 | return 0; |
318 | } |
319 | |
320 | fsnotify_foreach_iter_mark_type(iter_info, mark, type) { |
321 | /* |
322 | * Apply ignore mask depending on event flags in ignore mask. |
323 | */ |
324 | marks_ignore_mask |= |
325 | fsnotify_effective_ignore_mask(mark, is_dir: ondir, iter_type: type); |
326 | |
327 | /* |
328 | * Send the event depending on event flags in mark mask. |
329 | */ |
330 | if (!fsnotify_mask_applicable(mask: mark->mask, is_dir: ondir, iter_type: type)) |
331 | continue; |
332 | |
333 | marks_mask |= mark->mask; |
334 | |
335 | /* Record the mark types of this group that matched the event */ |
336 | *match_mask |= 1U << type; |
337 | } |
338 | |
339 | test_mask = event_mask & marks_mask & ~marks_ignore_mask; |
340 | |
341 | /* |
342 | * For dirent modification events (create/delete/move) that do not carry |
343 | * the child entry name information, we report FAN_ONDIR for mkdir/rmdir |
344 | * so user can differentiate them from creat/unlink. |
345 | * |
346 | * For backward compatibility and consistency, do not report FAN_ONDIR |
347 | * to user in legacy fanotify mode (reporting fd) and report FAN_ONDIR |
348 | * to user in fid mode for all event types. |
349 | * |
350 | * We never report FAN_EVENT_ON_CHILD to user, but we do pass it in to |
351 | * fanotify_alloc_event() when group is reporting fid as indication |
352 | * that event happened on child. |
353 | */ |
354 | if (fid_mode) { |
355 | /* Do not report event flags without any event */ |
356 | if (!(test_mask & ~FANOTIFY_EVENT_FLAGS)) |
357 | return 0; |
358 | } else { |
359 | user_mask &= ~FANOTIFY_EVENT_FLAGS; |
360 | } |
361 | |
362 | return test_mask & user_mask; |
363 | } |
364 | |
365 | /* |
366 | * Check size needed to encode fanotify_fh. |
367 | * |
368 | * Return size of encoded fh without fanotify_fh header. |
369 | * Return 0 on failure to encode. |
370 | */ |
371 | static int fanotify_encode_fh_len(struct inode *inode) |
372 | { |
373 | int dwords = 0; |
374 | int fh_len; |
375 | |
376 | if (!inode) |
377 | return 0; |
378 | |
379 | exportfs_encode_fid(inode, NULL, max_len: &dwords); |
380 | fh_len = dwords << 2; |
381 | |
382 | /* |
383 | * struct fanotify_error_event might be preallocated and is |
384 | * limited to MAX_HANDLE_SZ. This should never happen, but |
385 | * safeguard by forcing an invalid file handle. |
386 | */ |
387 | if (WARN_ON_ONCE(fh_len > MAX_HANDLE_SZ)) |
388 | return 0; |
389 | |
390 | return fh_len; |
391 | } |
392 | |
393 | /* |
394 | * Encode fanotify_fh. |
395 | * |
396 | * Return total size of encoded fh including fanotify_fh header. |
397 | * Return 0 on failure to encode. |
398 | */ |
399 | static int fanotify_encode_fh(struct fanotify_fh *fh, struct inode *inode, |
400 | unsigned int fh_len, unsigned int *hash, |
401 | gfp_t gfp) |
402 | { |
403 | int dwords, type = 0; |
404 | char *ext_buf = NULL; |
405 | void *buf = fh->buf; |
406 | int err; |
407 | |
408 | fh->type = FILEID_ROOT; |
409 | fh->len = 0; |
410 | fh->flags = 0; |
411 | |
412 | /* |
413 | * Invalid FHs are used by FAN_FS_ERROR for errors not |
414 | * linked to any inode. The f_handle won't be reported |
415 | * back to userspace. |
416 | */ |
417 | if (!inode) |
418 | goto out; |
419 | |
420 | /* |
421 | * !gpf means preallocated variable size fh, but fh_len could |
422 | * be zero in that case if encoding fh len failed. |
423 | */ |
424 | err = -ENOENT; |
425 | if (fh_len < 4 || WARN_ON_ONCE(fh_len % 4) || fh_len > MAX_HANDLE_SZ) |
426 | goto out_err; |
427 | |
428 | /* No external buffer in a variable size allocated fh */ |
429 | if (gfp && fh_len > FANOTIFY_INLINE_FH_LEN) { |
430 | /* Treat failure to allocate fh as failure to encode fh */ |
431 | err = -ENOMEM; |
432 | ext_buf = kmalloc(size: fh_len, flags: gfp); |
433 | if (!ext_buf) |
434 | goto out_err; |
435 | |
436 | *fanotify_fh_ext_buf_ptr(fh) = ext_buf; |
437 | buf = ext_buf; |
438 | fh->flags |= FANOTIFY_FH_FLAG_EXT_BUF; |
439 | } |
440 | |
441 | dwords = fh_len >> 2; |
442 | type = exportfs_encode_fid(inode, fid: buf, max_len: &dwords); |
443 | err = -EINVAL; |
444 | if (type <= 0 || type == FILEID_INVALID || fh_len != dwords << 2) |
445 | goto out_err; |
446 | |
447 | fh->type = type; |
448 | fh->len = fh_len; |
449 | |
450 | out: |
451 | /* |
452 | * Mix fh into event merge key. Hash might be NULL in case of |
453 | * unhashed FID events (i.e. FAN_FS_ERROR). |
454 | */ |
455 | if (hash) |
456 | *hash ^= fanotify_hash_fh(fh); |
457 | |
458 | return FANOTIFY_FH_HDR_LEN + fh_len; |
459 | |
460 | out_err: |
461 | pr_warn_ratelimited("fanotify: failed to encode fid (type=%d, len=%d, err=%i)\n" , |
462 | type, fh_len, err); |
463 | kfree(objp: ext_buf); |
464 | *fanotify_fh_ext_buf_ptr(fh) = NULL; |
465 | /* Report the event without a file identifier on encode error */ |
466 | fh->type = FILEID_INVALID; |
467 | fh->len = 0; |
468 | return 0; |
469 | } |
470 | |
471 | /* |
472 | * FAN_REPORT_FID is ambiguous in that it reports the fid of the child for |
473 | * some events and the fid of the parent for create/delete/move events. |
474 | * |
475 | * With the FAN_REPORT_TARGET_FID flag, the fid of the child is reported |
476 | * also in create/delete/move events in addition to the fid of the parent |
477 | * and the name of the child. |
478 | */ |
479 | static inline bool fanotify_report_child_fid(unsigned int fid_mode, u32 mask) |
480 | { |
481 | if (mask & ALL_FSNOTIFY_DIRENT_EVENTS) |
482 | return (fid_mode & FAN_REPORT_TARGET_FID); |
483 | |
484 | return (fid_mode & FAN_REPORT_FID) && !(mask & FAN_ONDIR); |
485 | } |
486 | |
487 | /* |
488 | * The inode to use as identifier when reporting fid depends on the event |
489 | * and the group flags. |
490 | * |
491 | * With the group flag FAN_REPORT_TARGET_FID, always report the child fid. |
492 | * |
493 | * Without the group flag FAN_REPORT_TARGET_FID, report the modified directory |
494 | * fid on dirent events and the child fid otherwise. |
495 | * |
496 | * For example: |
497 | * FS_ATTRIB reports the child fid even if reported on a watched parent. |
498 | * FS_CREATE reports the modified dir fid without FAN_REPORT_TARGET_FID. |
499 | * and reports the created child fid with FAN_REPORT_TARGET_FID. |
500 | */ |
501 | static struct inode *fanotify_fid_inode(u32 event_mask, const void *data, |
502 | int data_type, struct inode *dir, |
503 | unsigned int fid_mode) |
504 | { |
505 | if ((event_mask & ALL_FSNOTIFY_DIRENT_EVENTS) && |
506 | !(fid_mode & FAN_REPORT_TARGET_FID)) |
507 | return dir; |
508 | |
509 | return fsnotify_data_inode(data, data_type); |
510 | } |
511 | |
512 | /* |
513 | * The inode to use as identifier when reporting dir fid depends on the event. |
514 | * Report the modified directory inode on dirent modification events. |
515 | * Report the "victim" inode if "victim" is a directory. |
516 | * Report the parent inode if "victim" is not a directory and event is |
517 | * reported to parent. |
518 | * Otherwise, do not report dir fid. |
519 | */ |
520 | static struct inode *fanotify_dfid_inode(u32 event_mask, const void *data, |
521 | int data_type, struct inode *dir) |
522 | { |
523 | struct inode *inode = fsnotify_data_inode(data, data_type); |
524 | |
525 | if (event_mask & ALL_FSNOTIFY_DIRENT_EVENTS) |
526 | return dir; |
527 | |
528 | if (inode && S_ISDIR(inode->i_mode)) |
529 | return inode; |
530 | |
531 | return dir; |
532 | } |
533 | |
534 | static struct fanotify_event *fanotify_alloc_path_event(const struct path *path, |
535 | unsigned int *hash, |
536 | gfp_t gfp) |
537 | { |
538 | struct fanotify_path_event *pevent; |
539 | |
540 | pevent = kmem_cache_alloc(cachep: fanotify_path_event_cachep, flags: gfp); |
541 | if (!pevent) |
542 | return NULL; |
543 | |
544 | pevent->fae.type = FANOTIFY_EVENT_TYPE_PATH; |
545 | pevent->path = *path; |
546 | *hash ^= fanotify_hash_path(path); |
547 | path_get(path); |
548 | |
549 | return &pevent->fae; |
550 | } |
551 | |
552 | static struct fanotify_event *fanotify_alloc_perm_event(const struct path *path, |
553 | gfp_t gfp) |
554 | { |
555 | struct fanotify_perm_event *pevent; |
556 | |
557 | pevent = kmem_cache_alloc(cachep: fanotify_perm_event_cachep, flags: gfp); |
558 | if (!pevent) |
559 | return NULL; |
560 | |
561 | pevent->fae.type = FANOTIFY_EVENT_TYPE_PATH_PERM; |
562 | pevent->response = 0; |
563 | pevent->hdr.type = FAN_RESPONSE_INFO_NONE; |
564 | pevent->hdr.pad = 0; |
565 | pevent->hdr.len = 0; |
566 | pevent->state = FAN_EVENT_INIT; |
567 | pevent->path = *path; |
568 | path_get(path); |
569 | |
570 | return &pevent->fae; |
571 | } |
572 | |
573 | static struct fanotify_event *fanotify_alloc_fid_event(struct inode *id, |
574 | __kernel_fsid_t *fsid, |
575 | unsigned int *hash, |
576 | gfp_t gfp) |
577 | { |
578 | struct fanotify_fid_event *ffe; |
579 | |
580 | ffe = kmem_cache_alloc(cachep: fanotify_fid_event_cachep, flags: gfp); |
581 | if (!ffe) |
582 | return NULL; |
583 | |
584 | ffe->fae.type = FANOTIFY_EVENT_TYPE_FID; |
585 | ffe->fsid = *fsid; |
586 | *hash ^= fanotify_hash_fsid(fsid); |
587 | fanotify_encode_fh(fh: &ffe->object_fh, inode: id, fh_len: fanotify_encode_fh_len(inode: id), |
588 | hash, gfp); |
589 | |
590 | return &ffe->fae; |
591 | } |
592 | |
593 | static struct fanotify_event *fanotify_alloc_name_event(struct inode *dir, |
594 | __kernel_fsid_t *fsid, |
595 | const struct qstr *name, |
596 | struct inode *child, |
597 | struct dentry *moved, |
598 | unsigned int *hash, |
599 | gfp_t gfp) |
600 | { |
601 | struct fanotify_name_event *fne; |
602 | struct fanotify_info *info; |
603 | struct fanotify_fh *dfh, *ffh; |
604 | struct inode *dir2 = moved ? d_inode(dentry: moved->d_parent) : NULL; |
605 | const struct qstr *name2 = moved ? &moved->d_name : NULL; |
606 | unsigned int dir_fh_len = fanotify_encode_fh_len(inode: dir); |
607 | unsigned int dir2_fh_len = fanotify_encode_fh_len(inode: dir2); |
608 | unsigned int child_fh_len = fanotify_encode_fh_len(inode: child); |
609 | unsigned long name_len = name ? name->len : 0; |
610 | unsigned long name2_len = name2 ? name2->len : 0; |
611 | unsigned int len, size; |
612 | |
613 | /* Reserve terminating null byte even for empty name */ |
614 | size = sizeof(*fne) + name_len + name2_len + 2; |
615 | if (dir_fh_len) |
616 | size += FANOTIFY_FH_HDR_LEN + dir_fh_len; |
617 | if (dir2_fh_len) |
618 | size += FANOTIFY_FH_HDR_LEN + dir2_fh_len; |
619 | if (child_fh_len) |
620 | size += FANOTIFY_FH_HDR_LEN + child_fh_len; |
621 | fne = kmalloc(size, flags: gfp); |
622 | if (!fne) |
623 | return NULL; |
624 | |
625 | fne->fae.type = FANOTIFY_EVENT_TYPE_FID_NAME; |
626 | fne->fsid = *fsid; |
627 | *hash ^= fanotify_hash_fsid(fsid); |
628 | info = &fne->info; |
629 | fanotify_info_init(info); |
630 | if (dir_fh_len) { |
631 | dfh = fanotify_info_dir_fh(info); |
632 | len = fanotify_encode_fh(fh: dfh, inode: dir, fh_len: dir_fh_len, hash, gfp: 0); |
633 | fanotify_info_set_dir_fh(info, totlen: len); |
634 | } |
635 | if (dir2_fh_len) { |
636 | dfh = fanotify_info_dir2_fh(info); |
637 | len = fanotify_encode_fh(fh: dfh, inode: dir2, fh_len: dir2_fh_len, hash, gfp: 0); |
638 | fanotify_info_set_dir2_fh(info, totlen: len); |
639 | } |
640 | if (child_fh_len) { |
641 | ffh = fanotify_info_file_fh(info); |
642 | len = fanotify_encode_fh(fh: ffh, inode: child, fh_len: child_fh_len, hash, gfp: 0); |
643 | fanotify_info_set_file_fh(info, totlen: len); |
644 | } |
645 | if (name_len) { |
646 | fanotify_info_copy_name(info, name); |
647 | *hash ^= full_name_hash(salt: (void *)name_len, name->name, name_len); |
648 | } |
649 | if (name2_len) { |
650 | fanotify_info_copy_name2(info, name: name2); |
651 | *hash ^= full_name_hash(salt: (void *)name2_len, name2->name, |
652 | name2_len); |
653 | } |
654 | |
655 | pr_debug("%s: size=%u dir_fh_len=%u child_fh_len=%u name_len=%u name='%.*s'\n" , |
656 | __func__, size, dir_fh_len, child_fh_len, |
657 | info->name_len, info->name_len, fanotify_info_name(info)); |
658 | |
659 | if (dir2_fh_len) { |
660 | pr_debug("%s: dir2_fh_len=%u name2_len=%u name2='%.*s'\n" , |
661 | __func__, dir2_fh_len, info->name2_len, |
662 | info->name2_len, fanotify_info_name2(info)); |
663 | } |
664 | |
665 | return &fne->fae; |
666 | } |
667 | |
668 | static struct fanotify_event *fanotify_alloc_error_event( |
669 | struct fsnotify_group *group, |
670 | __kernel_fsid_t *fsid, |
671 | const void *data, int data_type, |
672 | unsigned int *hash) |
673 | { |
674 | struct fs_error_report *report = |
675 | fsnotify_data_error_report(data, data_type); |
676 | struct inode *inode; |
677 | struct fanotify_error_event *fee; |
678 | int fh_len; |
679 | |
680 | if (WARN_ON_ONCE(!report)) |
681 | return NULL; |
682 | |
683 | fee = mempool_alloc(pool: &group->fanotify_data.error_events_pool, GFP_NOFS); |
684 | if (!fee) |
685 | return NULL; |
686 | |
687 | fee->fae.type = FANOTIFY_EVENT_TYPE_FS_ERROR; |
688 | fee->error = report->error; |
689 | fee->err_count = 1; |
690 | fee->fsid = *fsid; |
691 | |
692 | inode = report->inode; |
693 | fh_len = fanotify_encode_fh_len(inode); |
694 | |
695 | /* Bad fh_len. Fallback to using an invalid fh. Should never happen. */ |
696 | if (!fh_len && inode) |
697 | inode = NULL; |
698 | |
699 | fanotify_encode_fh(fh: &fee->object_fh, inode, fh_len, NULL, gfp: 0); |
700 | |
701 | *hash ^= fanotify_hash_fsid(fsid); |
702 | |
703 | return &fee->fae; |
704 | } |
705 | |
706 | static struct fanotify_event *fanotify_alloc_event( |
707 | struct fsnotify_group *group, |
708 | u32 mask, const void *data, int data_type, |
709 | struct inode *dir, const struct qstr *file_name, |
710 | __kernel_fsid_t *fsid, u32 match_mask) |
711 | { |
712 | struct fanotify_event *event = NULL; |
713 | gfp_t gfp = GFP_KERNEL_ACCOUNT; |
714 | unsigned int fid_mode = FAN_GROUP_FLAG(group, FANOTIFY_FID_BITS); |
715 | struct inode *id = fanotify_fid_inode(event_mask: mask, data, data_type, dir, |
716 | fid_mode); |
717 | struct inode *dirid = fanotify_dfid_inode(event_mask: mask, data, data_type, dir); |
718 | const struct path *path = fsnotify_data_path(data, data_type); |
719 | struct mem_cgroup *old_memcg; |
720 | struct dentry *moved = NULL; |
721 | struct inode *child = NULL; |
722 | bool name_event = false; |
723 | unsigned int hash = 0; |
724 | bool ondir = mask & FAN_ONDIR; |
725 | struct pid *pid; |
726 | |
727 | if ((fid_mode & FAN_REPORT_DIR_FID) && dirid) { |
728 | /* |
729 | * For certain events and group flags, report the child fid |
730 | * in addition to reporting the parent fid and maybe child name. |
731 | */ |
732 | if (fanotify_report_child_fid(fid_mode, mask) && id != dirid) |
733 | child = id; |
734 | |
735 | id = dirid; |
736 | |
737 | /* |
738 | * We record file name only in a group with FAN_REPORT_NAME |
739 | * and when we have a directory inode to report. |
740 | * |
741 | * For directory entry modification event, we record the fid of |
742 | * the directory and the name of the modified entry. |
743 | * |
744 | * For event on non-directory that is reported to parent, we |
745 | * record the fid of the parent and the name of the child. |
746 | * |
747 | * Even if not reporting name, we need a variable length |
748 | * fanotify_name_event if reporting both parent and child fids. |
749 | */ |
750 | if (!(fid_mode & FAN_REPORT_NAME)) { |
751 | name_event = !!child; |
752 | file_name = NULL; |
753 | } else if ((mask & ALL_FSNOTIFY_DIRENT_EVENTS) || !ondir) { |
754 | name_event = true; |
755 | } |
756 | |
757 | /* |
758 | * In the special case of FAN_RENAME event, use the match_mask |
759 | * to determine if we need to report only the old parent+name, |
760 | * only the new parent+name or both. |
761 | * 'dirid' and 'file_name' are the old parent+name and |
762 | * 'moved' has the new parent+name. |
763 | */ |
764 | if (mask & FAN_RENAME) { |
765 | bool report_old, report_new; |
766 | |
767 | if (WARN_ON_ONCE(!match_mask)) |
768 | return NULL; |
769 | |
770 | /* Report both old and new parent+name if sb watching */ |
771 | report_old = report_new = |
772 | match_mask & (1U << FSNOTIFY_ITER_TYPE_SB); |
773 | report_old |= |
774 | match_mask & (1U << FSNOTIFY_ITER_TYPE_INODE); |
775 | report_new |= |
776 | match_mask & (1U << FSNOTIFY_ITER_TYPE_INODE2); |
777 | |
778 | if (!report_old) { |
779 | /* Do not report old parent+name */ |
780 | dirid = NULL; |
781 | file_name = NULL; |
782 | } |
783 | if (report_new) { |
784 | /* Report new parent+name */ |
785 | moved = fsnotify_data_dentry(data, data_type); |
786 | } |
787 | } |
788 | } |
789 | |
790 | /* |
791 | * For queues with unlimited length lost events are not expected and |
792 | * can possibly have security implications. Avoid losing events when |
793 | * memory is short. For the limited size queues, avoid OOM killer in the |
794 | * target monitoring memcg as it may have security repercussion. |
795 | */ |
796 | if (group->max_events == UINT_MAX) |
797 | gfp |= __GFP_NOFAIL; |
798 | else |
799 | gfp |= __GFP_RETRY_MAYFAIL; |
800 | |
801 | /* Whoever is interested in the event, pays for the allocation. */ |
802 | old_memcg = set_active_memcg(group->memcg); |
803 | |
804 | if (fanotify_is_perm_event(mask)) { |
805 | event = fanotify_alloc_perm_event(path, gfp); |
806 | } else if (fanotify_is_error_event(mask)) { |
807 | event = fanotify_alloc_error_event(group, fsid, data, |
808 | data_type, hash: &hash); |
809 | } else if (name_event && (file_name || moved || child)) { |
810 | event = fanotify_alloc_name_event(dir: dirid, fsid, name: file_name, child, |
811 | moved, hash: &hash, gfp); |
812 | } else if (fid_mode) { |
813 | event = fanotify_alloc_fid_event(id, fsid, hash: &hash, gfp); |
814 | } else { |
815 | event = fanotify_alloc_path_event(path, hash: &hash, gfp); |
816 | } |
817 | |
818 | if (!event) |
819 | goto out; |
820 | |
821 | if (FAN_GROUP_FLAG(group, FAN_REPORT_TID)) |
822 | pid = get_pid(pid: task_pid(current)); |
823 | else |
824 | pid = get_pid(pid: task_tgid(current)); |
825 | |
826 | /* Mix event info, FAN_ONDIR flag and pid into event merge key */ |
827 | hash ^= hash_long((unsigned long)pid | ondir, FANOTIFY_EVENT_HASH_BITS); |
828 | fanotify_init_event(event, hash, mask); |
829 | event->pid = pid; |
830 | |
831 | out: |
832 | set_active_memcg(old_memcg); |
833 | return event; |
834 | } |
835 | |
836 | /* |
837 | * Get cached fsid of the filesystem containing the object from any mark. |
838 | * All marks are supposed to have the same fsid, but we do not verify that here. |
839 | */ |
840 | static __kernel_fsid_t fanotify_get_fsid(struct fsnotify_iter_info *iter_info) |
841 | { |
842 | struct fsnotify_mark *mark; |
843 | int type; |
844 | __kernel_fsid_t fsid = {}; |
845 | |
846 | fsnotify_foreach_iter_mark_type(iter_info, mark, type) { |
847 | if (!(mark->flags & FSNOTIFY_MARK_FLAG_HAS_FSID)) |
848 | continue; |
849 | fsid = FANOTIFY_MARK(mark)->fsid; |
850 | if (!(mark->flags & FSNOTIFY_MARK_FLAG_WEAK_FSID) && |
851 | WARN_ON_ONCE(!fsid.val[0] && !fsid.val[1])) |
852 | continue; |
853 | return fsid; |
854 | } |
855 | |
856 | return fsid; |
857 | } |
858 | |
859 | /* |
860 | * Add an event to hash table for faster merge. |
861 | */ |
862 | static void fanotify_insert_event(struct fsnotify_group *group, |
863 | struct fsnotify_event *fsn_event) |
864 | { |
865 | struct fanotify_event *event = FANOTIFY_E(fse: fsn_event); |
866 | unsigned int bucket = fanotify_event_hash_bucket(group, event); |
867 | struct hlist_head *hlist = &group->fanotify_data.merge_hash[bucket]; |
868 | |
869 | assert_spin_locked(&group->notification_lock); |
870 | |
871 | if (!fanotify_is_hashed_event(mask: event->mask)) |
872 | return; |
873 | |
874 | pr_debug("%s: group=%p event=%p bucket=%u\n" , __func__, |
875 | group, event, bucket); |
876 | |
877 | hlist_add_head(n: &event->merge_list, h: hlist); |
878 | } |
879 | |
880 | static int fanotify_handle_event(struct fsnotify_group *group, u32 mask, |
881 | const void *data, int data_type, |
882 | struct inode *dir, |
883 | const struct qstr *file_name, u32 cookie, |
884 | struct fsnotify_iter_info *iter_info) |
885 | { |
886 | int ret = 0; |
887 | struct fanotify_event *event; |
888 | struct fsnotify_event *fsn_event; |
889 | __kernel_fsid_t fsid = {}; |
890 | u32 match_mask = 0; |
891 | |
892 | BUILD_BUG_ON(FAN_ACCESS != FS_ACCESS); |
893 | BUILD_BUG_ON(FAN_MODIFY != FS_MODIFY); |
894 | BUILD_BUG_ON(FAN_ATTRIB != FS_ATTRIB); |
895 | BUILD_BUG_ON(FAN_CLOSE_NOWRITE != FS_CLOSE_NOWRITE); |
896 | BUILD_BUG_ON(FAN_CLOSE_WRITE != FS_CLOSE_WRITE); |
897 | BUILD_BUG_ON(FAN_OPEN != FS_OPEN); |
898 | BUILD_BUG_ON(FAN_MOVED_TO != FS_MOVED_TO); |
899 | BUILD_BUG_ON(FAN_MOVED_FROM != FS_MOVED_FROM); |
900 | BUILD_BUG_ON(FAN_CREATE != FS_CREATE); |
901 | BUILD_BUG_ON(FAN_DELETE != FS_DELETE); |
902 | BUILD_BUG_ON(FAN_DELETE_SELF != FS_DELETE_SELF); |
903 | BUILD_BUG_ON(FAN_MOVE_SELF != FS_MOVE_SELF); |
904 | BUILD_BUG_ON(FAN_EVENT_ON_CHILD != FS_EVENT_ON_CHILD); |
905 | BUILD_BUG_ON(FAN_Q_OVERFLOW != FS_Q_OVERFLOW); |
906 | BUILD_BUG_ON(FAN_OPEN_PERM != FS_OPEN_PERM); |
907 | BUILD_BUG_ON(FAN_ACCESS_PERM != FS_ACCESS_PERM); |
908 | BUILD_BUG_ON(FAN_ONDIR != FS_ISDIR); |
909 | BUILD_BUG_ON(FAN_OPEN_EXEC != FS_OPEN_EXEC); |
910 | BUILD_BUG_ON(FAN_OPEN_EXEC_PERM != FS_OPEN_EXEC_PERM); |
911 | BUILD_BUG_ON(FAN_FS_ERROR != FS_ERROR); |
912 | BUILD_BUG_ON(FAN_RENAME != FS_RENAME); |
913 | |
914 | BUILD_BUG_ON(HWEIGHT32(ALL_FANOTIFY_EVENT_BITS) != 21); |
915 | |
916 | mask = fanotify_group_event_mask(group, iter_info, match_mask: &match_mask, |
917 | event_mask: mask, data, data_type, dir); |
918 | if (!mask) |
919 | return 0; |
920 | |
921 | pr_debug("%s: group=%p mask=%x report_mask=%x\n" , __func__, |
922 | group, mask, match_mask); |
923 | |
924 | if (fanotify_is_perm_event(mask)) { |
925 | /* |
926 | * fsnotify_prepare_user_wait() fails if we race with mark |
927 | * deletion. Just let the operation pass in that case. |
928 | */ |
929 | if (!fsnotify_prepare_user_wait(iter_info)) |
930 | return 0; |
931 | } |
932 | |
933 | if (FAN_GROUP_FLAG(group, FANOTIFY_FID_BITS)) |
934 | fsid = fanotify_get_fsid(iter_info); |
935 | |
936 | event = fanotify_alloc_event(group, mask, data, data_type, dir, |
937 | file_name, fsid: &fsid, match_mask); |
938 | ret = -ENOMEM; |
939 | if (unlikely(!event)) { |
940 | /* |
941 | * We don't queue overflow events for permission events as |
942 | * there the access is denied and so no event is in fact lost. |
943 | */ |
944 | if (!fanotify_is_perm_event(mask)) |
945 | fsnotify_queue_overflow(group); |
946 | goto finish; |
947 | } |
948 | |
949 | fsn_event = &event->fse; |
950 | ret = fsnotify_insert_event(group, event: fsn_event, merge: fanotify_merge, |
951 | insert: fanotify_insert_event); |
952 | if (ret) { |
953 | /* Permission events shouldn't be merged */ |
954 | BUG_ON(ret == 1 && mask & FANOTIFY_PERM_EVENTS); |
955 | /* Our event wasn't used in the end. Free it. */ |
956 | fsnotify_destroy_event(group, event: fsn_event); |
957 | |
958 | ret = 0; |
959 | } else if (fanotify_is_perm_event(mask)) { |
960 | ret = fanotify_get_response(group, event: FANOTIFY_PERM(event), |
961 | iter_info); |
962 | } |
963 | finish: |
964 | if (fanotify_is_perm_event(mask)) |
965 | fsnotify_finish_user_wait(iter_info); |
966 | |
967 | return ret; |
968 | } |
969 | |
970 | static void fanotify_free_group_priv(struct fsnotify_group *group) |
971 | { |
972 | kfree(objp: group->fanotify_data.merge_hash); |
973 | if (group->fanotify_data.ucounts) |
974 | dec_ucount(ucounts: group->fanotify_data.ucounts, |
975 | type: UCOUNT_FANOTIFY_GROUPS); |
976 | |
977 | if (mempool_initialized(pool: &group->fanotify_data.error_events_pool)) |
978 | mempool_exit(pool: &group->fanotify_data.error_events_pool); |
979 | } |
980 | |
981 | static void fanotify_free_path_event(struct fanotify_event *event) |
982 | { |
983 | path_put(fanotify_event_path(event)); |
984 | kmem_cache_free(s: fanotify_path_event_cachep, objp: FANOTIFY_PE(event)); |
985 | } |
986 | |
987 | static void fanotify_free_perm_event(struct fanotify_event *event) |
988 | { |
989 | path_put(fanotify_event_path(event)); |
990 | kmem_cache_free(s: fanotify_perm_event_cachep, objp: FANOTIFY_PERM(event)); |
991 | } |
992 | |
993 | static void fanotify_free_fid_event(struct fanotify_event *event) |
994 | { |
995 | struct fanotify_fid_event *ffe = FANOTIFY_FE(event); |
996 | |
997 | if (fanotify_fh_has_ext_buf(fh: &ffe->object_fh)) |
998 | kfree(objp: fanotify_fh_ext_buf(fh: &ffe->object_fh)); |
999 | kmem_cache_free(s: fanotify_fid_event_cachep, objp: ffe); |
1000 | } |
1001 | |
/* Free a name event; the containing object is released with kfree(). */
static void fanotify_free_name_event(struct fanotify_event *event)
{
	kfree(FANOTIFY_NE(event));
}
1006 | |
1007 | static void fanotify_free_error_event(struct fsnotify_group *group, |
1008 | struct fanotify_event *event) |
1009 | { |
1010 | struct fanotify_error_event *fee = FANOTIFY_EE(event); |
1011 | |
1012 | mempool_free(element: fee, pool: &group->fanotify_data.error_events_pool); |
1013 | } |
1014 | |
1015 | static void fanotify_free_event(struct fsnotify_group *group, |
1016 | struct fsnotify_event *fsn_event) |
1017 | { |
1018 | struct fanotify_event *event; |
1019 | |
1020 | event = FANOTIFY_E(fse: fsn_event); |
1021 | put_pid(pid: event->pid); |
1022 | switch (event->type) { |
1023 | case FANOTIFY_EVENT_TYPE_PATH: |
1024 | fanotify_free_path_event(event); |
1025 | break; |
1026 | case FANOTIFY_EVENT_TYPE_PATH_PERM: |
1027 | fanotify_free_perm_event(event); |
1028 | break; |
1029 | case FANOTIFY_EVENT_TYPE_FID: |
1030 | fanotify_free_fid_event(event); |
1031 | break; |
1032 | case FANOTIFY_EVENT_TYPE_FID_NAME: |
1033 | fanotify_free_name_event(event); |
1034 | break; |
1035 | case FANOTIFY_EVENT_TYPE_OVERFLOW: |
1036 | kfree(objp: event); |
1037 | break; |
1038 | case FANOTIFY_EVENT_TYPE_FS_ERROR: |
1039 | fanotify_free_error_event(group, event); |
1040 | break; |
1041 | default: |
1042 | WARN_ON_ONCE(1); |
1043 | } |
1044 | } |
1045 | |
1046 | static void fanotify_freeing_mark(struct fsnotify_mark *mark, |
1047 | struct fsnotify_group *group) |
1048 | { |
1049 | if (!FAN_GROUP_FLAG(group, FAN_UNLIMITED_MARKS)) |
1050 | dec_ucount(ucounts: group->fanotify_data.ucounts, type: UCOUNT_FANOTIFY_MARKS); |
1051 | } |
1052 | |
1053 | static void fanotify_free_mark(struct fsnotify_mark *fsn_mark) |
1054 | { |
1055 | kmem_cache_free(s: fanotify_mark_cache, objp: FANOTIFY_MARK(mark: fsn_mark)); |
1056 | } |
1057 | |
1058 | const struct fsnotify_ops fanotify_fsnotify_ops = { |
1059 | .handle_event = fanotify_handle_event, |
1060 | .free_group_priv = fanotify_free_group_priv, |
1061 | .free_event = fanotify_free_event, |
1062 | .freeing_mark = fanotify_freeing_mark, |
1063 | .free_mark = fanotify_free_mark, |
1064 | }; |
1065 | |