1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* |
3 | * linux/fs/readdir.c |
4 | * |
5 | * Copyright (C) 1995 Linus Torvalds |
6 | */ |
7 | |
8 | #include <linux/stddef.h> |
9 | #include <linux/kernel.h> |
10 | #include <linux/export.h> |
11 | #include <linux/time.h> |
12 | #include <linux/mm.h> |
13 | #include <linux/errno.h> |
14 | #include <linux/stat.h> |
15 | #include <linux/file.h> |
16 | #include <linux/fs.h> |
17 | #include <linux/fsnotify.h> |
18 | #include <linux/dirent.h> |
19 | #include <linux/security.h> |
20 | #include <linux/syscalls.h> |
21 | #include <linux/unistd.h> |
22 | #include <linux/compat.h> |
23 | #include <linux/uaccess.h> |
24 | |
25 | /* |
26 | * Some filesystems were never converted to '->iterate_shared()' |
27 | * and their directory iterators want the inode lock held for |
28 | * writing. This wrapper allows for converting from the shared |
29 | * semantics to the exclusive inode use. |
30 | */ |
31 | int wrap_directory_iterator(struct file *file, |
32 | struct dir_context *ctx, |
33 | int (*iter)(struct file *, struct dir_context *)) |
34 | { |
35 | struct inode *inode = file_inode(f: file); |
36 | int ret; |
37 | |
38 | /* |
39 | * We'd love to have an 'inode_upgrade_trylock()' operation, |
40 | * see the comment in mmap_upgrade_trylock() in mm/memory.c. |
41 | * |
42 | * But considering this is for "filesystems that never got |
43 | * converted", it really doesn't matter. |
44 | * |
45 | * Also note that since we have to return with the lock held |
46 | * for reading, we can't use the "killable()" locking here, |
47 | * since we do need to get the lock even if we're dying. |
48 | * |
49 | * We could do the write part killably and then get the read |
50 | * lock unconditionally if it mattered, but see above on why |
51 | * this does the very simplistic conversion. |
52 | */ |
53 | up_read(sem: &inode->i_rwsem); |
54 | down_write(sem: &inode->i_rwsem); |
55 | |
56 | /* |
57 | * Since we dropped the inode lock, we should do the |
58 | * DEADDIR test again. See 'iterate_dir()' below. |
59 | * |
60 | * Note that we don't need to re-do the f_pos games, |
61 | * since the file must be locked wrt f_pos anyway. |
62 | */ |
63 | ret = -ENOENT; |
64 | if (!IS_DEADDIR(inode)) |
65 | ret = iter(file, ctx); |
66 | |
67 | downgrade_write(sem: &inode->i_rwsem); |
68 | return ret; |
69 | } |
70 | EXPORT_SYMBOL(wrap_directory_iterator); |
71 | |
/*
 * Copy a directory entry name to user space and NUL-terminate it.
 *
 * A 0 byte is stored at _dst[_len] first, then _len bytes are copied
 * from _src to _dst.  As with "unsafe_put_user()", a failure is not
 * returned: control jumps to 'label' instead.  _dst, _src and _len
 * are each evaluated once.
 */
#define unsafe_copy_dirent_name(_dst, _src, _len, label) do {	\
	char __user *dst = (_dst);				\
	const char *src = (_src);				\
	size_t len = (_len);					\
	unsafe_put_user(0, dst + len, label);			\
	unsafe_copy_to_user(dst, src, len, label);		\
} while (0)
83 | |
84 | |
85 | int iterate_dir(struct file *file, struct dir_context *ctx) |
86 | { |
87 | struct inode *inode = file_inode(f: file); |
88 | int res = -ENOTDIR; |
89 | |
90 | if (!file->f_op->iterate_shared) |
91 | goto out; |
92 | |
93 | res = security_file_permission(file, MAY_READ); |
94 | if (res) |
95 | goto out; |
96 | |
97 | res = fsnotify_file_perm(file, MAY_READ); |
98 | if (res) |
99 | goto out; |
100 | |
101 | res = down_read_killable(sem: &inode->i_rwsem); |
102 | if (res) |
103 | goto out; |
104 | |
105 | res = -ENOENT; |
106 | if (!IS_DEADDIR(inode)) { |
107 | ctx->pos = file->f_pos; |
108 | res = file->f_op->iterate_shared(file, ctx); |
109 | file->f_pos = ctx->pos; |
110 | fsnotify_access(file); |
111 | file_accessed(file); |
112 | } |
113 | inode_unlock_shared(inode); |
114 | out: |
115 | return res; |
116 | } |
117 | EXPORT_SYMBOL(iterate_dir); |
118 | |
/*
 * POSIX says that a dirent name cannot contain a NUL byte or a '/'.
 *
 * It's not 100% clear what we should really do in this case.
 * The filesystem is clearly corrupted, but returning a hard
 * error means that you now don't see any of the other names
 * either, so that isn't a perfect alternative.
 *
 * And if you return an error, what error do you use? Several
 * filesystems seem to have decided on EUCLEAN being the error
 * code for EFSCORRUPTED, and that may be the error to use. Or
 * just EIO, which is perhaps more obvious to users.
 *
 * In order to see the other file names in the directory, the
 * caller might want to make this a "soft" error: skip the
 * entry, and return the error at the end instead.
 *
 * Note that this should likely do a "memchr(name, 0, len)"
 * check too, since that would be filesystem corruption as
 * well. However, that case can't actually confuse user space,
 * which has to do a strlen() on the name anyway to find the
 * filename length, and the above "soft error" worry means
 * that it's probably better left alone until we have that
 * issue clarified.
 *
 * Note the PATH_MAX check - it's arbitrary but the real
 * kernel limit on a possible path component, not NAME_MAX,
 * which is the technical standard limit.
 *
 * @name: entry name; only the first @len bytes are inspected
 * @len:  length of @name in bytes
 *
 * Returns 0 if 0 < @len < PATH_MAX and none of the first @len bytes
 * of @name is a '/', -EIO otherwise.
 */
static int verify_dirent_name(const char *name, int len)
{
	/* Reject the length first so memchr() never sees a bogus size. */
	if (len <= 0 || len >= PATH_MAX)
		return -EIO;
	if (memchr(name, '/', len))
		return -EIO;
	return 0;
}
156 | |
/*
 * Traditional linux readdir() handling..
 *
 * "count=1" is a special case, meaning that the buffer is one
 * dirent-structure in size and that the code can't handle more
 * anyway. Thus the special "fillonedir()" function for that
 * case (the low-level handlers don't need to care about this).
 */
165 | |
166 | #ifdef __ARCH_WANT_OLD_READDIR |
167 | |
/* User-space layout of the single entry produced by old_readdir(). */
struct old_linux_dirent {
	unsigned long	d_ino;		/* inode number */
	unsigned long	d_offset;	/* offset value passed to the actor */
	unsigned short	d_namlen;	/* length of d_name, without the NUL */
	char		d_name[];	/* entry name, NUL-terminated */
};
174 | |
175 | struct readdir_callback { |
176 | struct dir_context ctx; |
177 | struct old_linux_dirent __user * dirent; |
178 | int result; |
179 | }; |
180 | |
181 | static bool fillonedir(struct dir_context *ctx, const char *name, int namlen, |
182 | loff_t offset, u64 ino, unsigned int d_type) |
183 | { |
184 | struct readdir_callback *buf = |
185 | container_of(ctx, struct readdir_callback, ctx); |
186 | struct old_linux_dirent __user * dirent; |
187 | unsigned long d_ino; |
188 | |
189 | if (buf->result) |
190 | return false; |
191 | buf->result = verify_dirent_name(name, len: namlen); |
192 | if (buf->result) |
193 | return false; |
194 | d_ino = ino; |
195 | if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) { |
196 | buf->result = -EOVERFLOW; |
197 | return false; |
198 | } |
199 | buf->result++; |
200 | dirent = buf->dirent; |
201 | if (!user_write_access_begin(dirent, |
202 | (unsigned long)(dirent->d_name + namlen + 1) - |
203 | (unsigned long)dirent)) |
204 | goto efault; |
205 | unsafe_put_user(d_ino, &dirent->d_ino, efault_end); |
206 | unsafe_put_user(offset, &dirent->d_offset, efault_end); |
207 | unsafe_put_user(namlen, &dirent->d_namlen, efault_end); |
208 | unsafe_copy_dirent_name(dirent->d_name, name, namlen, efault_end); |
209 | user_write_access_end(); |
210 | return true; |
211 | efault_end: |
212 | user_write_access_end(); |
213 | efault: |
214 | buf->result = -EFAULT; |
215 | return false; |
216 | } |
217 | |
218 | SYSCALL_DEFINE3(old_readdir, unsigned int, fd, |
219 | struct old_linux_dirent __user *, dirent, unsigned int, count) |
220 | { |
221 | int error; |
222 | CLASS(fd_pos, f)(fd); |
223 | struct readdir_callback buf = { |
224 | .ctx.actor = fillonedir, |
225 | .ctx.count = 1, /* Hint to fs: just one entry. */ |
226 | .dirent = dirent |
227 | }; |
228 | |
229 | if (fd_empty(f)) |
230 | return -EBADF; |
231 | |
232 | error = iterate_dir(fd_file(f), &buf.ctx); |
233 | if (buf.result) |
234 | error = buf.result; |
235 | |
236 | return error; |
237 | } |
238 | |
239 | #endif /* __ARCH_WANT_OLD_READDIR */ |
240 | |
/*
 * New, all-improved, singing, dancing, iBCS2-compliant getdents()
 * interface.
 */
/*
 * User-space record written by filldir() for getdents().
 *
 * Records are variable-sized; filldir() additionally stores the entry
 * type in the very last byte of each record (offset d_reclen - 1).
 */
struct linux_dirent {
	unsigned long	d_ino;		/* inode number */
	unsigned long	d_off;		/* filled in when the next record is
					 * written, or with the final ctx.pos */
	unsigned short	d_reclen;	/* total size of this record */
	char		d_name[];	/* entry name, NUL-terminated */
};
251 | |
252 | struct getdents_callback { |
253 | struct dir_context ctx; |
254 | struct linux_dirent __user * current_dir; |
255 | int prev_reclen; |
256 | int error; |
257 | }; |
258 | |
259 | static bool filldir(struct dir_context *ctx, const char *name, int namlen, |
260 | loff_t offset, u64 ino, unsigned int d_type) |
261 | { |
262 | struct linux_dirent __user *dirent, *prev; |
263 | struct getdents_callback *buf = |
264 | container_of(ctx, struct getdents_callback, ctx); |
265 | unsigned long d_ino; |
266 | int reclen = ALIGN(offsetof(struct linux_dirent, d_name) + namlen + 2, |
267 | sizeof(long)); |
268 | int prev_reclen; |
269 | unsigned int flags = d_type; |
270 | |
271 | BUILD_BUG_ON(FILLDIR_FLAG_NOINTR & S_DT_MASK); |
272 | d_type &= S_DT_MASK; |
273 | |
274 | buf->error = verify_dirent_name(name, len: namlen); |
275 | if (unlikely(buf->error)) |
276 | return false; |
277 | buf->error = -EINVAL; /* only used if we fail.. */ |
278 | if (reclen > ctx->count) |
279 | return false; |
280 | d_ino = ino; |
281 | if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) { |
282 | buf->error = -EOVERFLOW; |
283 | return false; |
284 | } |
285 | prev_reclen = buf->prev_reclen; |
286 | if (!(flags & FILLDIR_FLAG_NOINTR) && prev_reclen && signal_pending(current)) |
287 | return false; |
288 | dirent = buf->current_dir; |
289 | prev = (void __user *) dirent - prev_reclen; |
290 | if (!user_write_access_begin(prev, reclen + prev_reclen)) |
291 | goto efault; |
292 | |
293 | /* This might be 'dirent->d_off', but if so it will get overwritten */ |
294 | unsafe_put_user(offset, &prev->d_off, efault_end); |
295 | unsafe_put_user(d_ino, &dirent->d_ino, efault_end); |
296 | unsafe_put_user(reclen, &dirent->d_reclen, efault_end); |
297 | unsafe_put_user(d_type, (char __user *) dirent + reclen - 1, efault_end); |
298 | unsafe_copy_dirent_name(dirent->d_name, name, namlen, efault_end); |
299 | user_write_access_end(); |
300 | |
301 | buf->current_dir = (void __user *)dirent + reclen; |
302 | buf->prev_reclen = reclen; |
303 | ctx->count -= reclen; |
304 | return true; |
305 | efault_end: |
306 | user_write_access_end(); |
307 | efault: |
308 | buf->error = -EFAULT; |
309 | return false; |
310 | } |
311 | |
312 | SYSCALL_DEFINE3(getdents, unsigned int, fd, |
313 | struct linux_dirent __user *, dirent, unsigned int, count) |
314 | { |
315 | CLASS(fd_pos, f)(fd); |
316 | struct getdents_callback buf = { |
317 | .ctx.actor = filldir, |
318 | .ctx.count = count, |
319 | .current_dir = dirent |
320 | }; |
321 | int error; |
322 | |
323 | if (fd_empty(f)) |
324 | return -EBADF; |
325 | |
326 | error = iterate_dir(fd_file(f), &buf.ctx); |
327 | if (error >= 0) |
328 | error = buf.error; |
329 | if (buf.prev_reclen) { |
330 | struct linux_dirent __user * lastdirent; |
331 | lastdirent = (void __user *)buf.current_dir - buf.prev_reclen; |
332 | |
333 | if (put_user(buf.ctx.pos, &lastdirent->d_off)) |
334 | error = -EFAULT; |
335 | else |
336 | error = count - buf.ctx.count; |
337 | } |
338 | return error; |
339 | } |
340 | |
341 | struct getdents_callback64 { |
342 | struct dir_context ctx; |
343 | struct linux_dirent64 __user * current_dir; |
344 | int prev_reclen; |
345 | int error; |
346 | }; |
347 | |
348 | static bool filldir64(struct dir_context *ctx, const char *name, int namlen, |
349 | loff_t offset, u64 ino, unsigned int d_type) |
350 | { |
351 | struct linux_dirent64 __user *dirent, *prev; |
352 | struct getdents_callback64 *buf = |
353 | container_of(ctx, struct getdents_callback64, ctx); |
354 | int reclen = ALIGN(offsetof(struct linux_dirent64, d_name) + namlen + 1, |
355 | sizeof(u64)); |
356 | int prev_reclen; |
357 | unsigned int flags = d_type; |
358 | |
359 | BUILD_BUG_ON(FILLDIR_FLAG_NOINTR & S_DT_MASK); |
360 | d_type &= S_DT_MASK; |
361 | |
362 | buf->error = verify_dirent_name(name, len: namlen); |
363 | if (unlikely(buf->error)) |
364 | return false; |
365 | buf->error = -EINVAL; /* only used if we fail.. */ |
366 | if (reclen > ctx->count) |
367 | return false; |
368 | prev_reclen = buf->prev_reclen; |
369 | if (!(flags & FILLDIR_FLAG_NOINTR) && prev_reclen && signal_pending(current)) |
370 | return false; |
371 | dirent = buf->current_dir; |
372 | prev = (void __user *)dirent - prev_reclen; |
373 | if (!user_write_access_begin(prev, reclen + prev_reclen)) |
374 | goto efault; |
375 | |
376 | /* This might be 'dirent->d_off', but if so it will get overwritten */ |
377 | unsafe_put_user(offset, &prev->d_off, efault_end); |
378 | unsafe_put_user(ino, &dirent->d_ino, efault_end); |
379 | unsafe_put_user(reclen, &dirent->d_reclen, efault_end); |
380 | unsafe_put_user(d_type, &dirent->d_type, efault_end); |
381 | unsafe_copy_dirent_name(dirent->d_name, name, namlen, efault_end); |
382 | user_write_access_end(); |
383 | |
384 | buf->prev_reclen = reclen; |
385 | buf->current_dir = (void __user *)dirent + reclen; |
386 | ctx->count -= reclen; |
387 | return true; |
388 | |
389 | efault_end: |
390 | user_write_access_end(); |
391 | efault: |
392 | buf->error = -EFAULT; |
393 | return false; |
394 | } |
395 | |
396 | SYSCALL_DEFINE3(getdents64, unsigned int, fd, |
397 | struct linux_dirent64 __user *, dirent, unsigned int, count) |
398 | { |
399 | CLASS(fd_pos, f)(fd); |
400 | struct getdents_callback64 buf = { |
401 | .ctx.actor = filldir64, |
402 | .ctx.count = count, |
403 | .current_dir = dirent |
404 | }; |
405 | int error; |
406 | |
407 | if (fd_empty(f)) |
408 | return -EBADF; |
409 | |
410 | error = iterate_dir(fd_file(f), &buf.ctx); |
411 | if (error >= 0) |
412 | error = buf.error; |
413 | if (buf.prev_reclen) { |
414 | struct linux_dirent64 __user * lastdirent; |
415 | typeof(lastdirent->d_off) d_off = buf.ctx.pos; |
416 | |
417 | lastdirent = (void __user *) buf.current_dir - buf.prev_reclen; |
418 | if (put_user(d_off, &lastdirent->d_off)) |
419 | error = -EFAULT; |
420 | else |
421 | error = count - buf.ctx.count; |
422 | } |
423 | return error; |
424 | } |
425 | |
426 | #ifdef CONFIG_COMPAT |
427 | struct compat_old_linux_dirent { |
428 | compat_ulong_t d_ino; |
429 | compat_ulong_t d_offset; |
430 | unsigned short d_namlen; |
431 | char d_name[]; |
432 | }; |
433 | |
434 | struct compat_readdir_callback { |
435 | struct dir_context ctx; |
436 | struct compat_old_linux_dirent __user *dirent; |
437 | int result; |
438 | }; |
439 | |
440 | static bool compat_fillonedir(struct dir_context *ctx, const char *name, |
441 | int namlen, loff_t offset, u64 ino, |
442 | unsigned int d_type) |
443 | { |
444 | struct compat_readdir_callback *buf = |
445 | container_of(ctx, struct compat_readdir_callback, ctx); |
446 | struct compat_old_linux_dirent __user *dirent; |
447 | compat_ulong_t d_ino; |
448 | |
449 | if (buf->result) |
450 | return false; |
451 | buf->result = verify_dirent_name(name, len: namlen); |
452 | if (buf->result) |
453 | return false; |
454 | d_ino = ino; |
455 | if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) { |
456 | buf->result = -EOVERFLOW; |
457 | return false; |
458 | } |
459 | buf->result++; |
460 | dirent = buf->dirent; |
461 | if (!user_write_access_begin(dirent, |
462 | (unsigned long)(dirent->d_name + namlen + 1) - |
463 | (unsigned long)dirent)) |
464 | goto efault; |
465 | unsafe_put_user(d_ino, &dirent->d_ino, efault_end); |
466 | unsafe_put_user(offset, &dirent->d_offset, efault_end); |
467 | unsafe_put_user(namlen, &dirent->d_namlen, efault_end); |
468 | unsafe_copy_dirent_name(dirent->d_name, name, namlen, efault_end); |
469 | user_write_access_end(); |
470 | return true; |
471 | efault_end: |
472 | user_write_access_end(); |
473 | efault: |
474 | buf->result = -EFAULT; |
475 | return false; |
476 | } |
477 | |
478 | COMPAT_SYSCALL_DEFINE3(old_readdir, unsigned int, fd, |
479 | struct compat_old_linux_dirent __user *, dirent, unsigned int, count) |
480 | { |
481 | int error; |
482 | CLASS(fd_pos, f)(fd); |
483 | struct compat_readdir_callback buf = { |
484 | .ctx.actor = compat_fillonedir, |
485 | .ctx.count = 1, /* Hint to fs: just one entry. */ |
486 | .dirent = dirent |
487 | }; |
488 | |
489 | if (fd_empty(f)) |
490 | return -EBADF; |
491 | |
492 | error = iterate_dir(fd_file(f), &buf.ctx); |
493 | if (buf.result) |
494 | error = buf.result; |
495 | |
496 | return error; |
497 | } |
498 | |
499 | struct compat_linux_dirent { |
500 | compat_ulong_t d_ino; |
501 | compat_ulong_t d_off; |
502 | unsigned short d_reclen; |
503 | char d_name[]; |
504 | }; |
505 | |
506 | struct compat_getdents_callback { |
507 | struct dir_context ctx; |
508 | struct compat_linux_dirent __user *current_dir; |
509 | int prev_reclen; |
510 | int error; |
511 | }; |
512 | |
513 | static bool compat_filldir(struct dir_context *ctx, const char *name, int namlen, |
514 | loff_t offset, u64 ino, unsigned int d_type) |
515 | { |
516 | struct compat_linux_dirent __user *dirent, *prev; |
517 | struct compat_getdents_callback *buf = |
518 | container_of(ctx, struct compat_getdents_callback, ctx); |
519 | compat_ulong_t d_ino; |
520 | int reclen = ALIGN(offsetof(struct compat_linux_dirent, d_name) + |
521 | namlen + 2, sizeof(compat_long_t)); |
522 | int prev_reclen; |
523 | unsigned int flags = d_type; |
524 | |
525 | BUILD_BUG_ON(FILLDIR_FLAG_NOINTR & S_DT_MASK); |
526 | d_type &= S_DT_MASK; |
527 | |
528 | buf->error = verify_dirent_name(name, len: namlen); |
529 | if (unlikely(buf->error)) |
530 | return false; |
531 | buf->error = -EINVAL; /* only used if we fail.. */ |
532 | if (reclen > ctx->count) |
533 | return false; |
534 | d_ino = ino; |
535 | if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) { |
536 | buf->error = -EOVERFLOW; |
537 | return false; |
538 | } |
539 | prev_reclen = buf->prev_reclen; |
540 | if (!(flags & FILLDIR_FLAG_NOINTR) && prev_reclen && signal_pending(current)) |
541 | return false; |
542 | dirent = buf->current_dir; |
543 | prev = (void __user *) dirent - prev_reclen; |
544 | if (!user_write_access_begin(prev, reclen + prev_reclen)) |
545 | goto efault; |
546 | |
547 | unsafe_put_user(offset, &prev->d_off, efault_end); |
548 | unsafe_put_user(d_ino, &dirent->d_ino, efault_end); |
549 | unsafe_put_user(reclen, &dirent->d_reclen, efault_end); |
550 | unsafe_put_user(d_type, (char __user *) dirent + reclen - 1, efault_end); |
551 | unsafe_copy_dirent_name(dirent->d_name, name, namlen, efault_end); |
552 | user_write_access_end(); |
553 | |
554 | buf->prev_reclen = reclen; |
555 | buf->current_dir = (void __user *)dirent + reclen; |
556 | ctx->count -= reclen; |
557 | return true; |
558 | efault_end: |
559 | user_write_access_end(); |
560 | efault: |
561 | buf->error = -EFAULT; |
562 | return false; |
563 | } |
564 | |
565 | COMPAT_SYSCALL_DEFINE3(getdents, unsigned int, fd, |
566 | struct compat_linux_dirent __user *, dirent, unsigned int, count) |
567 | { |
568 | CLASS(fd_pos, f)(fd); |
569 | struct compat_getdents_callback buf = { |
570 | .ctx.actor = compat_filldir, |
571 | .ctx.count = count, |
572 | .current_dir = dirent, |
573 | }; |
574 | int error; |
575 | |
576 | if (fd_empty(f)) |
577 | return -EBADF; |
578 | |
579 | error = iterate_dir(fd_file(f), &buf.ctx); |
580 | if (error >= 0) |
581 | error = buf.error; |
582 | if (buf.prev_reclen) { |
583 | struct compat_linux_dirent __user * lastdirent; |
584 | lastdirent = (void __user *)buf.current_dir - buf.prev_reclen; |
585 | |
586 | if (put_user(buf.ctx.pos, &lastdirent->d_off)) |
587 | error = -EFAULT; |
588 | else |
589 | error = count - buf.ctx.count; |
590 | } |
591 | return error; |
592 | } |
593 | #endif |
594 | |