// SPDX-License-Identifier: GPL-2.0
/*
 * gendisk handling
 *
 * Portions Copyright (C) 2020 Christoph Hellwig
 */

#include <linux/module.h>
#include <linux/ctype.h>
#include <linux/fs.h>
#include <linux/kdev_t.h>
#include <linux/kernel.h>
#include <linux/blkdev.h>
#include <linux/backing-dev.h>
#include <linux/init.h>
#include <linux/spinlock.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
#include <linux/kmod.h>
#include <linux/major.h>
#include <linux/mutex.h>
#include <linux/idr.h>
#include <linux/log2.h>
#include <linux/pm_runtime.h>
#include <linux/badblocks.h>
#include <linux/part_stat.h>
#include <linux/blktrace_api.h>

#include "blk-throttle.h"
#include "blk.h"
#include "blk-mq-sched.h"
#include "blk-rq-qos.h"
#include "blk-cgroup.h"

static struct kobject *block_depr;

/*
 * Unique, monotonically increasing sequential number associated with block
 * device instances (i.e. incremented each time a device is attached).
 * Associating uevents with block devices in userspace is difficult and racy:
 * the uevent netlink socket is lossy, and on slow and overloaded systems has
 * a very high latency.
 * Block devices do not have exclusive owners in userspace, any process can set
 * one up (e.g. loop devices). Moreover, device names can be reused (e.g. loop0
 * can be reused again and again).
 * A userspace process setting up a block device and watching for its events
 * thus cannot reliably tell whether an event relates to the device it just set
 * up or to an earlier instance with the same name.
 * This sequential number allows userspace processes to solve this problem, and
 * uniquely associate an uevent with the lifetime of a device.
 */
static atomic64_t diskseq;

/* for extended dynamic devt allocation, currently only one major is used */
#define NR_EXT_DEVT		(1 << MINORBITS)
static DEFINE_IDA(ext_devt_ida);

void set_capacity(struct gendisk *disk, sector_t sectors)
{
	if (sectors > BLK_DEV_MAX_SECTORS) {
		pr_warn_once("%s: truncate capacity from %lld to %lld\n",
				disk->disk_name, sectors,
				BLK_DEV_MAX_SECTORS);
		sectors = BLK_DEV_MAX_SECTORS;
	}

	bdev_set_nr_sectors(disk->part0, sectors);
}
EXPORT_SYMBOL(set_capacity);

/*
 * Set disk capacity and notify if the size is not currently zero and will not
 * be set to zero. Returns true if a uevent was sent, otherwise false.
 */
bool set_capacity_and_notify(struct gendisk *disk, sector_t size)
{
	sector_t capacity = get_capacity(disk);
	char *envp[] = { "RESIZE=1", NULL };

	set_capacity(disk, size);

	/*
	 * Only print a message and send a uevent if the gendisk is user visible
	 * and alive. This avoids spamming the log and udev when setting the
	 * initial capacity during probing.
	 */
	if (size == capacity ||
	    !disk_live(disk) ||
	    (disk->flags & GENHD_FL_HIDDEN))
		return false;

	pr_info("%s: detected capacity change from %lld to %lld\n",
		disk->disk_name, capacity, size);

	/*
	 * Historically we did not send a uevent for changes to/from an empty
	 * device.
	 */
	if (!capacity || !size)
		return false;
	kobject_uevent_env(&disk_to_dev(disk)->kobj, KOBJ_CHANGE, envp);
	return true;
}
EXPORT_SYMBOL_GPL(set_capacity_and_notify);

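/*
 * Sum up the per-CPU I/O statistics of @part into @stat. The result is an
 * unsynchronized snapshot: updates that race with the summation may be
 * missed.
 */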
static void part_stat_read_all(struct block_device *part,
		struct disk_stats *stat)
{
	int cpu;

	memset(stat, 0, sizeof(struct disk_stats));
	for_each_possible_cpu(cpu) {
		struct disk_stats *ptr = per_cpu_ptr(part->bd_stats, cpu);
		int group;

		for (group = 0; group < NR_STAT_GROUPS; group++) {
			stat->nsecs[group] += ptr->nsecs[group];
			stat->sectors[group] += ptr->sectors[group];
			stat->ios[group] += ptr->ios[group];
			stat->merges[group] += ptr->merges[group];
		}

		stat->io_ticks += ptr->io_ticks;
	}
}

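/*
 * Count the inflight reads and writes for @part. Request based drivers
 * (@mq_driver == true) are counted from the blk-mq tags, bio based drivers
 * from the per-CPU in_flight counters. Since the per-CPU counters are read
 * without synchronization, a transiently negative sum is clamped to zero.
 */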
static void bdev_count_inflight_rw(struct block_device *part,
		unsigned int inflight[2], bool mq_driver)
{
	int cpu;

	if (mq_driver) {
		blk_mq_in_driver_rw(part, inflight);
	} else {
		for_each_possible_cpu(cpu) {
			inflight[READ] += part_stat_local_read_cpu(
						part, in_flight[READ], cpu);
			inflight[WRITE] += part_stat_local_read_cpu(
						part, in_flight[WRITE], cpu);
		}
	}

	if (WARN_ON_ONCE((int)inflight[READ] < 0))
		inflight[READ] = 0;
	if (WARN_ON_ONCE((int)inflight[WRITE] < 0))
		inflight[WRITE] = 0;
}

/**
 * bdev_count_inflight - get the number of inflight IOs for a block device.
 *
 * @part: the block device.
 *
 * Inflight here means started IO accounting, from bdev_start_io_acct() for
 * bio-based block devices, and from blk_account_io_start() for rq-based
 * block devices.
 */
unsigned int bdev_count_inflight(struct block_device *part)
{
	unsigned int inflight[2] = {0};

	bdev_count_inflight_rw(part, inflight, false);

	return inflight[READ] + inflight[WRITE];
}
EXPORT_SYMBOL_GPL(bdev_count_inflight);

/*
 * Can be deleted altogether. Later.
 *
 */
#define BLKDEV_MAJOR_HASH_SIZE 255
static struct blk_major_name {
	struct blk_major_name *next;
	int major;
	char name[16];
#ifdef CONFIG_BLOCK_LEGACY_AUTOLOAD
	void (*probe)(dev_t devt);
#endif
} *major_names[BLKDEV_MAJOR_HASH_SIZE];
static DEFINE_MUTEX(major_names_lock);
static DEFINE_SPINLOCK(major_names_spinlock);

/* index in the above - for now: assume no multimajor ranges */
static inline int major_to_index(unsigned major)
{
	return major % BLKDEV_MAJOR_HASH_SIZE;
}

#ifdef CONFIG_PROC_FS
void blkdev_show(struct seq_file *seqf, off_t offset)
{
	struct blk_major_name *dp;

	spin_lock(&major_names_spinlock);
	for (dp = major_names[major_to_index(offset)]; dp; dp = dp->next)
		if (dp->major == offset)
			seq_printf(seqf, "%3d %s\n", dp->major, dp->name);
	spin_unlock(&major_names_spinlock);
}
#endif /* CONFIG_PROC_FS */

/**
 * __register_blkdev - register a new block device
 *
 * @major: the requested major device number [1..BLKDEV_MAJOR_MAX-1]. If
 *	   @major = 0, try to allocate any unused major number.
 * @name: the name of the new block device as a zero terminated string
 * @probe: pre-devtmpfs / pre-udev callback used to create disks when their
 *	   pre-created device node is accessed. When a probe call uses
 *	   add_disk() and it fails the driver must cleanup resources. This
 *	   interface may soon be removed.
 *
 * The @name must be unique within the system.
 *
 * The return value depends on the @major input parameter:
 *
 *  - if a major device number was requested in range [1..BLKDEV_MAJOR_MAX-1]
 *    then the function returns zero on success, or a negative error code
 *  - if any unused major number was requested with @major = 0 parameter
 *    then the return value is the allocated major number in range
 *    [1..BLKDEV_MAJOR_MAX-1] or a negative error code otherwise
 *
 * See Documentation/admin-guide/devices.txt for the list of allocated
 * major numbers.
 *
 * Use register_blkdev instead for any new code.
 */
int __register_blkdev(unsigned int major, const char *name,
		void (*probe)(dev_t devt))
{
	struct blk_major_name **n, *p;
	int index, ret = 0;

	mutex_lock(&major_names_lock);

	/* temporary */
	if (major == 0) {
		for (index = ARRAY_SIZE(major_names)-1; index > 0; index--) {
			if (major_names[index] == NULL)
				break;
		}

		if (index == 0) {
			printk("%s: failed to get major for %s\n",
			       __func__, name);
			ret = -EBUSY;
			goto out;
		}
		major = index;
		ret = major;
	}

	if (major >= BLKDEV_MAJOR_MAX) {
		pr_err("%s: major requested (%u) is greater than the maximum (%u) for %s\n",
		       __func__, major, BLKDEV_MAJOR_MAX-1, name);

		ret = -EINVAL;
		goto out;
	}

	p = kmalloc(sizeof(struct blk_major_name), GFP_KERNEL);
	if (p == NULL) {
		ret = -ENOMEM;
		goto out;
	}

	p->major = major;
#ifdef CONFIG_BLOCK_LEGACY_AUTOLOAD
	p->probe = probe;
#endif
	strscpy(p->name, name, sizeof(p->name));
	p->next = NULL;
	index = major_to_index(major);

	spin_lock(&major_names_spinlock);
	for (n = &major_names[index]; *n; n = &(*n)->next) {
		if ((*n)->major == major)
			break;
	}
	if (!*n)
		*n = p;
	else
		ret = -EBUSY;
	spin_unlock(&major_names_spinlock);

	if (ret < 0) {
		printk("register_blkdev: cannot get major %u for %s\n",
		       major, name);
		kfree(p);
	}
out:
	mutex_unlock(&major_names_lock);
	return ret;
}
EXPORT_SYMBOL(__register_blkdev);

void unregister_blkdev(unsigned int major, const char *name)
{
	struct blk_major_name **n;
	struct blk_major_name *p = NULL;
	int index = major_to_index(major);

	mutex_lock(&major_names_lock);
	spin_lock(&major_names_spinlock);
	for (n = &major_names[index]; *n; n = &(*n)->next)
		if ((*n)->major == major)
			break;
	if (!*n || strcmp((*n)->name, name)) {
		WARN_ON(1);
	} else {
		p = *n;
		*n = p->next;
	}
	spin_unlock(&major_names_spinlock);
	mutex_unlock(&major_names_lock);
	kfree(p);
}

EXPORT_SYMBOL(unregister_blkdev);

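/*
 * Allocate a minor number from the extended devt space (major
 * BLOCK_EXT_MAJOR). Returns the minor on success, -EBUSY if the space is
 * exhausted, or another negative error code on failure.
 */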
int blk_alloc_ext_minor(void)
{
	int idx;

	idx = ida_alloc_range(&ext_devt_ida, 0, NR_EXT_DEVT - 1, GFP_KERNEL);
	if (idx == -ENOSPC)
		return -EBUSY;
	return idx;
}

void blk_free_ext_minor(unsigned int minor)
{
	ida_free(&ext_devt_ida, minor);
}

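/**
 * disk_uevent - send a uevent for the disk and each non-empty partition
 * @disk: disk to send the uevents for
 * @action: uevent action (e.g. KOBJ_ADD, KOBJ_CHANGE)
 *
 * The RCU read lock protecting the walk of the partition table cannot be
 * held across kobject_uevent(), so each block device is pinned with a
 * reference while the lock is dropped for the actual uevent emission.
 */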
void disk_uevent(struct gendisk *disk, enum kobject_action action)
{
	struct block_device *part;
	unsigned long idx;

	rcu_read_lock();
	xa_for_each(&disk->part_tbl, idx, part) {
		if (bdev_is_partition(part) && !bdev_nr_sectors(part))
			continue;
		if (!kobject_get_unless_zero(&part->bd_device.kobj))
			continue;

		rcu_read_unlock();
		kobject_uevent(bdev_kobj(part), action);
		put_device(&part->bd_device);
		rcu_read_lock();
	}
	rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(disk_uevent);

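/*
 * Trigger a partition rescan by opening the whole disk once with
 * GD_NEED_PART_SCAN set. Returns -EINVAL if partition scanning is disabled
 * for this disk and -EBUSY if any of its partitions is held open.
 */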
int disk_scan_partitions(struct gendisk *disk, blk_mode_t mode)
{
	struct file *file;
	int ret = 0;

	if (!disk_has_partscan(disk))
		return -EINVAL;
	if (disk->open_partitions)
		return -EBUSY;

	/*
	 * If the device is already opened exclusively by the current thread,
	 * it's safe to scan partitions. Otherwise use bd_prepare_to_claim()
	 * to synchronize with other exclusive openers and other partition
	 * scanners.
	 */
	if (!(mode & BLK_OPEN_EXCL)) {
		ret = bd_prepare_to_claim(disk->part0, disk_scan_partitions,
					  NULL);
		if (ret)
			return ret;
	}

	set_bit(GD_NEED_PART_SCAN, &disk->state);
	file = bdev_file_open_by_dev(disk_devt(disk), mode & ~BLK_OPEN_EXCL,
				     NULL, NULL);
	if (IS_ERR(file))
		ret = PTR_ERR(file);
	else
		fput(file);

	/*
	 * If bdev_file_open_by_dev() failed early, GD_NEED_PART_SCAN is still
	 * set. If it were left set, re-assembling a partitioned raid device
	 * would create partitions for the underlying disk, so clear it here.
	 */
	clear_bit(GD_NEED_PART_SCAN, &disk->state);
	if (!(mode & BLK_OPEN_EXCL))
		bd_abort_claiming(disk->part0, disk_scan_partitions);
	return ret;
}

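/*
 * Final, uevent-visible part of disk registration: add the whole-disk block
 * device node, run the initial partition scan, lift the uevent suppression
 * set up in __add_disk(), and announce the disk and its partitions.
 */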
static void add_disk_final(struct gendisk *disk)
{
	struct device *ddev = disk_to_dev(disk);

	if (!(disk->flags & GENHD_FL_HIDDEN)) {
		/* Make sure the first partition scan will proceed */
		if (get_capacity(disk) && disk_has_partscan(disk))
			set_bit(GD_NEED_PART_SCAN, &disk->state);

		bdev_add(disk->part0, ddev->devt);
		if (get_capacity(disk))
			disk_scan_partitions(disk, BLK_OPEN_READ);

		/*
		 * Announce the disk and partitions after all partitions are
		 * created. (for hidden disks uevents remain suppressed forever)
		 */
		dev_set_uevent_suppress(ddev, 0);
		disk_uevent(disk, KOBJ_ADD);
	}

	blk_apply_bdi_limits(disk->bdi, &disk->queue->limits);
	disk_add_events(disk);
	set_bit(GD_ADDED, &disk->state);
}

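/*
 * Core of disk registration: validate the ->fops against the queue type,
 * assign the device numbers, and register the device, sysfs links, holder
 * and slave directories, the request_queue and the backing_dev_info. The
 * uevents and the partition scan are deferred to add_disk_final().
 */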
static int __add_disk(struct device *parent, struct gendisk *disk,
		      const struct attribute_group **groups,
		      struct fwnode_handle *fwnode)
{
	struct device *ddev = disk_to_dev(disk);
	int ret;

	if (WARN_ON_ONCE(bdev_nr_sectors(disk->part0) > BLK_DEV_MAX_SECTORS))
		return -EINVAL;

	if (queue_is_mq(disk->queue)) {
		/*
		 * ->submit_bio and ->poll_bio are bypassed for blk-mq drivers.
		 */
		if (disk->fops->submit_bio || disk->fops->poll_bio)
			return -EINVAL;
	} else {
		if (!disk->fops->submit_bio)
			return -EINVAL;
		bdev_set_flag(disk->part0, BD_HAS_SUBMIT_BIO);
	}

	/*
	 * If the driver provides an explicit major number it also must provide
	 * the number of minor numbers supported, and those will be used to
	 * set up the gendisk.
	 * Otherwise just allocate the device numbers for both the whole device
	 * and all partitions from the extended dev_t space.
	 */
	ret = -EINVAL;
	if (disk->major) {
		if (WARN_ON(!disk->minors))
			goto out;

		if (disk->minors > DISK_MAX_PARTS) {
			pr_err("block: can't allocate more than %d partitions\n",
				DISK_MAX_PARTS);
			disk->minors = DISK_MAX_PARTS;
		}
		if (disk->first_minor > MINORMASK ||
		    disk->minors > MINORMASK + 1 ||
		    disk->first_minor + disk->minors > MINORMASK + 1)
			goto out;
	} else {
		if (WARN_ON(disk->minors))
			goto out;

		ret = blk_alloc_ext_minor();
		if (ret < 0)
			goto out;
		disk->major = BLOCK_EXT_MAJOR;
		disk->first_minor = ret;
	}

	/* delay uevents until we have scanned the partition table */
	dev_set_uevent_suppress(ddev, 1);

	ddev->parent = parent;
	ddev->groups = groups;
	dev_set_name(ddev, "%s", disk->disk_name);
	if (fwnode)
		device_set_node(ddev, fwnode);
	if (!(disk->flags & GENHD_FL_HIDDEN))
		ddev->devt = MKDEV(disk->major, disk->first_minor);
	ret = device_add(ddev);
	if (ret)
		goto out_free_ext_minor;

	ret = disk_alloc_events(disk);
	if (ret)
		goto out_device_del;

	ret = sysfs_create_link(block_depr, &ddev->kobj,
				kobject_name(&ddev->kobj));
	if (ret)
		goto out_device_del;

	/*
	 * avoid a probable deadlock caused by allocating memory with
	 * GFP_KERNEL in the runtime_resume callback of all its ancestor
	 * devices
	 */
	pm_runtime_set_memalloc_noio(ddev, true);

	disk->part0->bd_holder_dir =
		kobject_create_and_add("holders", &ddev->kobj);
	if (!disk->part0->bd_holder_dir) {
		ret = -ENOMEM;
		goto out_del_block_link;
	}
	disk->slave_dir = kobject_create_and_add("slaves", &ddev->kobj);
	if (!disk->slave_dir) {
		ret = -ENOMEM;
		goto out_put_holder_dir;
	}

	ret = blk_register_queue(disk);
	if (ret)
		goto out_put_slave_dir;

	if (!(disk->flags & GENHD_FL_HIDDEN)) {
		ret = bdi_register(disk->bdi, "%u:%u",
				   disk->major, disk->first_minor);
		if (ret)
			goto out_unregister_queue;
		bdi_set_owner(disk->bdi, ddev);
		ret = sysfs_create_link(&ddev->kobj,
					&disk->bdi->dev->kobj, "bdi");
		if (ret)
			goto out_unregister_bdi;
	} else {
		/*
		 * Even if the block_device for a hidden gendisk is not
		 * registered, it needs to have a valid bd_dev so that the
		 * freeing of the dynamic major works.
		 */
		disk->part0->bd_dev = MKDEV(disk->major, disk->first_minor);
	}
	return 0;

out_unregister_bdi:
	if (!(disk->flags & GENHD_FL_HIDDEN))
		bdi_unregister(disk->bdi);
out_unregister_queue:
	blk_unregister_queue(disk);
	rq_qos_exit(disk->queue);
out_put_slave_dir:
	kobject_put(disk->slave_dir);
	disk->slave_dir = NULL;
out_put_holder_dir:
	kobject_put(disk->part0->bd_holder_dir);
out_del_block_link:
	sysfs_remove_link(block_depr, dev_name(ddev));
	pm_runtime_set_memalloc_noio(ddev, false);
out_device_del:
	device_del(ddev);
out_free_ext_minor:
	if (disk->major == BLOCK_EXT_MAJOR)
		blk_free_ext_minor(disk->first_minor);
out:
	return ret;
}

/**
 * add_disk_fwnode - add disk information to kernel list with fwnode
 * @parent: parent device for the disk
 * @disk: per-device partitioning information
 * @groups: Additional per-device sysfs groups
 * @fwnode: attached disk fwnode
 *
 * This function registers the partitioning information in @disk
 * with the kernel. It also attaches a fwnode to the disk device.
 */
int __must_check add_disk_fwnode(struct device *parent, struct gendisk *disk,
				 const struct attribute_group **groups,
				 struct fwnode_handle *fwnode)
{
	struct blk_mq_tag_set *set;
	unsigned int memflags;
	int ret;

	if (queue_is_mq(disk->queue)) {
		set = disk->queue->tag_set;
		memflags = memalloc_noio_save();
		down_read(&set->update_nr_hwq_lock);
		ret = __add_disk(parent, disk, groups, fwnode);
		up_read(&set->update_nr_hwq_lock);
		memalloc_noio_restore(memflags);
	} else {
		ret = __add_disk(parent, disk, groups, fwnode);
	}

	/*
	 * add_disk_final() doesn't need to read nr_hw_queues, so call it
	 * outside of the read-locked set->update_nr_hwq_lock to avoid an
	 * unnecessary lock dependency on disk->open_mutex from the partition
	 * scan.
	 */
	if (!ret)
		add_disk_final(disk);
	return ret;
}
EXPORT_SYMBOL_GPL(add_disk_fwnode);

/**
 * device_add_disk - add disk information to kernel list
 * @parent: parent device for the disk
 * @disk: per-device partitioning information
 * @groups: Additional per-device sysfs groups
 *
 * This function registers the partitioning information in @disk
 * with the kernel.
 */
int __must_check device_add_disk(struct device *parent, struct gendisk *disk,
				 const struct attribute_group **groups)
{
	return add_disk_fwnode(parent, disk, groups, NULL);
}
EXPORT_SYMBOL(device_add_disk);

static void blk_report_disk_dead(struct gendisk *disk, bool surprise)
{
	struct block_device *bdev;
	unsigned long idx;

	/*
	 * On surprise disk removal, bdev_mark_dead() may call into file
	 * systems below. Make it clear that we're expecting to not hold
	 * disk->open_mutex.
	 */
	lockdep_assert_not_held(&disk->open_mutex);

	rcu_read_lock();
	xa_for_each(&disk->part_tbl, idx, bdev) {
		if (!kobject_get_unless_zero(&bdev->bd_device.kobj))
			continue;
		rcu_read_unlock();

		bdev_mark_dead(bdev, surprise);

		put_device(&bdev->bd_device);
		rcu_read_lock();
	}
	rcu_read_unlock();
}

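/*
 * Fail all new I/O to the disk. Returns false if the disk was already marked
 * dead, otherwise the return value of blk_queue_start_drain(), indicating
 * whether the caller has to take care of the queue drain it started.
 */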
static bool __blk_mark_disk_dead(struct gendisk *disk)
{
	/*
	 * Fail any new I/O.
	 */
	if (test_and_set_bit(GD_DEAD, &disk->state))
		return false;

	if (test_bit(GD_OWNS_QUEUE, &disk->state))
		blk_queue_flag_set(QUEUE_FLAG_DYING, disk->queue);

	/*
	 * Stop buffered writers from dirtying pages that can't be written out.
	 */
	set_capacity(disk, 0);

	/*
	 * Prevent new I/O from crossing bio_queue_enter().
	 */
	return blk_queue_start_drain(disk->queue);
}

/**
 * blk_mark_disk_dead - mark a disk as dead
 * @disk: disk to mark as dead
 *
 * Mark the disk as dead (e.g. surprise removed) and don't accept any new I/O
 * to this disk.
 */
void blk_mark_disk_dead(struct gendisk *disk)
{
	__blk_mark_disk_dead(disk);
	blk_report_disk_dead(disk, true);
}
EXPORT_SYMBOL_GPL(blk_mark_disk_dead);

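/*
 * Undo everything __add_disk() and add_disk_final() set up: stop event
 * handling, unhash the bdev inodes, mark the disk dead, drop all partitions,
 * and unregister the queue, bdi and sysfs entries.
 */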
static void __del_gendisk(struct gendisk *disk)
{
	struct request_queue *q = disk->queue;
	struct block_device *part;
	unsigned long idx;
	bool start_drain;

	might_sleep();

	if (WARN_ON_ONCE(!disk_live(disk) && !(disk->flags & GENHD_FL_HIDDEN)))
		return;

	disk_del_events(disk);

	/*
	 * Prevent new openers by unhashing the bdev inodes.
	 */
	mutex_lock(&disk->open_mutex);
	xa_for_each(&disk->part_tbl, idx, part)
		bdev_unhash(part);
	mutex_unlock(&disk->open_mutex);

	/*
	 * Tell the file system to write back all dirty data and shut down if
	 * it hasn't been notified earlier.
	 */
	if (!test_bit(GD_DEAD, &disk->state))
		blk_report_disk_dead(disk, false);

	/*
	 * Drop all partitions now that the disk is marked dead.
	 */
	mutex_lock(&disk->open_mutex);
	start_drain = __blk_mark_disk_dead(disk);
	if (start_drain)
		blk_freeze_acquire_lock(q);
	xa_for_each_start(&disk->part_tbl, idx, part, 1)
		drop_partition(part);
	mutex_unlock(&disk->open_mutex);

	if (!(disk->flags & GENHD_FL_HIDDEN)) {
		sysfs_remove_link(&disk_to_dev(disk)->kobj, "bdi");

		/*
		 * Unregister bdi before releasing device numbers (as they can
		 * get reused and we'd get clashes in sysfs).
		 */
		bdi_unregister(disk->bdi);
	}

	blk_unregister_queue(disk);

	kobject_put(disk->part0->bd_holder_dir);
	kobject_put(disk->slave_dir);
	disk->slave_dir = NULL;

	part_stat_set_all(disk->part0, 0);
	disk->part0->bd_stamp = 0;
	sysfs_remove_link(block_depr, dev_name(disk_to_dev(disk)));
	pm_runtime_set_memalloc_noio(disk_to_dev(disk), false);
	device_del(disk_to_dev(disk));

	blk_mq_freeze_queue_wait(q);

	blk_throtl_cancel_bios(disk);

	blk_sync_queue(q);
	blk_flush_integrity();

	if (queue_is_mq(q))
		blk_mq_cancel_work_sync(q);

	rq_qos_exit(q);

	/*
	 * If the disk does not own the queue, allow using passthrough requests
	 * again. Else leave the queue frozen to fail all I/O.
	 */
	if (!test_bit(GD_OWNS_QUEUE, &disk->state))
		__blk_mq_unfreeze_queue(q, true);
	else if (queue_is_mq(q))
		blk_mq_exit_queue(q);

	if (start_drain)
		blk_unfreeze_release_lock(q);
}

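/*
 * Permanently forbid elevator switches on this queue. Taking the tag_set's
 * update_nr_hwq_lock for writing keeps the flag update from racing with
 * code paths that hold the lock for reading, such as disk add and removal.
 */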
static void disable_elv_switch(struct request_queue *q)
{
	struct blk_mq_tag_set *set = q->tag_set;

	WARN_ON_ONCE(!queue_is_mq(q));

	down_write(&set->update_nr_hwq_lock);
	blk_queue_flag_set(QUEUE_FLAG_NO_ELV_SWITCH, q);
	up_write(&set->update_nr_hwq_lock);
}

/**
 * del_gendisk - remove the gendisk
 * @disk: the struct gendisk to remove
 *
 * Removes the gendisk and all its associated resources. This deletes the
 * partitions associated with the gendisk, and unregisters the associated
 * request_queue.
 *
 * This is the counterpart to the respective __device_add_disk() call.
 *
 * The final removal of the struct gendisk happens when its refcount reaches 0
 * with put_disk(), which should be called after del_gendisk(), if
 * __device_add_disk() was used.
 *
 * Drivers exist which depend on the release of the gendisk to be synchronous;
 * it should not be deferred.
 *
 * Context: can sleep
 */
void del_gendisk(struct gendisk *disk)
{
	struct blk_mq_tag_set *set;
	unsigned int memflags;

	if (!queue_is_mq(disk->queue)) {
		__del_gendisk(disk);
	} else {
		set = disk->queue->tag_set;

		disable_elv_switch(disk->queue);

		memflags = memalloc_noio_save();
		down_read(&set->update_nr_hwq_lock);
		__del_gendisk(disk);
		up_read(&set->update_nr_hwq_lock);
		memalloc_noio_restore(memflags);
	}
}
EXPORT_SYMBOL(del_gendisk);

/**
 * invalidate_disk - invalidate the disk
 * @disk: the struct gendisk to invalidate
 *
 * A helper to invalidate the disk. It cleans the disk's associated
 * buffer/page caches and resets its internal state so that the disk
 * can be reused by the driver.
 *
 * Context: can sleep
 */
void invalidate_disk(struct gendisk *disk)
{
	struct block_device *bdev = disk->part0;

	invalidate_bdev(bdev);
	bdev->bd_mapping->wb_err = 0;
	set_capacity(disk, 0);
}
EXPORT_SYMBOL(invalidate_disk);

/* sysfs access to bad-blocks list. */
static ssize_t disk_badblocks_show(struct device *dev,
				   struct device_attribute *attr,
				   char *page)
{
	struct gendisk *disk = dev_to_disk(dev);

	if (!disk->bb)
		return sysfs_emit(page, "\n");

	return badblocks_show(disk->bb, page, 0);
}

static ssize_t disk_badblocks_store(struct device *dev,
				    struct device_attribute *attr,
				    const char *page, size_t len)
{
	struct gendisk *disk = dev_to_disk(dev);

	if (!disk->bb)
		return -ENXIO;

	return badblocks_store(disk->bb, page, len, 0);
}

#ifdef CONFIG_BLOCK_LEGACY_AUTOLOAD
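/*
 * Invoke the probe callback registered for @devt's major, if any. Returns
 * true if a probe callback was found and called, false otherwise.
 */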
static bool blk_probe_dev(dev_t devt)
{
	unsigned int major = MAJOR(devt);
	struct blk_major_name **n;

	mutex_lock(&major_names_lock);
	for (n = &major_names[major_to_index(major)]; *n; n = &(*n)->next) {
		if ((*n)->major == major && (*n)->probe) {
			(*n)->probe(devt);
			mutex_unlock(&major_names_lock);
			return true;
		}
	}
	mutex_unlock(&major_names_lock);
	return false;
}

void blk_request_module(dev_t devt)
{
	int error;

	if (blk_probe_dev(devt))
		return;

	error = request_module("block-major-%d-%d", MAJOR(devt), MINOR(devt));
	/* Make old-style 2.4 aliases work */
	if (error > 0)
		error = request_module("block-major-%d", MAJOR(devt));
	if (!error)
		blk_probe_dev(devt);
}
#endif /* CONFIG_BLOCK_LEGACY_AUTOLOAD */

#ifdef CONFIG_PROC_FS
/* iterator */
static void *disk_seqf_start(struct seq_file *seqf, loff_t *pos)
{
	loff_t skip = *pos;
	struct class_dev_iter *iter;
	struct device *dev;

	iter = kmalloc(sizeof(*iter), GFP_KERNEL);
	if (!iter)
		return ERR_PTR(-ENOMEM);

	seqf->private = iter;
	class_dev_iter_init(iter, &block_class, NULL, &disk_type);
	do {
		dev = class_dev_iter_next(iter);
		if (!dev)
			return NULL;
	} while (skip--);

	return dev_to_disk(dev);
}

static void *disk_seqf_next(struct seq_file *seqf, void *v, loff_t *pos)
{
	struct device *dev;

	(*pos)++;
	dev = class_dev_iter_next(seqf->private);
	if (dev)
		return dev_to_disk(dev);

	return NULL;
}

static void disk_seqf_stop(struct seq_file *seqf, void *v)
{
	struct class_dev_iter *iter = seqf->private;

	/* stop is called even after start failed :-( */
	if (iter) {
		class_dev_iter_exit(iter);
		kfree(iter);
		seqf->private = NULL;
	}
}

static void *show_partition_start(struct seq_file *seqf, loff_t *pos)
{
	void *p;

	p = disk_seqf_start(seqf, pos);
	if (!IS_ERR_OR_NULL(p) && !*pos)
		seq_puts(seqf, "major minor  #blocks  name\n\n");
	return p;
}

static int show_partition(struct seq_file *seqf, void *v)
{
	struct gendisk *sgp = v;
	struct block_device *part;
	unsigned long idx;

	if (!get_capacity(sgp) || (sgp->flags & GENHD_FL_HIDDEN))
		return 0;

	rcu_read_lock();
	xa_for_each(&sgp->part_tbl, idx, part) {
		if (!bdev_nr_sectors(part))
			continue;
		seq_printf(seqf, "%4d  %7d %10llu %pg\n",
			   MAJOR(part->bd_dev), MINOR(part->bd_dev),
			   bdev_nr_sectors(part) >> 1, part);
	}
	rcu_read_unlock();
	return 0;
}

static const struct seq_operations partitions_op = {
	.start	= show_partition_start,
	.next	= disk_seqf_next,
	.stop	= disk_seqf_stop,
	.show	= show_partition
};
#endif

static int __init genhd_device_init(void)
{
	int error;

	error = class_register(&block_class);
	if (unlikely(error))
		return error;
	blk_dev_init();

	register_blkdev(BLOCK_EXT_MAJOR, "blkext");

	/* create top-level block dir */
	block_depr = kobject_create_and_add("block", NULL);
	return 0;
}

subsys_initcall(genhd_device_init);

static ssize_t disk_range_show(struct device *dev,
			       struct device_attribute *attr, char *buf)
{
	struct gendisk *disk = dev_to_disk(dev);

	return sysfs_emit(buf, "%d\n", disk->minors);
}

static ssize_t disk_ext_range_show(struct device *dev,
				   struct device_attribute *attr, char *buf)
{
	struct gendisk *disk = dev_to_disk(dev);

	return sysfs_emit(buf, "%d\n",
		(disk->flags & GENHD_FL_NO_PART) ? 1 : DISK_MAX_PARTS);
}

static ssize_t disk_removable_show(struct device *dev,
				   struct device_attribute *attr, char *buf)
{
	struct gendisk *disk = dev_to_disk(dev);

	return sysfs_emit(buf, "%d\n",
			  (disk->flags & GENHD_FL_REMOVABLE ? 1 : 0));
}

static ssize_t disk_hidden_show(struct device *dev,
				struct device_attribute *attr, char *buf)
{
	struct gendisk *disk = dev_to_disk(dev);

	return sysfs_emit(buf, "%d\n",
			  (disk->flags & GENHD_FL_HIDDEN ? 1 : 0));
}

static ssize_t disk_ro_show(struct device *dev,
			    struct device_attribute *attr, char *buf)
{
	struct gendisk *disk = dev_to_disk(dev);

	return sysfs_emit(buf, "%d\n", get_disk_ro(disk) ? 1 : 0);
}

ssize_t part_size_show(struct device *dev,
		       struct device_attribute *attr, char *buf)
{
	return sysfs_emit(buf, "%llu\n", bdev_nr_sectors(dev_to_bdev(dev)));
}

ssize_t part_stat_show(struct device *dev,
		       struct device_attribute *attr, char *buf)
{
	struct block_device *bdev = dev_to_bdev(dev);
	struct disk_stats stat;
	unsigned int inflight;

	inflight = bdev_count_inflight(bdev);
	if (inflight) {
		part_stat_lock();
		update_io_ticks(bdev, jiffies, true);
		part_stat_unlock();
	}
	part_stat_read_all(bdev, &stat);
	return sysfs_emit(buf,
		"%8lu %8lu %8llu %8u "
		"%8lu %8lu %8llu %8u "
		"%8u %8u %8u "
		"%8lu %8lu %8llu %8u "
		"%8lu %8u"
		"\n",
		stat.ios[STAT_READ],
		stat.merges[STAT_READ],
		(unsigned long long)stat.sectors[STAT_READ],
		(unsigned int)div_u64(stat.nsecs[STAT_READ], NSEC_PER_MSEC),
		stat.ios[STAT_WRITE],
		stat.merges[STAT_WRITE],
		(unsigned long long)stat.sectors[STAT_WRITE],
		(unsigned int)div_u64(stat.nsecs[STAT_WRITE], NSEC_PER_MSEC),
		inflight,
		jiffies_to_msecs(stat.io_ticks),
		(unsigned int)div_u64(stat.nsecs[STAT_READ] +
				      stat.nsecs[STAT_WRITE] +
				      stat.nsecs[STAT_DISCARD] +
				      stat.nsecs[STAT_FLUSH],
				      NSEC_PER_MSEC),
		stat.ios[STAT_DISCARD],
		stat.merges[STAT_DISCARD],
		(unsigned long long)stat.sectors[STAT_DISCARD],
		(unsigned int)div_u64(stat.nsecs[STAT_DISCARD], NSEC_PER_MSEC),
		stat.ios[STAT_FLUSH],
		(unsigned int)div_u64(stat.nsecs[STAT_FLUSH], NSEC_PER_MSEC));
}

/*
 * Show the number of IOs issued to the driver.
 * For bio-based devices, counted from bdev_start_io_acct();
 * for rq-based devices, counted from blk_mq_start_request().
 */
ssize_t part_inflight_show(struct device *dev, struct device_attribute *attr,
			   char *buf)
{
	struct block_device *bdev = dev_to_bdev(dev);
	struct request_queue *q = bdev_get_queue(bdev);
	unsigned int inflight[2] = {0};

	bdev_count_inflight_rw(bdev, inflight, queue_is_mq(q));

	return sysfs_emit(buf, "%8u %8u\n", inflight[READ], inflight[WRITE]);
}

static ssize_t disk_capability_show(struct device *dev,
				    struct device_attribute *attr, char *buf)
{
	dev_warn_once(dev, "the capability attribute has been deprecated.\n");
	return sysfs_emit(buf, "0\n");
}

static ssize_t disk_alignment_offset_show(struct device *dev,
					  struct device_attribute *attr,
					  char *buf)
{
	struct gendisk *disk = dev_to_disk(dev);

	return sysfs_emit(buf, "%d\n", bdev_alignment_offset(disk->part0));
}

static ssize_t disk_discard_alignment_show(struct device *dev,
					   struct device_attribute *attr,
					   char *buf)
{
	struct gendisk *disk = dev_to_disk(dev);

	return sysfs_emit(buf, "%d\n", bdev_discard_alignment(disk->part0));
}

static ssize_t diskseq_show(struct device *dev,
			    struct device_attribute *attr, char *buf)
{
	struct gendisk *disk = dev_to_disk(dev);

	return sysfs_emit(buf, "%llu\n", disk->diskseq);
}

static ssize_t partscan_show(struct device *dev,
			     struct device_attribute *attr, char *buf)
{
	return sysfs_emit(buf, "%u\n", disk_has_partscan(dev_to_disk(dev)));
}

static DEVICE_ATTR(range, 0444, disk_range_show, NULL);
static DEVICE_ATTR(ext_range, 0444, disk_ext_range_show, NULL);
static DEVICE_ATTR(removable, 0444, disk_removable_show, NULL);
static DEVICE_ATTR(hidden, 0444, disk_hidden_show, NULL);
static DEVICE_ATTR(ro, 0444, disk_ro_show, NULL);
static DEVICE_ATTR(size, 0444, part_size_show, NULL);
static DEVICE_ATTR(alignment_offset, 0444, disk_alignment_offset_show, NULL);
static DEVICE_ATTR(discard_alignment, 0444, disk_discard_alignment_show, NULL);
static DEVICE_ATTR(capability, 0444, disk_capability_show, NULL);
static DEVICE_ATTR(stat, 0444, part_stat_show, NULL);
static DEVICE_ATTR(inflight, 0444, part_inflight_show, NULL);
static DEVICE_ATTR(badblocks, 0644, disk_badblocks_show, disk_badblocks_store);
static DEVICE_ATTR(diskseq, 0444, diskseq_show, NULL);
static DEVICE_ATTR(partscan, 0444, partscan_show, NULL);

#ifdef CONFIG_FAIL_MAKE_REQUEST
ssize_t part_fail_show(struct device *dev,
		       struct device_attribute *attr, char *buf)
{
	return sysfs_emit(buf, "%d\n",
			  bdev_test_flag(dev_to_bdev(dev), BD_MAKE_IT_FAIL));
}

ssize_t part_fail_store(struct device *dev,
			struct device_attribute *attr,
			const char *buf, size_t count)
{
	int i;

	if (count > 0 && sscanf(buf, "%d", &i) > 0) {
		if (i)
			bdev_set_flag(dev_to_bdev(dev), BD_MAKE_IT_FAIL);
		else
			bdev_clear_flag(dev_to_bdev(dev), BD_MAKE_IT_FAIL);
	}
	return count;
}

static struct device_attribute dev_attr_fail =
	__ATTR(make-it-fail, 0644, part_fail_show, part_fail_store);
#endif /* CONFIG_FAIL_MAKE_REQUEST */

#ifdef CONFIG_FAIL_IO_TIMEOUT
static struct device_attribute dev_attr_fail_timeout =
	__ATTR(io-timeout-fail, 0644, part_timeout_show, part_timeout_store);
#endif

static struct attribute *disk_attrs[] = {
	&dev_attr_range.attr,
	&dev_attr_ext_range.attr,
	&dev_attr_removable.attr,
	&dev_attr_hidden.attr,
	&dev_attr_ro.attr,
	&dev_attr_size.attr,
	&dev_attr_alignment_offset.attr,
	&dev_attr_discard_alignment.attr,
	&dev_attr_capability.attr,
	&dev_attr_stat.attr,
	&dev_attr_inflight.attr,
	&dev_attr_badblocks.attr,
	&dev_attr_events.attr,
	&dev_attr_events_async.attr,
	&dev_attr_events_poll_msecs.attr,
	&dev_attr_diskseq.attr,
	&dev_attr_partscan.attr,
#ifdef CONFIG_FAIL_MAKE_REQUEST
	&dev_attr_fail.attr,
#endif
#ifdef CONFIG_FAIL_IO_TIMEOUT
	&dev_attr_fail_timeout.attr,
#endif
	NULL
};

static umode_t disk_visible(struct kobject *kobj, struct attribute *a, int n)
{
	struct device *dev = container_of(kobj, typeof(*dev), kobj);
	struct gendisk *disk = dev_to_disk(dev);

	if (a == &dev_attr_badblocks.attr && !disk->bb)
		return 0;
	return a->mode;
}

static struct attribute_group disk_attr_group = {
	.attrs = disk_attrs,
	.is_visible = disk_visible,
};

static const struct attribute_group *disk_attr_groups[] = {
	&disk_attr_group,
#ifdef CONFIG_BLK_DEV_IO_TRACE
	&blk_trace_attr_group,
#endif
#ifdef CONFIG_BLK_DEV_INTEGRITY
	&blk_integrity_attr_group,
#endif
	NULL
};

/**
 * disk_release - releases all allocated resources of the gendisk
 * @dev: the device representing this disk
 *
 * This function releases all allocated resources of the gendisk.
 *
 * Drivers which used __device_add_disk() have a gendisk with a request_queue
 * assigned. Since the request_queue sits on top of the gendisk for these
 * drivers we also call blk_put_queue() for them, and we expect the
 * request_queue refcount to reach 0 at this point, and so the request_queue
 * will also be freed prior to the disk.
 *
 * Context: can sleep
 */
static void disk_release(struct device *dev)
{
	struct gendisk *disk = dev_to_disk(dev);

	might_sleep();
	WARN_ON_ONCE(disk_live(disk));

	blk_trace_remove(disk->queue);

	/*
	 * To undo all the initialization from blk_mq_init_allocated_queue in
	 * case of a probe failure where add_disk is never called we have to
	 * call blk_mq_exit_queue here. We can't do this for the more common
	 * teardown case (yet) as the tagset can be gone by the time the disk
	 * is released once it was added.
	 */
	if (queue_is_mq(disk->queue) &&
	    test_bit(GD_OWNS_QUEUE, &disk->state) &&
	    !test_bit(GD_ADDED, &disk->state))
		blk_mq_exit_queue(disk->queue);

	blkcg_exit_disk(disk);

	bioset_exit(&disk->bio_split);

	disk_release_events(disk);
	kfree(disk->random);
	disk_free_zone_resources(disk);
	xa_destroy(&disk->part_tbl);

	disk->queue->disk = NULL;
	blk_put_queue(disk->queue);

	if (test_bit(GD_ADDED, &disk->state) && disk->fops->free_disk)
		disk->fops->free_disk(disk);

	bdev_drop(disk->part0);	/* frees the disk */
}

static int block_uevent(const struct device *dev, struct kobj_uevent_env *env)
{
	const struct gendisk *disk = dev_to_disk(dev);

	return add_uevent_var(env, "DISKSEQ=%llu", disk->diskseq);
}

const struct class block_class = {
	.name		= "block",
	.dev_uevent	= block_uevent,
};

static char *block_devnode(const struct device *dev, umode_t *mode,
			   kuid_t *uid, kgid_t *gid)
{
	struct gendisk *disk = dev_to_disk(dev);

	if (disk->fops->devnode)
		return disk->fops->devnode(disk, mode);
	return NULL;
}

const struct device_type disk_type = {
	.name		= "disk",
	.groups		= disk_attr_groups,
	.release	= disk_release,
	.devnode	= block_devnode,
};

#ifdef CONFIG_PROC_FS
/*
 * aggregate disk stat collector. Uses the same stats that the sysfs
 * entries do, above, but makes them available through one seq_file.
 *
 * The output looks suspiciously like /proc/partitions with a bunch of
 * extra fields.
 */
static int diskstats_show(struct seq_file *seqf, void *v)
{
	struct gendisk *gp = v;
	struct block_device *hd;
	unsigned int inflight;
	struct disk_stats stat;
	unsigned long idx;

	/*
	if (&disk_to_dev(gp)->kobj.entry == block_class.devices.next)
		seq_puts(seqf, "major minor name"
			" rio rmerge rsect ruse wio wmerge "
			"wsect wuse running use aveq"
			"\n\n");
	*/

	rcu_read_lock();
	xa_for_each(&gp->part_tbl, idx, hd) {
		if (bdev_is_partition(hd) && !bdev_nr_sectors(hd))
			continue;

		inflight = bdev_count_inflight(hd);
		if (inflight) {
			part_stat_lock();
			update_io_ticks(hd, jiffies, true);
			part_stat_unlock();
		}
		part_stat_read_all(hd, &stat);
		seq_put_decimal_ull_width(seqf, "", MAJOR(hd->bd_dev), 4);
		seq_put_decimal_ull_width(seqf, " ", MINOR(hd->bd_dev), 7);
		seq_printf(seqf, " %pg", hd);
		seq_put_decimal_ull(seqf, " ", stat.ios[STAT_READ]);
		seq_put_decimal_ull(seqf, " ", stat.merges[STAT_READ]);
		seq_put_decimal_ull(seqf, " ", stat.sectors[STAT_READ]);
		seq_put_decimal_ull(seqf, " ", (unsigned int)div_u64(stat.nsecs[STAT_READ],
								     NSEC_PER_MSEC));
		seq_put_decimal_ull(seqf, " ", stat.ios[STAT_WRITE]);
		seq_put_decimal_ull(seqf, " ", stat.merges[STAT_WRITE]);
		seq_put_decimal_ull(seqf, " ", stat.sectors[STAT_WRITE]);
		seq_put_decimal_ull(seqf, " ", (unsigned int)div_u64(stat.nsecs[STAT_WRITE],
								     NSEC_PER_MSEC));
		seq_put_decimal_ull(seqf, " ", inflight);
		seq_put_decimal_ull(seqf, " ", jiffies_to_msecs(stat.io_ticks));
		seq_put_decimal_ull(seqf, " ", (unsigned int)div_u64(stat.nsecs[STAT_READ] +
								     stat.nsecs[STAT_WRITE] +
								     stat.nsecs[STAT_DISCARD] +
								     stat.nsecs[STAT_FLUSH],
								     NSEC_PER_MSEC));
		seq_put_decimal_ull(seqf, " ", stat.ios[STAT_DISCARD]);
		seq_put_decimal_ull(seqf, " ", stat.merges[STAT_DISCARD]);
		seq_put_decimal_ull(seqf, " ", stat.sectors[STAT_DISCARD]);
		seq_put_decimal_ull(seqf, " ", (unsigned int)div_u64(stat.nsecs[STAT_DISCARD],
								     NSEC_PER_MSEC));
		seq_put_decimal_ull(seqf, " ", stat.ios[STAT_FLUSH]);
		seq_put_decimal_ull(seqf, " ", (unsigned int)div_u64(stat.nsecs[STAT_FLUSH],
								     NSEC_PER_MSEC));
		seq_putc(seqf, '\n');
	}
	rcu_read_unlock();

	return 0;
}

static const struct seq_operations diskstats_op = {
	.start	= disk_seqf_start,
	.next	= disk_seqf_next,
	.stop	= disk_seqf_stop,
	.show	= diskstats_show
};

static int __init proc_genhd_init(void)
{
	proc_create_seq("diskstats", 0, NULL, &diskstats_op);
	proc_create_seq("partitions", 0, NULL, &partitions_op);
	return 0;
}
module_init(proc_genhd_init);
#endif /* CONFIG_PROC_FS */

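/**
 * part_devt - look up the dev_t of a partition
 * @disk: disk the partition belongs to
 * @partno: partition number
 *
 * Returns the dev_t of the partition, or 0 if no partition with that number
 * exists.
 */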
dev_t part_devt(struct gendisk *disk, u8 partno)
{
	struct block_device *part;
	dev_t devt = 0;

	rcu_read_lock();
	part = xa_load(&disk->part_tbl, partno);
	if (part)
		devt = part->bd_dev;
	rcu_read_unlock();

	return devt;
}

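/*
 * Allocate and pre-initialize a gendisk on the given NUMA node, including
 * the bio split bioset, the bdi, the part0 block device and the partition
 * table. Returns NULL on allocation failure.
 */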
struct gendisk *__alloc_disk_node(struct request_queue *q, int node_id,
		struct lock_class_key *lkclass)
{
	struct gendisk *disk;

	disk = kzalloc_node(sizeof(struct gendisk), GFP_KERNEL, node_id);
	if (!disk)
		return NULL;

	if (bioset_init(&disk->bio_split, BIO_POOL_SIZE, 0, 0))
		goto out_free_disk;

	disk->bdi = bdi_alloc(node_id);
	if (!disk->bdi)
		goto out_free_bioset;

	/* bdev_alloc() might need the queue, set before the first call */
	disk->queue = q;

	disk->part0 = bdev_alloc(disk, 0);
	if (!disk->part0)
		goto out_free_bdi;

	disk->node_id = node_id;
	mutex_init(&disk->open_mutex);
	xa_init(&disk->part_tbl);
	if (xa_insert(&disk->part_tbl, 0, disk->part0, GFP_KERNEL))
		goto out_destroy_part_tbl;

	if (blkcg_init_disk(disk))
		goto out_erase_part0;

	disk_init_zone_resources(disk);
	rand_initialize_disk(disk);
	disk_to_dev(disk)->class = &block_class;
	disk_to_dev(disk)->type = &disk_type;
	device_initialize(disk_to_dev(disk));
	inc_diskseq(disk);
	q->disk = disk;
	lockdep_init_map(&disk->lockdep_map, "(bio completion)", lkclass, 0);
#ifdef CONFIG_BLOCK_HOLDER_DEPRECATED
	INIT_LIST_HEAD(&disk->slave_bdevs);
#endif
	mutex_init(&disk->rqos_state_mutex);
	return disk;

out_erase_part0:
	xa_erase(&disk->part_tbl, 0);
out_destroy_part_tbl:
	xa_destroy(&disk->part_tbl);
	disk->part0->bd_disk = NULL;
	bdev_drop(disk->part0);
out_free_bdi:
	bdi_put(disk->bdi);
out_free_bioset:
	bioset_exit(&disk->bio_split);
out_free_disk:
	kfree(disk);
	return NULL;
}

struct gendisk *__blk_alloc_disk(struct queue_limits *lim, int node,
		struct lock_class_key *lkclass)
{
	struct queue_limits default_lim = { };
	struct request_queue *q;
	struct gendisk *disk;

	q = blk_alloc_queue(lim ? lim : &default_lim, node);
	if (IS_ERR(q))
		return ERR_CAST(q);

	disk = __alloc_disk_node(q, node, lkclass);
	if (!disk) {
		blk_put_queue(q);
		return ERR_PTR(-ENOMEM);
	}
	set_bit(GD_OWNS_QUEUE, &disk->state);
	return disk;
}
EXPORT_SYMBOL(__blk_alloc_disk);

/**
 * put_disk - decrements the gendisk refcount
 * @disk: the struct gendisk to decrement the refcount for
 *
 * This decrements the refcount for the struct gendisk. When this reaches 0
 * we'll have disk_release() called.
 *
 * Note: for blk-mq disks, put_disk() must be called before freeing the
 * tag_set when handling probe errors (that is before add_disk() is called).
 *
 * Context: Any context, but the last reference must not be dropped from
 * atomic context.
 */
void put_disk(struct gendisk *disk)
{
	if (disk)
		put_device(disk_to_dev(disk));
}
EXPORT_SYMBOL(put_disk);

static void set_disk_ro_uevent(struct gendisk *gd, int ro)
{
	char event[] = "DISK_RO=1";
	char *envp[] = { event, NULL };

	if (!ro)
		event[8] = '0';
	kobject_uevent_env(&disk_to_dev(gd)->kobj, KOBJ_CHANGE, envp);
}

/**
 * set_disk_ro - set a gendisk read-only
 * @disk: gendisk to operate on
 * @read_only: %true to set the disk read-only, %false set the disk read/write
 *
 * This function is used to indicate whether a given disk device should have
 * its read-only flag set. set_disk_ro() is typically used by device drivers
 * to indicate whether the underlying physical device is write-protected.
 */
void set_disk_ro(struct gendisk *disk, bool read_only)
{
	if (read_only) {
		if (test_and_set_bit(GD_READ_ONLY, &disk->state))
			return;
	} else {
		if (!test_and_clear_bit(GD_READ_ONLY, &disk->state))
			return;
	}
	set_disk_ro_uevent(disk, read_only);
}
EXPORT_SYMBOL(set_disk_ro);

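/*
 * Assign the next value of the global diskseq counter to @disk, giving this
 * disk instance a sequence number that is unique across its lifetime.
 */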
void inc_diskseq(struct gendisk *disk)
{
	disk->diskseq = atomic64_inc_return(&diskseq);
}
