genhd.c source code [linux/block/genhd.c]

1	// SPDX-License-Identifier: GPL-2.0
2	/*
3	* gendisk handling
4	*
5	* Portions Copyright (C) 2020 Christoph Hellwig
6	*/
7
8	#include <linux/module.h>
9	#include <linux/ctype.h>
10	#include <linux/fs.h>
11	#include <linux/kdev_t.h>
12	#include <linux/kernel.h>
13	#include <linux/blkdev.h>
14	#include <linux/backing-dev.h>
15	#include <linux/init.h>
16	#include <linux/spinlock.h>
17	#include <linux/proc_fs.h>
18	#include <linux/seq_file.h>
19	#include <linux/slab.h>
20	#include <linux/kmod.h>
21	#include <linux/major.h>
22	#include <linux/mutex.h>
23	#include <linux/idr.h>
24	#include <linux/log2.h>
25	#include <linux/pm_runtime.h>
26	#include <linux/badblocks.h>
27	#include <linux/part_stat.h>
28	#include <linux/blktrace_api.h>
29
30	#include "blk-throttle.h"
31	#include "blk.h"
32	#include "blk-mq-sched.h"
33	#include "blk-rq-qos.h"
34	#include "blk-cgroup.h"
35
36	static struct kobject *block_depr;
37
38	/*
39	* Unique, monotonically increasing sequential number associated with block
40	* devices instances (i.e. incremented each time a device is attached).
41	* Associating uevents with block devices in userspace is difficult and racy:
42	* the uevent netlink socket is lossy, and on slow and overloaded systems has
43	* a very high latency.
44	* Block devices do not have exclusive owners in userspace, any process can set
45	* one up (e.g. loop devices). Moreover, device names can be reused (e.g. loop0
46	* can be reused again and again).
47	* A userspace process setting up a block device and watching for its events
48	* cannot thus reliably tell whether an event relates to the device it just set
49	* up or another earlier instance with the same name.
 * This sequential number allows userspace processes to solve this problem and
 * uniquely associate a uevent with the lifetime of a device.
52	*/
53	static atomic64_t diskseq;
54
/* for extended dynamic devt allocation, currently only one major is used */
56	#define NR_EXT_DEVT (1 << MINORBITS)
57	static DEFINE_IDA(ext_devt_ida);
58
59	void set_capacity(struct gendisk *disk, sector_t sectors)
60	{
61	if (sectors > BLK_DEV_MAX_SECTORS) {
62	pr_warn_once("%s: truncate capacity from %lld to %lld\n",
63	disk->disk_name, sectors,
64	BLK_DEV_MAX_SECTORS);
65	sectors = BLK_DEV_MAX_SECTORS;
66	}
67
68	bdev_set_nr_sectors(bdev: disk->part0, sectors);
69	}
70	EXPORT_SYMBOL(set_capacity);
71
72	/*
73	* Set disk capacity and notify if the size is not currently zero and will not
74	* be set to zero. Returns true if a uevent was sent, otherwise false.
75	*/
76	bool set_capacity_and_notify(struct gendisk *disk, sector_t size)
77	{
78	sector_t capacity = get_capacity(disk);
79	char *envp[] = { "RESIZE=1", NULL };
80
81	set_capacity(disk, size);
82
83	/*
84	* Only print a message and send a uevent if the gendisk is user visible
85	* and alive. This avoids spamming the log and udev when setting the
86	* initial capacity during probing.
87	*/
88	if (size == capacity \|\|
89	!disk_live(disk) \|\|
90	(disk->flags & GENHD_FL_HIDDEN))
91	return false;
92
93	pr_info("%s: detected capacity change from %lld to %lld\n",
94	disk->disk_name, capacity, size);
95
96	/*
97	* Historically we did not send a uevent for changes to/from an empty
98	* device.
99	*/
100	if (!capacity \|\| !size)
101	return false;
102	kobject_uevent_env(kobj: &disk_to_dev(disk)->kobj, action: KOBJ_CHANGE, envp);
103	return true;
104	}
105	EXPORT_SYMBOL_GPL(set_capacity_and_notify);
106
107	static void part_stat_read_all(struct block_device *part,
108	struct disk_stats *stat)
109	{
110	int cpu;
111
112	memset(stat, `0`, sizeof(struct disk_stats));
113	for_each_possible_cpu(cpu) {
114	struct disk_stats *ptr = per_cpu_ptr(part->bd_stats, cpu);
115	int group;
116
117	for (group = `0`; group < NR_STAT_GROUPS; group++) {
118	stat->nsecs[group] += ptr->nsecs[group];
119	stat->sectors[group] += ptr->sectors[group];
120	stat->ios[group] += ptr->ios[group];
121	stat->merges[group] += ptr->merges[group];
122	}
123
124	stat->io_ticks += ptr->io_ticks;
125	}
126	}
127
128	static void bdev_count_inflight_rw(struct block_device *part,
129	unsigned int inflight[`2`], bool mq_driver)
130	{
131	int cpu;
132
133	if (mq_driver) {
134	blk_mq_in_driver_rw(part, inflight);
135	} else {
136	for_each_possible_cpu(cpu) {
137	inflight[READ] += part_stat_local_read_cpu(
138	part, in_flight[READ], cpu);
139	inflight[WRITE] += part_stat_local_read_cpu(
140	part, in_flight[WRITE], cpu);
141	}
142	}
143
144	if (WARN_ON_ONCE((int)inflight[READ] < `0`))
145	inflight[READ] = `0`;
146	if (WARN_ON_ONCE((int)inflight[WRITE] < `0`))
147	inflight[WRITE] = `0`;
148	}
149
150	/**
151	* bdev_count_inflight - get the number of inflight IOs for a block device.
152	*
153	* @part: the block device.
154	*
155	* Inflight here means started IO accounting, from bdev_start_io_acct() for
156	* bio-based block device, and from blk_account_io_start() for rq-based block
157	* device.
158	*/
159	unsigned int bdev_count_inflight(struct block_device *part)
160	{
161	unsigned int inflight[`2`] = {`0`};
162
163	bdev_count_inflight_rw(part, inflight, mq_driver: false);
164
165	return inflight[READ] + inflight[WRITE];
166	}
167	EXPORT_SYMBOL_GPL(bdev_count_inflight);
168
169	/*
170	* Can be deleted altogether. Later.
171	*
172	*/
#define BLKDEV_MAJOR_HASH_SIZE 255
/*
 * Registered block majors.  Entries are kept in singly linked chains, one
 * chain per slot of major_names[], selected by major_to_index().
 */
static struct blk_major_name {
	struct blk_major_name *next;
	int major;
	char name[16];
#ifdef CONFIG_BLOCK_LEGACY_AUTOLOAD
	void (*probe)(dev_t devt);
#endif
} *major_names[BLKDEV_MAJOR_HASH_SIZE];
/* Held across register/unregister and while calling ->probe. */
static DEFINE_MUTEX(major_names_lock);
/* Held around every walk or modification of the major_names[] chains. */
static DEFINE_SPINLOCK(major_names_spinlock);
184
185	/ index in the above - for now: assume no multimajor ranges /
186	static inline int major_to_index(unsigned major)
187	{
188	return major % BLKDEV_MAJOR_HASH_SIZE;
189	}
190
#ifdef CONFIG_PROC_FS
/*
 * Print "<major> <name>" to @seqf for every registered block major equal to
 * @offset.  The chain is walked under major_names_spinlock.
 */
void blkdev_show(struct seq_file *seqf, off_t offset)
{
	struct blk_major_name *dp;

	spin_lock(&major_names_spinlock);
	for (dp = major_names[major_to_index(offset)]; dp; dp = dp->next)
		if (dp->major == offset)
			seq_printf(seqf, "%3d %s\n", dp->major, dp->name);
	spin_unlock(&major_names_spinlock);
}
#endif /* CONFIG_PROC_FS */
203
204	/**
205	* __register_blkdev - register a new block device
206	*
207	* @major: the requested major device number [1..BLKDEV_MAJOR_MAX-1]. If
208	* @major = 0, try to allocate any unused major number.
209	* @name: the name of the new block device as a zero terminated string
210	* @probe: pre-devtmpfs / pre-udev callback used to create disks when their
211	* pre-created device node is accessed. When a probe call uses
212	* add_disk() and it fails the driver must cleanup resources. This
213	* interface may soon be removed.
214	*
215	* The @name must be unique within the system.
216	*
217	* The return value depends on the @major input parameter:
218	*
219	* - if a major device number was requested in range [1..BLKDEV_MAJOR_MAX-1]
220	* then the function returns zero on success, or a negative error code
221	* - if any unused major number was requested with @major = 0 parameter
222	* then the return value is the allocated major number in range
223	* [1..BLKDEV_MAJOR_MAX-1] or a negative error code otherwise
224	*
225	* See Documentation/admin-guide/devices.txt for the list of allocated
226	* major numbers.
227	*
228	* Use register_blkdev instead for any new code.
229	*/
230	int __register_blkdev(unsigned int major, const char *name,
231	void (*probe)(dev_t devt))
232	{
233	struct blk_major_name *n, p;
234	int index, ret = `0`;
235
236	mutex_lock(&major_names_lock);
237
238	/ temporary /
239	if (major == `0`) {
240	for (index = ARRAY_SIZE(major_names)-`1`; index > `0`; index--) {
241	if (major_names[index] == NULL)
242	break;
243	}
244
245	if (index == `0`) {
246	printk("%s: failed to get major for %s\n",
247	__func__, name);
248	ret = -EBUSY;
249	goto out;
250	}
251	major = index;
252	ret = major;
253	}
254
255	if (major >= BLKDEV_MAJOR_MAX) {
256	pr_err("%s: major requested (%u) is greater than the maximum (%u) for %s\n",
257	__func__, major, BLKDEV_MAJOR_MAX-`1`, name);
258
259	ret = -EINVAL;
260	goto out;
261	}
262
263	p = kmalloc(sizeof(struct blk_major_name), GFP_KERNEL);
264	if (p == NULL) {
265	ret = -ENOMEM;
266	goto out;
267	}
268
269	p->major = major;
270	#ifdef CONFIG_BLOCK_LEGACY_AUTOLOAD
271	p->probe = probe;
272	#endif
273	strscpy(p->name, name, sizeof(p->name));
274	p->next = NULL;
275	index = major_to_index(major);
276
277	spin_lock(lock: &major_names_spinlock);
278	for (n = &major_names[index]; n; n = &(n)->next) {
279	if ((*n)->major == major)
280	break;
281	}
282	if (!*n)
283	*n = p;
284	else
285	ret = -EBUSY;
286	spin_unlock(lock: &major_names_spinlock);
287
288	if (ret < `0`) {
289	printk("register_blkdev: cannot get major %u for %s\n",
290	major, name);
291	kfree(objp: p);
292	}
293	out:
294	mutex_unlock(lock: &major_names_lock);
295	return ret;
296	}
297	EXPORT_SYMBOL(__register_blkdev);
298
299	void unregister_blkdev(unsigned int major, const char *name)
300	{
301	struct blk_major_name **n;
302	struct blk_major_name *p = NULL;
303	int index = major_to_index(major);
304
305	mutex_lock(&major_names_lock);
306	spin_lock(lock: &major_names_spinlock);
307	for (n = &major_names[index]; n; n = &(n)->next)
308	if ((*n)->major == major)
309	break;
310	if (!n \|\| strcmp((n)->name, name)) {
311	WARN_ON(`1`);
312	} else {
313	p = *n;
314	*n = p->next;
315	}
316	spin_unlock(lock: &major_names_spinlock);
317	mutex_unlock(lock: &major_names_lock);
318	kfree(objp: p);
319	}
320
321	EXPORT_SYMBOL(unregister_blkdev);
322
323	int blk_alloc_ext_minor(void)
324	{
325	int idx;
326
327	idx = ida_alloc_range(&ext_devt_ida, min: `0`, NR_EXT_DEVT - `1`, GFP_KERNEL);
328	if (idx == -ENOSPC)
329	return -EBUSY;
330	return idx;
331	}
332
333	void blk_free_ext_minor(unsigned int minor)
334	{
335	ida_free(&ext_devt_ida, id: minor);
336	}
337
338	void disk_uevent(struct gendisk disk, enum* kobject_action action)
339	{
340	struct block_device *part;
341	unsigned long idx;
342
343	rcu_read_lock();
344	xa_for_each(&disk->part_tbl, idx, part) {
345	if (bdev_is_partition(bdev: part) && !bdev_nr_sectors(bdev: part))
346	continue;
347	if (!kobject_get_unless_zero(kobj: &part->bd_device.kobj))
348	continue;
349
350	rcu_read_unlock();
351	kobject_uevent(bdev_kobj(part), action);
352	put_device(dev: &part->bd_device);
353	rcu_read_lock();
354	}
355	rcu_read_unlock();
356	}
357	EXPORT_SYMBOL_GPL(disk_uevent);
358
359	int disk_scan_partitions(struct gendisk *disk, blk_mode_t mode)
360	{
361	struct file *file;
362	int ret = `0`;
363
364	if (!disk_has_partscan(disk))
365	return -EINVAL;
366	if (disk->open_partitions)
367	return -EBUSY;
368
369	/*
370	* If the device is opened exclusively by current thread already, it's
371	* safe to scan partitons, otherwise, use bd_prepare_to_claim() to
372	* synchronize with other exclusive openers and other partition
373	* scanners.
374	*/
375	if (!(mode & BLK_OPEN_EXCL)) {
376	ret = bd_prepare_to_claim(bdev: disk->part0, holder: disk_scan_partitions,
377	NULL);
378	if (ret)
379	return ret;
380	}
381
382	set_bit(GD_NEED_PART_SCAN, addr: &disk->state);
383	file = bdev_file_open_by_dev(dev: disk_devt(disk), mode: mode & ~BLK_OPEN_EXCL,
384	NULL, NULL);
385	if (IS_ERR(ptr: file))
386	ret = PTR_ERR(ptr: file);
387	else
388	fput(file);
389
390	/*
391	* If blkdev_get_by_dev() failed early, GD_NEED_PART_SCAN is still set,
392	* and this will cause that re-assemble partitioned raid device will
393	* creat partition for underlying disk.
394	*/
395	clear_bit(GD_NEED_PART_SCAN, addr: &disk->state);
396	if (!(mode & BLK_OPEN_EXCL))
397	bd_abort_claiming(bdev: disk->part0, holder: disk_scan_partitions);
398	return ret;
399	}
400
401	static void add_disk_final(struct gendisk *disk)
402	{
403	struct device *ddev = disk_to_dev(disk);
404
405	if (!(disk->flags & GENHD_FL_HIDDEN)) {
406	/ Make sure the first partition scan will be proceed /
407	if (get_capacity(disk) && disk_has_partscan(disk))
408	set_bit(GD_NEED_PART_SCAN, addr: &disk->state);
409
410	bdev_add(bdev: disk->part0, dev: ddev->devt);
411	if (get_capacity(disk))
412	disk_scan_partitions(disk, BLK_OPEN_READ);
413
414	/*
415	* Announce the disk and partitions after all partitions are
416	* created. (for hidden disks uevents remain suppressed forever)
417	*/
418	dev_set_uevent_suppress(dev: ddev, val: `0`);
419	disk_uevent(disk, KOBJ_ADD);
420	}
421
422	blk_apply_bdi_limits(bdi: disk->bdi, lim: &disk->queue->limits);
423	disk_add_events(disk);
424	set_bit(GD_ADDED, addr: &disk->state);
425	}
426
427	static int __add_disk(struct device parent, struct* gendisk *disk,
428	const struct attribute_group **groups,
429	struct fwnode_handle *fwnode)
430
431	{
432	struct device *ddev = disk_to_dev(disk);
433	int ret;
434
435	if (WARN_ON_ONCE(bdev_nr_sectors(disk->part0) > BLK_DEV_MAX_SECTORS))
436	return -EINVAL;
437
438	if (queue_is_mq(q: disk->queue)) {
439	/*
440	* ->submit_bio and ->poll_bio are bypassed for blk-mq drivers.
441	*/
442	if (disk->fops->submit_bio \|\| disk->fops->poll_bio)
443	return -EINVAL;
444	} else {
445	if (!disk->fops->submit_bio)
446	return -EINVAL;
447	bdev_set_flag(bdev: disk->part0, BD_HAS_SUBMIT_BIO);
448	}
449
450	/*
451	* If the driver provides an explicit major number it also must provide
452	* the number of minors numbers supported, and those will be used to
453	* setup the gendisk.
454	* Otherwise just allocate the device numbers for both the whole device
455	* and all partitions from the extended dev_t space.
456	*/
457	ret = -EINVAL;
458	if (disk->major) {
459	if (WARN_ON(!disk->minors))
460	goto out;
461
462	if (disk->minors > DISK_MAX_PARTS) {
463	pr_err("block: can't allocate more than %d partitions\n",
464	DISK_MAX_PARTS);
465	disk->minors = DISK_MAX_PARTS;
466	}
467	if (disk->first_minor > MINORMASK \|\|
468	disk->minors > MINORMASK + `1` \|\|
469	disk->first_minor + disk->minors > MINORMASK + `1`)
470	goto out;
471	} else {
472	if (WARN_ON(disk->minors))
473	goto out;
474
475	ret = blk_alloc_ext_minor();
476	if (ret < `0`)
477	goto out;
478	disk->major = BLOCK_EXT_MAJOR;
479	disk->first_minor = ret;
480	}
481
482	/ delay uevents, until we scanned partition table /
483	dev_set_uevent_suppress(dev: ddev, val: `1`);
484
485	ddev->parent = parent;
486	ddev->groups = groups;
487	dev_set_name(dev: ddev, name: "%s", disk->disk_name);
488	if (fwnode)
489	device_set_node(dev: ddev, fwnode);
490	if (!(disk->flags & GENHD_FL_HIDDEN))
491	ddev->devt = MKDEV(disk->major, disk->first_minor);
492	ret = device_add(dev: ddev);
493	if (ret)
494	goto out_free_ext_minor;
495
496	ret = disk_alloc_events(disk);
497	if (ret)
498	goto out_device_del;
499
500	ret = sysfs_create_link(kobj: block_depr, target: &ddev->kobj,
501	name: kobject_name(kobj: &ddev->kobj));
502	if (ret)
503	goto out_device_del;
504
505	/*
506	* avoid probable deadlock caused by allocating memory with
507	* GFP_KERNEL in runtime_resume callback of its all ancestor
508	* devices
509	*/
510	pm_runtime_set_memalloc_noio(dev: ddev, enable: true);
511
512	disk->part0->bd_holder_dir =
513	kobject_create_and_add(name: "holders", parent: &ddev->kobj);
514	if (!disk->part0->bd_holder_dir) {
515	ret = -ENOMEM;
516	goto out_del_block_link;
517	}
518	disk->slave_dir = kobject_create_and_add(name: "slaves", parent: &ddev->kobj);
519	if (!disk->slave_dir) {
520	ret = -ENOMEM;
521	goto out_put_holder_dir;
522	}
523
524	ret = blk_register_queue(disk);
525	if (ret)
526	goto out_put_slave_dir;
527
528	if (!(disk->flags & GENHD_FL_HIDDEN)) {
529	ret = bdi_register(bdi: disk->bdi, fmt: "%u:%u",
530	disk->major, disk->first_minor);
531	if (ret)
532	goto out_unregister_queue;
533	bdi_set_owner(bdi: disk->bdi, owner: ddev);
534	ret = sysfs_create_link(kobj: &ddev->kobj,
535	target: &disk->bdi->dev->kobj, name: "bdi");
536	if (ret)
537	goto out_unregister_bdi;
538	} else {
539	/*
540	* Even if the block_device for a hidden gendisk is not
541	* registered, it needs to have a valid bd_dev so that the
542	* freeing of the dynamic major works.
543	*/
544	disk->part0->bd_dev = MKDEV(disk->major, disk->first_minor);
545	}
546	return `0`;
547
548	out_unregister_bdi:
549	if (!(disk->flags & GENHD_FL_HIDDEN))
550	bdi_unregister(bdi: disk->bdi);
551	out_unregister_queue:
552	blk_unregister_queue(disk);
553	rq_qos_exit(disk->queue);
554	out_put_slave_dir:
555	kobject_put(kobj: disk->slave_dir);
556	disk->slave_dir = NULL;
557	out_put_holder_dir:
558	kobject_put(kobj: disk->part0->bd_holder_dir);
559	out_del_block_link:
560	sysfs_remove_link(kobj: block_depr, name: dev_name(dev: ddev));
561	pm_runtime_set_memalloc_noio(dev: ddev, enable: false);
562	out_device_del:
563	device_del(dev: ddev);
564	out_free_ext_minor:
565	if (disk->major == BLOCK_EXT_MAJOR)
566	blk_free_ext_minor(minor: disk->first_minor);
567	out:
568	return ret;
569	}
570
571	/**
572	* add_disk_fwnode - add disk information to kernel list with fwnode
573	* @parent: parent device for the disk
574	* @disk: per-device partitioning information
575	* @groups: Additional per-device sysfs groups
576	* @fwnode: attached disk fwnode
577	*
578	* This function registers the partitioning information in @disk
579	* with the kernel. Also attach a fwnode to the disk device.
580	*/
581	int __must_check add_disk_fwnode(struct device parent, struct* gendisk *disk,
582	const struct attribute_group **groups,
583	struct fwnode_handle *fwnode)
584	{
585	struct blk_mq_tag_set *set;
586	unsigned int memflags;
587	int ret;
588
589	if (queue_is_mq(q: disk->queue)) {
590	set = disk->queue->tag_set;
591	memflags = memalloc_noio_save();
592	down_read(sem: &set->update_nr_hwq_lock);
593	ret = __add_disk(parent, disk, groups, fwnode);
594	up_read(sem: &set->update_nr_hwq_lock);
595	memalloc_noio_restore(flags: memflags);
596	} else {
597	ret = __add_disk(parent, disk, groups, fwnode);
598	}
599
600	/*
601	* add_disk_final() needn't to read `nr_hw_queues`, so move it out
602	* of read lock `set->update_nr_hwq_lock` for avoiding unnecessary
603	* lock dependency on `disk->open_mutex` from scanning partition.
604	*/
605	if (!ret)
606	add_disk_final(disk);
607	return ret;
608	}
609	EXPORT_SYMBOL_GPL(add_disk_fwnode);
610
611	/**
612	* device_add_disk - add disk information to kernel list
613	* @parent: parent device for the disk
614	* @disk: per-device partitioning information
615	* @groups: Additional per-device sysfs groups
616	*
617	* This function registers the partitioning information in @disk
618	* with the kernel.
619	*/
620	int __must_check device_add_disk(struct device parent, struct* gendisk *disk,
621	const struct attribute_group **groups)
622	{
623	return add_disk_fwnode(parent, disk, groups, NULL);
624	}
625	EXPORT_SYMBOL(device_add_disk);
626
627	static void blk_report_disk_dead(struct gendisk *disk, bool surprise)
628	{
629	struct block_device *bdev;
630	unsigned long idx;
631
632	/*
633	* On surprise disk removal, bdev_mark_dead() may call into file
634	* systems below. Make it clear that we're expecting to not hold
635	* disk->open_mutex.
636	*/
637	lockdep_assert_not_held(&disk->open_mutex);
638
639	rcu_read_lock();
640	xa_for_each(&disk->part_tbl, idx, bdev) {
641	if (!kobject_get_unless_zero(kobj: &bdev->bd_device.kobj))
642	continue;
643	rcu_read_unlock();
644
645	bdev_mark_dead(bdev, surprise);
646
647	put_device(dev: &bdev->bd_device);
648	rcu_read_lock();
649	}
650	rcu_read_unlock();
651	}
652
653	static bool __blk_mark_disk_dead(struct gendisk *disk)
654	{
655	/*
656	* Fail any new I/O.
657	*/
658	if (test_and_set_bit(GD_DEAD, addr: &disk->state))
659	return false;
660
661	if (test_bit(GD_OWNS_QUEUE, &disk->state))
662	blk_queue_flag_set(flag: QUEUE_FLAG_DYING, q: disk->queue);
663
664	/*
665	* Stop buffered writers from dirtying pages that can't be written out.
666	*/
667	set_capacity(disk, `0`);
668
669	/*
670	* Prevent new I/O from crossing bio_queue_enter().
671	*/
672	return blk_queue_start_drain(q: disk->queue);
673	}
674
675	/**
676	* blk_mark_disk_dead - mark a disk as dead
677	* @disk: disk to mark as dead
678	*
679	* Mark as disk as dead (e.g. surprise removed) and don't accept any new I/O
680	* to this disk.
681	*/
682	void blk_mark_disk_dead(struct gendisk *disk)
683	{
684	__blk_mark_disk_dead(disk);
685	blk_report_disk_dead(disk, surprise: true);
686	}
687	EXPORT_SYMBOL_GPL(blk_mark_disk_dead);
688
689	static void __del_gendisk(struct gendisk *disk)
690	{
691	struct request_queue *q = disk->queue;
692	struct block_device *part;
693	unsigned long idx;
694	bool start_drain;
695
696	might_sleep();
697
698	if (WARN_ON_ONCE(!disk_live(disk) && !(disk->flags & GENHD_FL_HIDDEN)))
699	return;
700
701	disk_del_events(disk);
702
703	/*
704	* Prevent new openers by unlinked the bdev inode.
705	*/
706	mutex_lock(&disk->open_mutex);
707	xa_for_each(&disk->part_tbl, idx, part)
708	bdev_unhash(bdev: part);
709	mutex_unlock(lock: &disk->open_mutex);
710
711	/*
712	* Tell the file system to write back all dirty data and shut down if
713	* it hasn't been notified earlier.
714	*/
715	if (!test_bit(GD_DEAD, &disk->state))
716	blk_report_disk_dead(disk, surprise: false);
717
718	/*
719	* Drop all partitions now that the disk is marked dead.
720	*/
721	mutex_lock(&disk->open_mutex);
722	start_drain = __blk_mark_disk_dead(disk);
723	if (start_drain)
724	blk_freeze_acquire_lock(q);
725	xa_for_each_start(&disk->part_tbl, idx, part, `1`)
726	drop_partition(part);
727	mutex_unlock(lock: &disk->open_mutex);
728
729	if (!(disk->flags & GENHD_FL_HIDDEN)) {
730	sysfs_remove_link(kobj: &disk_to_dev(disk)->kobj, name: "bdi");
731
732	/*
733	* Unregister bdi before releasing device numbers (as they can
734	* get reused and we'd get clashes in sysfs).
735	*/
736	bdi_unregister(bdi: disk->bdi);
737	}
738
739	blk_unregister_queue(disk);
740
741	kobject_put(kobj: disk->part0->bd_holder_dir);
742	kobject_put(kobj: disk->slave_dir);
743	disk->slave_dir = NULL;
744
745	part_stat_set_all(part: disk->part0, value: `0`);
746	disk->part0->bd_stamp = `0`;
747	sysfs_remove_link(kobj: block_depr, name: dev_name(disk_to_dev(disk)));
748	pm_runtime_set_memalloc_noio(disk_to_dev(disk), enable: false);
749	device_del(disk_to_dev(disk));
750
751	blk_mq_freeze_queue_wait(q);
752
753	blk_throtl_cancel_bios(disk);
754
755	blk_sync_queue(q);
756	blk_flush_integrity();
757
758	if (queue_is_mq(q))
759	blk_mq_cancel_work_sync(q);
760
761	rq_qos_exit(q);
762
763	/*
764	* If the disk does not own the queue, allow using passthrough requests
765	* again. Else leave the queue frozen to fail all I/O.
766	*/
767	if (!test_bit(GD_OWNS_QUEUE, &disk->state))
768	__blk_mq_unfreeze_queue(q, force_atomic: true);
769	else if (queue_is_mq(q))
770	blk_mq_exit_queue(q);
771
772	if (start_drain)
773	blk_unfreeze_release_lock(q);
774	}
775
776	static void disable_elv_switch(struct request_queue *q)
777	{
778	struct blk_mq_tag_set *set = q->tag_set;
779	WARN_ON_ONCE(!queue_is_mq(q));
780
781	down_write(sem: &set->update_nr_hwq_lock);
782	blk_queue_flag_set(flag: QUEUE_FLAG_NO_ELV_SWITCH, q);
783	up_write(sem: &set->update_nr_hwq_lock);
784	}
785
786	/**
787	* del_gendisk - remove the gendisk
788	* @disk: the struct gendisk to remove
789	*
790	* Removes the gendisk and all its associated resources. This deletes the
791	* partitions associated with the gendisk, and unregisters the associated
792	* request_queue.
793	*
794	* This is the counter to the respective __device_add_disk() call.
795	*
796	* The final removal of the struct gendisk happens when its refcount reaches 0
797	* with put_disk(), which should be called after del_gendisk(), if
798	* __device_add_disk() was used.
799	*
800	* Drivers exist which depend on the release of the gendisk to be synchronous,
801	* it should not be deferred.
802	*
803	* Context: can sleep
804	*/
805	void del_gendisk(struct gendisk *disk)
806	{
807	struct blk_mq_tag_set *set;
808	unsigned int memflags;
809
810	if (!queue_is_mq(q: disk->queue)) {
811	__del_gendisk(disk);
812	} else {
813	set = disk->queue->tag_set;
814
815	disable_elv_switch(q: disk->queue);
816
817	memflags = memalloc_noio_save();
818	down_read(sem: &set->update_nr_hwq_lock);
819	__del_gendisk(disk);
820	up_read(sem: &set->update_nr_hwq_lock);
821	memalloc_noio_restore(flags: memflags);
822	}
823	}
824	EXPORT_SYMBOL(del_gendisk);
825
826	/**
827	* invalidate_disk - invalidate the disk
828	* @disk: the struct gendisk to invalidate
829	*
830	* A helper to invalidates the disk. It will clean the disk's associated
831	* buffer/page caches and reset its internal states so that the disk
832	* can be reused by the drivers.
833	*
834	* Context: can sleep
835	*/
836	void invalidate_disk(struct gendisk *disk)
837	{
838	struct block_device *bdev = disk->part0;
839
840	invalidate_bdev(bdev);
841	bdev->bd_mapping->wb_err = `0`;
842	set_capacity(disk, `0`);
843	}
844	EXPORT_SYMBOL(invalidate_disk);
845
846	/ sysfs access to bad-blocks list. /
847	static ssize_t disk_badblocks_show(struct device *dev,
848	struct device_attribute *attr,
849	char *page)
850	{
851	struct gendisk *disk = dev_to_disk(dev);
852
853	if (!disk->bb)
854	return sysfs_emit(buf: page, fmt: "\n");
855
856	return badblocks_show(bb: disk->bb, page, unack: `0`);
857	}
858
859	static ssize_t disk_badblocks_store(struct device *dev,
860	struct device_attribute *attr,
861	const char *page, size_t len)
862	{
863	struct gendisk *disk = dev_to_disk(dev);
864
865	if (!disk->bb)
866	return -ENXIO;
867
868	return badblocks_store(bb: disk->bb, page, len, unack: `0`);
869	}
870
871	#ifdef CONFIG_BLOCK_LEGACY_AUTOLOAD
872	static bool blk_probe_dev(dev_t devt)
873	{
874	unsigned int major = MAJOR(devt);
875	struct blk_major_name **n;
876
877	mutex_lock(&major_names_lock);
878	for (n = &major_names[major_to_index(major)]; n; n = &(n)->next) {
879	if ((n)->major == major && (n)->probe) {
880	(*n)->probe(devt);
881	mutex_unlock(lock: &major_names_lock);
882	return true;
883	}
884	}
885	mutex_unlock(lock: &major_names_lock);
886	return false;
887	}
888
889	void blk_request_module(dev_t devt)
890	{
891	int error;
892
893	if (blk_probe_dev(devt))
894	return;
895
896	error = request_module("block-major-%d-%d", MAJOR(devt), MINOR(devt));
897	/ Make old-style 2.4 aliases work /
898	if (error > `0`)
899	error = request_module("block-major-%d", MAJOR(devt));
900	if (!error)
901	blk_probe_dev(devt);
902	}
903	#endif /* CONFIG_BLOCK_LEGACY_AUTOLOAD */
904
905	#ifdef CONFIG_PROC_FS
906	/ iterator /
907	static void disk_seqf_start(struct* seq_file seqf, loff_t pos)
908	{
909	loff_t skip = *pos;
910	struct class_dev_iter *iter;
911	struct device *dev;
912
913	iter = kmalloc(sizeof(*iter), GFP_KERNEL);
914	if (!iter)
915	return ERR_PTR(error: -ENOMEM);
916
917	seqf->private = iter;
918	class_dev_iter_init(iter, class: &block_class, NULL, type: &disk_type);
919	do {
920	dev = class_dev_iter_next(iter);
921	if (!dev)
922	return NULL;
923	} while (skip--);
924
925	return dev_to_disk(dev);
926	}
927
928	static void disk_seqf_next(struct* seq_file seqf, void* v, loff_t pos)
929	{
930	struct device *dev;
931
932	(*pos)++;
933	dev = class_dev_iter_next(iter: seqf->private);
934	if (dev)
935	return dev_to_disk(dev);
936
937	return NULL;
938	}
939
940	static void disk_seqf_stop(struct seq_file seqf, void* *v)
941	{
942	struct class_dev_iter *iter = seqf->private;
943
944	/ stop is called even after start failed :-( /
945	if (iter) {
946	class_dev_iter_exit(iter);
947	kfree(objp: iter);
948	seqf->private = NULL;
949	}
950	}
951
952	static void show_partition_start(struct* seq_file seqf, loff_t pos)
953	{
954	void *p;
955
956	p = disk_seqf_start(seqf, pos);
957	if (!IS_ERR_OR_NULL(ptr: p) && !*pos)
958	seq_puts(m: seqf, s: "major minor #blocks name\n\n");
959	return p;
960	}
961
962	static int show_partition(struct seq_file seqf, void* *v)
963	{
964	struct gendisk *sgp = v;
965	struct block_device *part;
966	unsigned long idx;
967
968	if (!get_capacity(disk: sgp) \|\| (sgp->flags & GENHD_FL_HIDDEN))
969	return `0`;
970
971	rcu_read_lock();
972	xa_for_each(&sgp->part_tbl, idx, part) {
973	if (!bdev_nr_sectors(bdev: part))
974	continue;
975	seq_printf(m: seqf, fmt: "%4d %7d %10llu %pg\n",
976	MAJOR(part->bd_dev), MINOR(part->bd_dev),
977	bdev_nr_sectors(bdev: part) >> `1`, part);
978	}
979	rcu_read_unlock();
980	return `0`;
981	}
982
983	static const struct seq_operations partitions_op = {
984	.start = show_partition_start,
985	.next = disk_seqf_next,
986	.stop = disk_seqf_stop,
987	.show = show_partition
988	};
989	#endif
990
991	static int __init genhd_device_init(void)
992	{
993	int error;
994
995	error = class_register(class: &block_class);
996	if (unlikely(error))
997	return error;
998	blk_dev_init();
999
1000	register_blkdev(BLOCK_EXT_MAJOR, "blkext");
1001
1002	/ create top-level block dir /
1003	block_depr = kobject_create_and_add(name: "block", NULL);
1004	return `0`;
1005	}
1006
1007	subsys_initcall(genhd_device_init);
1008
1009	static ssize_t disk_range_show(struct device *dev,
1010	struct device_attribute attr, char* *buf)
1011	{
1012	struct gendisk *disk = dev_to_disk(dev);
1013
1014	return sysfs_emit(buf, fmt: "%d\n", disk->minors);
1015	}
1016
1017	static ssize_t disk_ext_range_show(struct device *dev,
1018	struct device_attribute attr, char* *buf)
1019	{
1020	struct gendisk *disk = dev_to_disk(dev);
1021
1022	return sysfs_emit(buf, fmt: "%d\n",
1023	(disk->flags & GENHD_FL_NO_PART) ? `1` : DISK_MAX_PARTS);
1024	}
1025
1026	static ssize_t disk_removable_show(struct device *dev,
1027	struct device_attribute attr, char* *buf)
1028	{
1029	struct gendisk *disk = dev_to_disk(dev);
1030
1031	return sysfs_emit(buf, fmt: "%d\n",
1032	(disk->flags & GENHD_FL_REMOVABLE ? `1` : `0`));
1033	}
1034
1035	static ssize_t disk_hidden_show(struct device *dev,
1036	struct device_attribute attr, char* *buf)
1037	{
1038	struct gendisk *disk = dev_to_disk(dev);
1039
1040	return sysfs_emit(buf, fmt: "%d\n",
1041	(disk->flags & GENHD_FL_HIDDEN ? `1` : `0`));
1042	}
1043
1044	static ssize_t disk_ro_show(struct device *dev,
1045	struct device_attribute attr, char* *buf)
1046	{
1047	struct gendisk *disk = dev_to_disk(dev);
1048
1049	return sysfs_emit(buf, fmt: "%d\n", get_disk_ro(disk) ? `1` : `0`);
1050	}
1051
1052	ssize_t part_size_show(struct device *dev,
1053	struct device_attribute attr, char* *buf)
1054	{
1055	return sysfs_emit(buf, fmt: "%llu\n", bdev_nr_sectors(dev_to_bdev(dev)));
1056	}
1057
1058	ssize_t part_stat_show(struct device *dev,
1059	struct device_attribute attr, char* *buf)
1060	{
1061	struct block_device *bdev = dev_to_bdev(dev);
1062	struct disk_stats stat;
1063	unsigned int inflight;
1064
1065	inflight = bdev_count_inflight(bdev);
1066	if (inflight) {
1067	part_stat_lock();
1068	update_io_ticks(part: bdev, now: jiffies, end: true);
1069	part_stat_unlock();
1070	}
1071	part_stat_read_all(part: bdev, stat: &stat);
1072	return sysfs_emit(buf,
1073	fmt: "%8lu %8lu %8llu %8u "
1074	"%8lu %8lu %8llu %8u "
1075	"%8u %8u %8u "
1076	"%8lu %8lu %8llu %8u "
1077	"%8lu %8u"
1078	"\n",
1079	stat.ios[STAT_READ],
1080	stat.merges[STAT_READ],
1081	(unsigned long long)stat.sectors[STAT_READ],
1082	(unsigned int)div_u64(dividend: stat.nsecs[STAT_READ], NSEC_PER_MSEC),
1083	stat.ios[STAT_WRITE],
1084	stat.merges[STAT_WRITE],
1085	(unsigned long long)stat.sectors[STAT_WRITE],
1086	(unsigned int)div_u64(dividend: stat.nsecs[STAT_WRITE], NSEC_PER_MSEC),
1087	inflight,
1088	jiffies_to_msecs(j: stat.io_ticks),
1089	(unsigned int)div_u64(dividend: stat.nsecs[STAT_READ] +
1090	stat.nsecs[STAT_WRITE] +
1091	stat.nsecs[STAT_DISCARD] +
1092	stat.nsecs[STAT_FLUSH],
1093	NSEC_PER_MSEC),
1094	stat.ios[STAT_DISCARD],
1095	stat.merges[STAT_DISCARD],
1096	(unsigned long long)stat.sectors[STAT_DISCARD],
1097	(unsigned int)div_u64(dividend: stat.nsecs[STAT_DISCARD], NSEC_PER_MSEC),
1098	stat.ios[STAT_FLUSH],
1099	(unsigned int)div_u64(dividend: stat.nsecs[STAT_FLUSH], NSEC_PER_MSEC));
1100	}
1101
1102	/*
1103	* Show the number of IOs issued to driver.
1104	* For bio-based device, started from bdev_start_io_acct();
1105	* For rq-based device, started from blk_mq_start_request();
1106	*/
1107	ssize_t part_inflight_show(struct device dev, struct* device_attribute *attr,
1108	char *buf)
1109	{
1110	struct block_device *bdev = dev_to_bdev(dev);
1111	struct request_queue *q = bdev_get_queue(bdev);
1112	unsigned int inflight[`2`] = {`0`};
1113
1114	bdev_count_inflight_rw(part: bdev, inflight, mq_driver: queue_is_mq(q));
1115
1116	return sysfs_emit(buf, fmt: "%8u %8u\n", inflight[READ], inflight[WRITE]);
1117	}
1118
1119	static ssize_t disk_capability_show(struct device *dev,
1120	struct device_attribute attr, char* *buf)
1121	{
1122	dev_warn_once(dev, "the capability attribute has been deprecated.\n");
1123	return sysfs_emit(buf, fmt: "0\n");
1124	}
1125
1126	static ssize_t disk_alignment_offset_show(struct device *dev,
1127	struct device_attribute *attr,
1128	char *buf)
1129	{
1130	struct gendisk *disk = dev_to_disk(dev);
1131
1132	return sysfs_emit(buf, fmt: "%d\n", bdev_alignment_offset(bdev: disk->part0));
1133	}
1134
1135	static ssize_t disk_discard_alignment_show(struct device *dev,
1136	struct device_attribute *attr,
1137	char *buf)
1138	{
1139	struct gendisk *disk = dev_to_disk(dev);
1140
1141	return sysfs_emit(buf, fmt: "%d\n", bdev_alignment_offset(bdev: disk->part0));
1142	}
1143
1144	static ssize_t diskseq_show(struct device *dev,
1145	struct device_attribute attr, char* *buf)
1146	{
1147	struct gendisk *disk = dev_to_disk(dev);
1148
1149	return sysfs_emit(buf, fmt: "%llu\n", disk->diskseq);
1150	}
1151
1152	static ssize_t partscan_show(struct device *dev,
1153	struct device_attribute attr, char* *buf)
1154	{
1155	return sysfs_emit(buf, fmt: "%u\n", disk_has_partscan(dev_to_disk(dev)));
1156	}
1157
1158	static DEVICE_ATTR(range, `0444`, disk_range_show, NULL);
1159	static DEVICE_ATTR(ext_range, `0444`, disk_ext_range_show, NULL);
1160	static DEVICE_ATTR(removable, `0444`, disk_removable_show, NULL);
1161	static DEVICE_ATTR(hidden, `0444`, disk_hidden_show, NULL);
1162	static DEVICE_ATTR(ro, `0444`, disk_ro_show, NULL);
1163	static DEVICE_ATTR(size, `0444`, part_size_show, NULL);
1164	static DEVICE_ATTR(alignment_offset, `0444`, disk_alignment_offset_show, NULL);
1165	static DEVICE_ATTR(discard_alignment, `0444`, disk_discard_alignment_show, NULL);
1166	static DEVICE_ATTR(capability, `0444`, disk_capability_show, NULL);
1167	static DEVICE_ATTR(stat, `0444`, part_stat_show, NULL);
1168	static DEVICE_ATTR(inflight, `0444`, part_inflight_show, NULL);
1169	static DEVICE_ATTR(badblocks, `0644`, disk_badblocks_show, disk_badblocks_store);
1170	static DEVICE_ATTR(diskseq, `0444`, diskseq_show, NULL);
1171	static DEVICE_ATTR(partscan, `0444`, partscan_show, NULL);
1172
1173	#ifdef CONFIG_FAIL_MAKE_REQUEST
1174	ssize_t part_fail_show(struct device *dev,
1175	struct device_attribute attr, char* *buf)
1176	{
1177	return sysfs_emit(buf, fmt: "%d\n",
1178	bdev_test_flag(dev_to_bdev(dev), BD_MAKE_IT_FAIL));
1179	}
1180
1181	ssize_t part_fail_store(struct device *dev,
1182	struct device_attribute *attr,
1183	const char *buf, size_t count)
1184	{
1185	int i;
1186
1187	if (count > `0` && sscanf(buf, "%d", &i) > `0`) {
1188	if (i)
1189	bdev_set_flag(dev_to_bdev(dev), BD_MAKE_IT_FAIL);
1190	else
1191	bdev_clear_flag(dev_to_bdev(dev), BD_MAKE_IT_FAIL);
1192	}
1193	return count;
1194	}
1195
1196	static struct device_attribute dev_attr_fail =
1197	__ATTR(make-it-fail, `0644`, part_fail_show, part_fail_store);
1198	#endif /* CONFIG_FAIL_MAKE_REQUEST */
1199
1200	#ifdef CONFIG_FAIL_IO_TIMEOUT
1201	static struct device_attribute dev_attr_fail_timeout =
1202	__ATTR(io-timeout-fail, `0644`, part_timeout_show, part_timeout_store);
1203	#endif
1204
1205	static struct attribute *disk_attrs[] = {
1206	&dev_attr_range.attr,
1207	&dev_attr_ext_range.attr,
1208	&dev_attr_removable.attr,
1209	&dev_attr_hidden.attr,
1210	&dev_attr_ro.attr,
1211	&dev_attr_size.attr,
1212	&dev_attr_alignment_offset.attr,
1213	&dev_attr_discard_alignment.attr,
1214	&dev_attr_capability.attr,
1215	&dev_attr_stat.attr,
1216	&dev_attr_inflight.attr,
1217	&dev_attr_badblocks.attr,
1218	&dev_attr_events.attr,
1219	&dev_attr_events_async.attr,
1220	&dev_attr_events_poll_msecs.attr,
1221	&dev_attr_diskseq.attr,
1222	&dev_attr_partscan.attr,
1223	#ifdef CONFIG_FAIL_MAKE_REQUEST
1224	&dev_attr_fail.attr,
1225	#endif
1226	#ifdef CONFIG_FAIL_IO_TIMEOUT
1227	&dev_attr_fail_timeout.attr,
1228	#endif
1229	NULL
1230	};
1231
1232	static umode_t disk_visible(struct kobject kobj, struct* attribute a, int* n)
1233	{
1234	struct device dev = container_of(kobj, typeof(dev), kobj);
1235	struct gendisk *disk = dev_to_disk(dev);
1236
1237	if (a == &dev_attr_badblocks.attr && !disk->bb)
1238	return `0`;
1239	return a->mode;
1240	}
1241
1242	static struct attribute_group disk_attr_group = {
1243	.attrs = disk_attrs,
1244	.is_visible = disk_visible,
1245	};
1246
1247	static const struct attribute_group *disk_attr_groups[] = {
1248	&disk_attr_group,
1249	#ifdef CONFIG_BLK_DEV_IO_TRACE
1250	&blk_trace_attr_group,
1251	#endif
1252	#ifdef CONFIG_BLK_DEV_INTEGRITY
1253	&blk_integrity_attr_group,
1254	#endif
1255	NULL
1256	};
1257
1258	/**
1259	* disk_release - releases all allocated resources of the gendisk
1260	* @dev: the device representing this disk
1261	*
1262	* This function releases all allocated resources of the gendisk.
1263	*
1264	* Drivers which used __device_add_disk() have a gendisk with a request_queue
1265	* assigned. Since the request_queue sits on top of the gendisk for these
1266	* drivers we also call blk_put_queue() for them, and we expect the
1267	* request_queue refcount to reach 0 at this point, and so the request_queue
1268	* will also be freed prior to the disk.
1269	*
1270	* Context: can sleep
1271	*/
1272	static void disk_release(struct device *dev)
1273	{
1274	struct gendisk *disk = dev_to_disk(dev);
1275
1276	might_sleep();
1277	WARN_ON_ONCE(disk_live(disk));
1278
1279	blk_trace_remove(q: disk->queue);
1280
1281	/*
1282	* To undo the all initialization from blk_mq_init_allocated_queue in
1283	* case of a probe failure where add_disk is never called we have to
1284	* call blk_mq_exit_queue here. We can't do this for the more common
1285	* teardown case (yet) as the tagset can be gone by the time the disk
1286	* is released once it was added.
1287	*/
1288	if (queue_is_mq(q: disk->queue) &&
1289	test_bit(GD_OWNS_QUEUE, &disk->state) &&
1290	!test_bit(GD_ADDED, &disk->state))
1291	blk_mq_exit_queue(q: disk->queue);
1292
1293	blkcg_exit_disk(disk);
1294
1295	bioset_exit(&disk->bio_split);
1296
1297	disk_release_events(disk);
1298	kfree(objp: disk->random);
1299	disk_free_zone_resources(disk);
1300	xa_destroy(&disk->part_tbl);
1301
1302	disk->queue->disk = NULL;
1303	blk_put_queue(disk->queue);
1304
1305	if (test_bit(GD_ADDED, &disk->state) && disk->fops->free_disk)
1306	disk->fops->free_disk(disk);
1307
1308	bdev_drop(bdev: disk->part0); / frees the disk /
1309	}
1310
1311	static int block_uevent(const struct device dev, struct* kobj_uevent_env *env)
1312	{
1313	const struct gendisk *disk = dev_to_disk(dev);
1314
1315	return add_uevent_var(env, format: "DISKSEQ=%llu", disk->diskseq);
1316	}
1317
1318	const struct class block_class = {
1319	.name = "block",
1320	.dev_uevent = block_uevent,
1321	};
1322
1323	static char block_devnode(const* struct device dev, umode_t mode,
1324	kuid_t uid, kgid_t gid)
1325	{
1326	struct gendisk *disk = dev_to_disk(dev);
1327
1328	if (disk->fops->devnode)
1329	return disk->fops->devnode(disk, mode);
1330	return NULL;
1331	}
1332
1333	const struct device_type disk_type = {
1334	.name = "disk",
1335	.groups = disk_attr_groups,
1336	.release = disk_release,
1337	.devnode = block_devnode,
1338	};
1339
1340	#ifdef CONFIG_PROC_FS
1341	/*
1342	* aggregate disk stat collector. Uses the same stats that the sysfs
1343	* entries do, above, but makes them available through one seq_file.
1344	*
1345	* The output looks suspiciously like /proc/partitions with a bunch of
1346	* extra fields.
1347	*/
1348	static int diskstats_show(struct seq_file seqf, void* *v)
1349	{
1350	struct gendisk *gp = v;
1351	struct block_device *hd;
1352	unsigned int inflight;
1353	struct disk_stats stat;
1354	unsigned long idx;
1355
1356	/*
1357	if (&disk_to_dev(gp)->kobj.entry == block_class.devices.next)
1358	seq_puts(seqf, "major minor name"
1359	" rio rmerge rsect ruse wio wmerge "
1360	"wsect wuse running use aveq"
1361	"\n\n");
1362	*/
1363
1364	rcu_read_lock();
1365	xa_for_each(&gp->part_tbl, idx, hd) {
1366	if (bdev_is_partition(bdev: hd) && !bdev_nr_sectors(bdev: hd))
1367	continue;
1368
1369	inflight = bdev_count_inflight(hd);
1370	if (inflight) {
1371	part_stat_lock();
1372	update_io_ticks(part: hd, now: jiffies, end: true);
1373	part_stat_unlock();
1374	}
1375	part_stat_read_all(part: hd, stat: &stat);
1376	seq_put_decimal_ull_width(m: seqf, delimiter: "", MAJOR(hd->bd_dev), width: `4`);
1377	seq_put_decimal_ull_width(m: seqf, delimiter: " ", MINOR(hd->bd_dev), width: `7`);
1378	seq_printf(m: seqf, fmt: " %pg", hd);
1379	seq_put_decimal_ull(m: seqf, delimiter: " ", num: stat.ios[STAT_READ]);
1380	seq_put_decimal_ull(m: seqf, delimiter: " ", num: stat.merges[STAT_READ]);
1381	seq_put_decimal_ull(m: seqf, delimiter: " ", num: stat.sectors[STAT_READ]);
1382	seq_put_decimal_ull(m: seqf, delimiter: " ", num: (unsigned int)div_u64(dividend: stat.nsecs[STAT_READ],
1383	NSEC_PER_MSEC));
1384	seq_put_decimal_ull(m: seqf, delimiter: " ", num: stat.ios[STAT_WRITE]);
1385	seq_put_decimal_ull(m: seqf, delimiter: " ", num: stat.merges[STAT_WRITE]);
1386	seq_put_decimal_ull(m: seqf, delimiter: " ", num: stat.sectors[STAT_WRITE]);
1387	seq_put_decimal_ull(m: seqf, delimiter: " ", num: (unsigned int)div_u64(dividend: stat.nsecs[STAT_WRITE],
1388	NSEC_PER_MSEC));
1389	seq_put_decimal_ull(m: seqf, delimiter: " ", num: inflight);
1390	seq_put_decimal_ull(m: seqf, delimiter: " ", num: jiffies_to_msecs(j: stat.io_ticks));
1391	seq_put_decimal_ull(m: seqf, delimiter: " ", num: (unsigned int)div_u64(dividend: stat.nsecs[STAT_READ] +
1392	stat.nsecs[STAT_WRITE] +
1393	stat.nsecs[STAT_DISCARD] +
1394	stat.nsecs[STAT_FLUSH],
1395	NSEC_PER_MSEC));
1396	seq_put_decimal_ull(m: seqf, delimiter: " ", num: stat.ios[STAT_DISCARD]);
1397	seq_put_decimal_ull(m: seqf, delimiter: " ", num: stat.merges[STAT_DISCARD]);
1398	seq_put_decimal_ull(m: seqf, delimiter: " ", num: stat.sectors[STAT_DISCARD]);
1399	seq_put_decimal_ull(m: seqf, delimiter: " ", num: (unsigned int)div_u64(dividend: stat.nsecs[STAT_DISCARD],
1400	NSEC_PER_MSEC));
1401	seq_put_decimal_ull(m: seqf, delimiter: " ", num: stat.ios[STAT_FLUSH]);
1402	seq_put_decimal_ull(m: seqf, delimiter: " ", num: (unsigned int)div_u64(dividend: stat.nsecs[STAT_FLUSH],
1403	NSEC_PER_MSEC));
1404	seq_putc(m: seqf, c: `'\n'`);
1405	}
1406	rcu_read_unlock();
1407
1408	return `0`;
1409	}
1410
1411	static const struct seq_operations diskstats_op = {
1412	.start = disk_seqf_start,
1413	.next = disk_seqf_next,
1414	.stop = disk_seqf_stop,
1415	.show = diskstats_show
1416	};
1417
1418	static int __init proc_genhd_init(void)
1419	{
1420	proc_create_seq("diskstats", `0`, NULL, &diskstats_op);
1421	proc_create_seq("partitions", `0`, NULL, &partitions_op);
1422	return `0`;
1423	}
1424	module_init(proc_genhd_init);
1425	#endif /* CONFIG_PROC_FS */
1426
1427	dev_t part_devt(struct gendisk *disk, u8 partno)
1428	{
1429	struct block_device *part;
1430	dev_t devt = `0`;
1431
1432	rcu_read_lock();
1433	part = xa_load(&disk->part_tbl, index: partno);
1434	if (part)
1435	devt = part->bd_dev;
1436	rcu_read_unlock();
1437
1438	return devt;
1439	}
1440
1441	struct gendisk __alloc_disk_node(struct* request_queue q, int* node_id,
1442	struct lock_class_key *lkclass)
1443	{
1444	struct gendisk *disk;
1445
1446	disk = kzalloc_node(sizeof(struct gendisk), GFP_KERNEL, node_id);
1447	if (!disk)
1448	return NULL;
1449
1450	if (bioset_init(&disk->bio_split, BIO_POOL_SIZE, `0`, flags: `0`))
1451	goto out_free_disk;
1452
1453	disk->bdi = bdi_alloc(node_id);
1454	if (!disk->bdi)
1455	goto out_free_bioset;
1456
1457	/ bdev_alloc() might need the queue, set before the first call /
1458	disk->queue = q;
1459
1460	disk->part0 = bdev_alloc(disk, partno: `0`);
1461	if (!disk->part0)
1462	goto out_free_bdi;
1463
1464	disk->node_id = node_id;
1465	mutex_init(&disk->open_mutex);
1466	xa_init(xa: &disk->part_tbl);
1467	if (xa_insert(xa: &disk->part_tbl, index: `0`, entry: disk->part0, GFP_KERNEL))
1468	goto out_destroy_part_tbl;
1469
1470	if (blkcg_init_disk(disk))
1471	goto out_erase_part0;
1472
1473	disk_init_zone_resources(disk);
1474	rand_initialize_disk(disk);
1475	disk_to_dev(disk)->class = &block_class;
1476	disk_to_dev(disk)->type = &disk_type;
1477	device_initialize(disk_to_dev(disk));
1478	inc_diskseq(disk);
1479	q->disk = disk;
1480	lockdep_init_map(lock: &disk->lockdep_map, name: "(bio completion)", key: lkclass, subclass: `0`);
1481	#ifdef CONFIG_BLOCK_HOLDER_DEPRECATED
1482	INIT_LIST_HEAD(list: &disk->slave_bdevs);
1483	#endif
1484	mutex_init(&disk->rqos_state_mutex);
1485	return disk;
1486
1487	out_erase_part0:
1488	xa_erase(&disk->part_tbl, index: `0`);
1489	out_destroy_part_tbl:
1490	xa_destroy(&disk->part_tbl);
1491	disk->part0->bd_disk = NULL;
1492	bdev_drop(bdev: disk->part0);
1493	out_free_bdi:
1494	bdi_put(bdi: disk->bdi);
1495	out_free_bioset:
1496	bioset_exit(&disk->bio_split);
1497	out_free_disk:
1498	kfree(objp: disk);
1499	return NULL;
1500	}
1501
1502	struct gendisk __blk_alloc_disk(struct* queue_limits lim, int* node,
1503	struct lock_class_key *lkclass)
1504	{
1505	struct queue_limits default_lim = { };
1506	struct request_queue *q;
1507	struct gendisk *disk;
1508
1509	q = blk_alloc_queue(lim: lim ? lim : &default_lim, node_id: node);
1510	if (IS_ERR(ptr: q))
1511	return ERR_CAST(ptr: q);
1512
1513	disk = __alloc_disk_node(q, node_id: node, lkclass);
1514	if (!disk) {
1515	blk_put_queue(q);
1516	return ERR_PTR(error: -ENOMEM);
1517	}
1518	set_bit(GD_OWNS_QUEUE, addr: &disk->state);
1519	return disk;
1520	}
1521	EXPORT_SYMBOL(__blk_alloc_disk);
1522
/**
 * put_disk - decrements the gendisk refcount
 * @disk: the struct gendisk to decrement the refcount for; may be NULL, in
 *        which case nothing happens
 *
 * Drops one reference on the device embedded in @disk. Once the count
 * reaches 0, disk_release() is called.
 *
 * Note: for blk-mq disk put_disk must be called before freeing the tag_set
 * when handling probe errors (that is before add_disk() is called).
 *
 * Context: Any context, but the last reference must not be dropped from
 *          atomic context.
 */
void put_disk(struct gendisk *disk)
{
	if (!disk)
		return;

	put_device(disk_to_dev(disk));
}
EXPORT_SYMBOL(put_disk);
1542
1543	static void set_disk_ro_uevent(struct gendisk gd, int* ro)
1544	{
1545	char event[] = "DISK_RO=1";
1546	char *envp[] = { event, NULL };
1547
1548	if (!ro)
1549	event[`8`] = `'0'`;
1550	kobject_uevent_env(kobj: &disk_to_dev(gd)->kobj, action: KOBJ_CHANGE, envp);
1551	}
1552
1553	/**
1554	* set_disk_ro - set a gendisk read-only
1555	* @disk: gendisk to operate on
1556	* @read_only: %true to set the disk read-only, %false set the disk read/write
1557	*
1558	* This function is used to indicate whether a given disk device should have its
1559	* read-only flag set. set_disk_ro() is typically used by device drivers to
1560	* indicate whether the underlying physical device is write-protected.
1561	*/
1562	void set_disk_ro(struct gendisk *disk, bool read_only)
1563	{
1564	if (read_only) {
1565	if (test_and_set_bit(GD_READ_ONLY, addr: &disk->state))
1566	return;
1567	} else {
1568	if (!test_and_clear_bit(GD_READ_ONLY, addr: &disk->state))
1569	return;
1570	}
1571	set_disk_ro_uevent(gd: disk, ro: read_only);
1572	}
1573	EXPORT_SYMBOL(set_disk_ro);
1574
1575	void inc_diskseq(struct gendisk *disk)
1576	{
1577	disk->diskseq = atomic64_inc_return(v: &diskseq);
1578	}
1579

Provided by KDAB

Definitions

block_depr
diskseq
ext_devt_ida
set_capacity
set_capacity_and_notify
part_stat_read_all
bdev_count_inflight_rw
bdev_count_inflight
blk_major_name
major_names
major_names_lock
major_names_spinlock
major_to_index
blkdev_show
__register_blkdev
unregister_blkdev
blk_alloc_ext_minor
blk_free_ext_minor
disk_uevent
disk_scan_partitions
add_disk_final
__add_disk
add_disk_fwnode
device_add_disk
blk_report_disk_dead
__blk_mark_disk_dead
blk_mark_disk_dead
__del_gendisk
disable_elv_switch
del_gendisk
invalidate_disk
disk_badblocks_show
disk_badblocks_store
blk_probe_dev
blk_request_module
disk_seqf_start
disk_seqf_next
disk_seqf_stop
show_partition_start
show_partition
partitions_op
genhd_device_init
disk_range_show
disk_ext_range_show
disk_removable_show
disk_hidden_show
disk_ro_show
part_size_show
part_stat_show
part_inflight_show
disk_capability_show
disk_alignment_offset_show
disk_discard_alignment_show
diskseq_show
partscan_show
part_fail_show
part_fail_store
dev_attr_fail
dev_attr_fail_timeout
disk_attrs
disk_visible
disk_attr_group
disk_attr_groups
disk_release
block_uevent
block_class
block_devnode
disk_type
diskstats_show
diskstats_op
proc_genhd_init
part_devt
__alloc_disk_node
__blk_alloc_disk
put_disk
set_disk_ro_uevent
set_disk_ro

Improve your Profiling and Debugging skills

Find out more

Definitions

source code of linux/block/genhd.c