1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* |
3 | * Disk events - monitor disk events like media change and eject request. |
4 | */ |
#include <linux/export.h>
#include <linux/moduleparam.h>
#include <linux/blkdev.h>
#include <linux/sysfs.h>
#include "blk.h"
9 | |
10 | struct disk_events { |
11 | struct list_head node; /* all disk_event's */ |
12 | struct gendisk *disk; /* the associated disk */ |
13 | spinlock_t lock; |
14 | |
15 | struct mutex block_mutex; /* protects blocking */ |
16 | int block; /* event blocking depth */ |
17 | unsigned int pending; /* events already sent out */ |
18 | unsigned int clearing; /* events being cleared */ |
19 | |
20 | long poll_msecs; /* interval, -1 for default */ |
21 | struct delayed_work dwork; |
22 | }; |
23 | |
24 | static const char *disk_events_strs[] = { |
25 | [ilog2(DISK_EVENT_MEDIA_CHANGE)] = "media_change" , |
26 | [ilog2(DISK_EVENT_EJECT_REQUEST)] = "eject_request" , |
27 | }; |
28 | |
29 | static char *disk_uevents[] = { |
30 | [ilog2(DISK_EVENT_MEDIA_CHANGE)] = "DISK_MEDIA_CHANGE=1" , |
31 | [ilog2(DISK_EVENT_EJECT_REQUEST)] = "DISK_EJECT_REQUEST=1" , |
32 | }; |
33 | |
/* Global list of every registered disk_events; walked and edited under disk_events_mutex. */
static DEFINE_MUTEX(disk_events_mutex);
static LIST_HEAD(disk_events);

/* Default poll interval in msecs; starts at zero, i.e. in-kernel polling is off. */
static unsigned long disk_events_dfl_poll_msecs;
40 | |
41 | static unsigned long disk_events_poll_jiffies(struct gendisk *disk) |
42 | { |
43 | struct disk_events *ev = disk->ev; |
44 | long intv_msecs = 0; |
45 | |
46 | /* |
47 | * If device-specific poll interval is set, always use it. If |
48 | * the default is being used, poll if the POLL flag is set. |
49 | */ |
50 | if (ev->poll_msecs >= 0) |
51 | intv_msecs = ev->poll_msecs; |
52 | else if (disk->event_flags & DISK_EVENT_FLAG_POLL) |
53 | intv_msecs = disk_events_dfl_poll_msecs; |
54 | |
55 | return msecs_to_jiffies(m: intv_msecs); |
56 | } |
57 | |
58 | /** |
59 | * disk_block_events - block and flush disk event checking |
60 | * @disk: disk to block events for |
61 | * |
62 | * On return from this function, it is guaranteed that event checking |
63 | * isn't in progress and won't happen until unblocked by |
64 | * disk_unblock_events(). Events blocking is counted and the actual |
65 | * unblocking happens after the matching number of unblocks are done. |
66 | * |
67 | * Note that this intentionally does not block event checking from |
68 | * disk_clear_events(). |
69 | * |
70 | * CONTEXT: |
71 | * Might sleep. |
72 | */ |
73 | void disk_block_events(struct gendisk *disk) |
74 | { |
75 | struct disk_events *ev = disk->ev; |
76 | unsigned long flags; |
77 | bool cancel; |
78 | |
79 | if (!ev) |
80 | return; |
81 | |
82 | /* |
83 | * Outer mutex ensures that the first blocker completes canceling |
84 | * the event work before further blockers are allowed to finish. |
85 | */ |
86 | mutex_lock(&ev->block_mutex); |
87 | |
88 | spin_lock_irqsave(&ev->lock, flags); |
89 | cancel = !ev->block++; |
90 | spin_unlock_irqrestore(lock: &ev->lock, flags); |
91 | |
92 | if (cancel) |
93 | cancel_delayed_work_sync(dwork: &disk->ev->dwork); |
94 | |
95 | mutex_unlock(lock: &ev->block_mutex); |
96 | } |
97 | |
98 | static void __disk_unblock_events(struct gendisk *disk, bool check_now) |
99 | { |
100 | struct disk_events *ev = disk->ev; |
101 | unsigned long intv; |
102 | unsigned long flags; |
103 | |
104 | spin_lock_irqsave(&ev->lock, flags); |
105 | |
106 | if (WARN_ON_ONCE(ev->block <= 0)) |
107 | goto out_unlock; |
108 | |
109 | if (--ev->block) |
110 | goto out_unlock; |
111 | |
112 | intv = disk_events_poll_jiffies(disk); |
113 | if (check_now) |
114 | queue_delayed_work(wq: system_freezable_power_efficient_wq, |
115 | dwork: &ev->dwork, delay: 0); |
116 | else if (intv) |
117 | queue_delayed_work(wq: system_freezable_power_efficient_wq, |
118 | dwork: &ev->dwork, delay: intv); |
119 | out_unlock: |
120 | spin_unlock_irqrestore(lock: &ev->lock, flags); |
121 | } |
122 | |
123 | /** |
124 | * disk_unblock_events - unblock disk event checking |
125 | * @disk: disk to unblock events for |
126 | * |
127 | * Undo disk_block_events(). When the block count reaches zero, it |
128 | * starts events polling if configured. |
129 | * |
130 | * CONTEXT: |
131 | * Don't care. Safe to call from irq context. |
132 | */ |
133 | void disk_unblock_events(struct gendisk *disk) |
134 | { |
135 | if (disk->ev) |
136 | __disk_unblock_events(disk, check_now: false); |
137 | } |
138 | |
139 | /** |
140 | * disk_flush_events - schedule immediate event checking and flushing |
141 | * @disk: disk to check and flush events for |
142 | * @mask: events to flush |
143 | * |
144 | * Schedule immediate event checking on @disk if not blocked. Events in |
145 | * @mask are scheduled to be cleared from the driver. Note that this |
146 | * doesn't clear the events from @disk->ev. |
147 | * |
148 | * CONTEXT: |
149 | * If @mask is non-zero must be called with disk->open_mutex held. |
150 | */ |
151 | void disk_flush_events(struct gendisk *disk, unsigned int mask) |
152 | { |
153 | struct disk_events *ev = disk->ev; |
154 | |
155 | if (!ev) |
156 | return; |
157 | |
158 | spin_lock_irq(lock: &ev->lock); |
159 | ev->clearing |= mask; |
160 | if (!ev->block) |
161 | mod_delayed_work(wq: system_freezable_power_efficient_wq, |
162 | dwork: &ev->dwork, delay: 0); |
163 | spin_unlock_irq(lock: &ev->lock); |
164 | } |
165 | |
166 | /* |
167 | * Tell userland about new events. Only the events listed in @disk->events are |
168 | * reported, and only if DISK_EVENT_FLAG_UEVENT is set. Otherwise, events are |
169 | * processed internally but never get reported to userland. |
170 | */ |
171 | static void disk_event_uevent(struct gendisk *disk, unsigned int events) |
172 | { |
173 | char *envp[ARRAY_SIZE(disk_uevents) + 1] = { }; |
174 | int nr_events = 0, i; |
175 | |
176 | for (i = 0; i < ARRAY_SIZE(disk_uevents); i++) |
177 | if (events & disk->events & (1 << i)) |
178 | envp[nr_events++] = disk_uevents[i]; |
179 | |
180 | if (nr_events) |
181 | kobject_uevent_env(kobj: &disk_to_dev(disk)->kobj, action: KOBJ_CHANGE, envp); |
182 | } |
183 | |
184 | static void disk_check_events(struct disk_events *ev, |
185 | unsigned int *clearing_ptr) |
186 | { |
187 | struct gendisk *disk = ev->disk; |
188 | unsigned int clearing = *clearing_ptr; |
189 | unsigned int events; |
190 | unsigned long intv; |
191 | |
192 | /* check events */ |
193 | events = disk->fops->check_events(disk, clearing); |
194 | |
195 | /* accumulate pending events and schedule next poll if necessary */ |
196 | spin_lock_irq(lock: &ev->lock); |
197 | |
198 | events &= ~ev->pending; |
199 | ev->pending |= events; |
200 | *clearing_ptr &= ~clearing; |
201 | |
202 | intv = disk_events_poll_jiffies(disk); |
203 | if (!ev->block && intv) |
204 | queue_delayed_work(wq: system_freezable_power_efficient_wq, |
205 | dwork: &ev->dwork, delay: intv); |
206 | |
207 | spin_unlock_irq(lock: &ev->lock); |
208 | |
209 | if (events & DISK_EVENT_MEDIA_CHANGE) |
210 | inc_diskseq(disk); |
211 | |
212 | if (disk->event_flags & DISK_EVENT_FLAG_UEVENT) |
213 | disk_event_uevent(disk, events); |
214 | } |
215 | |
216 | /** |
217 | * disk_clear_events - synchronously check, clear and return pending events |
218 | * @disk: disk to fetch and clear events from |
219 | * @mask: mask of events to be fetched and cleared |
220 | * |
221 | * Disk events are synchronously checked and pending events in @mask |
222 | * are cleared and returned. This ignores the block count. |
223 | * |
224 | * CONTEXT: |
225 | * Might sleep. |
226 | */ |
227 | static unsigned int disk_clear_events(struct gendisk *disk, unsigned int mask) |
228 | { |
229 | struct disk_events *ev = disk->ev; |
230 | unsigned int pending; |
231 | unsigned int clearing = mask; |
232 | |
233 | if (!ev) |
234 | return 0; |
235 | |
236 | disk_block_events(disk); |
237 | |
238 | /* |
239 | * store the union of mask and ev->clearing on the stack so that the |
240 | * race with disk_flush_events does not cause ambiguity (ev->clearing |
241 | * can still be modified even if events are blocked). |
242 | */ |
243 | spin_lock_irq(lock: &ev->lock); |
244 | clearing |= ev->clearing; |
245 | ev->clearing = 0; |
246 | spin_unlock_irq(lock: &ev->lock); |
247 | |
248 | disk_check_events(ev, clearing_ptr: &clearing); |
249 | /* |
250 | * if ev->clearing is not 0, the disk_flush_events got called in the |
251 | * middle of this function, so we want to run the workfn without delay. |
252 | */ |
253 | __disk_unblock_events(disk, check_now: ev->clearing ? true : false); |
254 | |
255 | /* then, fetch and clear pending events */ |
256 | spin_lock_irq(lock: &ev->lock); |
257 | pending = ev->pending & mask; |
258 | ev->pending &= ~mask; |
259 | spin_unlock_irq(lock: &ev->lock); |
260 | WARN_ON_ONCE(clearing & mask); |
261 | |
262 | return pending; |
263 | } |
264 | |
265 | /** |
266 | * disk_check_media_change - check if a removable media has been changed |
267 | * @disk: gendisk to check |
268 | * |
269 | * Returns %true and marks the disk for a partition rescan whether a removable |
270 | * media has been changed, and %false if the media did not change. |
271 | */ |
272 | bool disk_check_media_change(struct gendisk *disk) |
273 | { |
274 | unsigned int events; |
275 | |
276 | events = disk_clear_events(disk, mask: DISK_EVENT_MEDIA_CHANGE | |
277 | DISK_EVENT_EJECT_REQUEST); |
278 | if (events & DISK_EVENT_MEDIA_CHANGE) { |
279 | set_bit(GD_NEED_PART_SCAN, addr: &disk->state); |
280 | return true; |
281 | } |
282 | return false; |
283 | } |
284 | EXPORT_SYMBOL(disk_check_media_change); |
285 | |
286 | /** |
287 | * disk_force_media_change - force a media change event |
288 | * @disk: the disk which will raise the event |
289 | * |
290 | * Should be called when the media changes for @disk. Generates a uevent |
291 | * and attempts to free all dentries and inodes and invalidates all block |
292 | * device page cache entries in that case. |
293 | */ |
294 | void disk_force_media_change(struct gendisk *disk) |
295 | { |
296 | disk_event_uevent(disk, events: DISK_EVENT_MEDIA_CHANGE); |
297 | inc_diskseq(disk); |
298 | bdev_mark_dead(bdev: disk->part0, surprise: true); |
299 | set_bit(GD_NEED_PART_SCAN, addr: &disk->state); |
300 | } |
301 | EXPORT_SYMBOL_GPL(disk_force_media_change); |
302 | |
303 | /* |
304 | * Separate this part out so that a different pointer for clearing_ptr can be |
305 | * passed in for disk_clear_events. |
306 | */ |
307 | static void disk_events_workfn(struct work_struct *work) |
308 | { |
309 | struct delayed_work *dwork = to_delayed_work(work); |
310 | struct disk_events *ev = container_of(dwork, struct disk_events, dwork); |
311 | |
312 | disk_check_events(ev, clearing_ptr: &ev->clearing); |
313 | } |
314 | |
315 | /* |
316 | * A disk events enabled device has the following sysfs nodes under |
317 | * its /sys/block/X/ directory. |
318 | * |
319 | * events : list of all supported events |
320 | * events_async : list of events which can be detected w/o polling |
321 | * (always empty, only for backwards compatibility) |
322 | * events_poll_msecs : polling interval, 0: disable, -1: system default |
323 | */ |
324 | static ssize_t __disk_events_show(unsigned int events, char *buf) |
325 | { |
326 | const char *delim = "" ; |
327 | ssize_t pos = 0; |
328 | int i; |
329 | |
330 | for (i = 0; i < ARRAY_SIZE(disk_events_strs); i++) |
331 | if (events & (1 << i)) { |
332 | pos += sprintf(buf: buf + pos, fmt: "%s%s" , |
333 | delim, disk_events_strs[i]); |
334 | delim = " " ; |
335 | } |
336 | if (pos) |
337 | pos += sprintf(buf: buf + pos, fmt: "\n" ); |
338 | return pos; |
339 | } |
340 | |
341 | static ssize_t disk_events_show(struct device *dev, |
342 | struct device_attribute *attr, char *buf) |
343 | { |
344 | struct gendisk *disk = dev_to_disk(dev); |
345 | |
346 | if (!(disk->event_flags & DISK_EVENT_FLAG_UEVENT)) |
347 | return 0; |
348 | return __disk_events_show(events: disk->events, buf); |
349 | } |
350 | |
/*
 * sysfs show() for "events_async": always produces empty output.  The
 * attribute only exists for backwards compatibility.
 */
static ssize_t disk_events_async_show(struct device *dev,
				      struct device_attribute *attr,
				      char *buf)
{
	return 0;
}
356 | |
357 | static ssize_t disk_events_poll_msecs_show(struct device *dev, |
358 | struct device_attribute *attr, |
359 | char *buf) |
360 | { |
361 | struct gendisk *disk = dev_to_disk(dev); |
362 | |
363 | if (!disk->ev) |
364 | return sprintf(buf, fmt: "-1\n" ); |
365 | return sprintf(buf, fmt: "%ld\n" , disk->ev->poll_msecs); |
366 | } |
367 | |
368 | static ssize_t disk_events_poll_msecs_store(struct device *dev, |
369 | struct device_attribute *attr, |
370 | const char *buf, size_t count) |
371 | { |
372 | struct gendisk *disk = dev_to_disk(dev); |
373 | long intv; |
374 | |
375 | if (!count || !sscanf(buf, "%ld" , &intv)) |
376 | return -EINVAL; |
377 | |
378 | if (intv < 0 && intv != -1) |
379 | return -EINVAL; |
380 | |
381 | if (!disk->ev) |
382 | return -ENODEV; |
383 | |
384 | disk_block_events(disk); |
385 | disk->ev->poll_msecs = intv; |
386 | __disk_unblock_events(disk, check_now: true); |
387 | return count; |
388 | } |
389 | |
390 | DEVICE_ATTR(events, 0444, disk_events_show, NULL); |
391 | DEVICE_ATTR(events_async, 0444, disk_events_async_show, NULL); |
392 | DEVICE_ATTR(events_poll_msecs, 0644, disk_events_poll_msecs_show, |
393 | disk_events_poll_msecs_store); |
394 | |
395 | /* |
396 | * The default polling interval can be specified by the kernel |
397 | * parameter block.events_dfl_poll_msecs which defaults to 0 |
398 | * (disable). This can also be modified runtime by writing to |
399 | * /sys/module/block/parameters/events_dfl_poll_msecs. |
400 | */ |
401 | static int disk_events_set_dfl_poll_msecs(const char *val, |
402 | const struct kernel_param *kp) |
403 | { |
404 | struct disk_events *ev; |
405 | int ret; |
406 | |
407 | ret = param_set_ulong(val, kp); |
408 | if (ret < 0) |
409 | return ret; |
410 | |
411 | mutex_lock(&disk_events_mutex); |
412 | list_for_each_entry(ev, &disk_events, node) |
413 | disk_flush_events(disk: ev->disk, mask: 0); |
414 | mutex_unlock(lock: &disk_events_mutex); |
415 | return 0; |
416 | } |
417 | |
418 | static const struct kernel_param_ops disk_events_dfl_poll_msecs_param_ops = { |
419 | .set = disk_events_set_dfl_poll_msecs, |
420 | .get = param_get_ulong, |
421 | }; |
422 | |
423 | #undef MODULE_PARAM_PREFIX |
424 | #define MODULE_PARAM_PREFIX "block." |
425 | |
426 | module_param_cb(events_dfl_poll_msecs, &disk_events_dfl_poll_msecs_param_ops, |
427 | &disk_events_dfl_poll_msecs, 0644); |
428 | |
429 | /* |
430 | * disk_{alloc|add|del|release}_events - initialize and destroy disk_events. |
431 | */ |
432 | int disk_alloc_events(struct gendisk *disk) |
433 | { |
434 | struct disk_events *ev; |
435 | |
436 | if (!disk->fops->check_events || !disk->events) |
437 | return 0; |
438 | |
439 | ev = kzalloc(size: sizeof(*ev), GFP_KERNEL); |
440 | if (!ev) { |
441 | pr_warn("%s: failed to initialize events\n" , disk->disk_name); |
442 | return -ENOMEM; |
443 | } |
444 | |
445 | INIT_LIST_HEAD(list: &ev->node); |
446 | ev->disk = disk; |
447 | spin_lock_init(&ev->lock); |
448 | mutex_init(&ev->block_mutex); |
449 | ev->block = 1; |
450 | ev->poll_msecs = -1; |
451 | INIT_DELAYED_WORK(&ev->dwork, disk_events_workfn); |
452 | |
453 | disk->ev = ev; |
454 | return 0; |
455 | } |
456 | |
457 | void disk_add_events(struct gendisk *disk) |
458 | { |
459 | if (!disk->ev) |
460 | return; |
461 | |
462 | mutex_lock(&disk_events_mutex); |
463 | list_add_tail(new: &disk->ev->node, head: &disk_events); |
464 | mutex_unlock(lock: &disk_events_mutex); |
465 | |
466 | /* |
467 | * Block count is initialized to 1 and the following initial |
468 | * unblock kicks it into action. |
469 | */ |
470 | __disk_unblock_events(disk, check_now: true); |
471 | } |
472 | |
473 | void disk_del_events(struct gendisk *disk) |
474 | { |
475 | if (disk->ev) { |
476 | disk_block_events(disk); |
477 | |
478 | mutex_lock(&disk_events_mutex); |
479 | list_del_init(entry: &disk->ev->node); |
480 | mutex_unlock(lock: &disk_events_mutex); |
481 | } |
482 | } |
483 | |
484 | void disk_release_events(struct gendisk *disk) |
485 | { |
486 | /* the block count should be 1 from disk_del_events() */ |
487 | WARN_ON_ONCE(disk->ev && disk->ev->block != 1); |
488 | kfree(objp: disk->ev); |
489 | } |
490 | |