/* SPDX-License-Identifier: GPL-2.0 */
#ifndef INT_BLK_MQ_H
#define INT_BLK_MQ_H

#include <linux/blk-mq.h>
#include "blk-stat.h"

struct blk_mq_tag_set;

struct blk_mq_ctxs {
	struct kobject kobj;
	struct blk_mq_ctx __percpu *queue_ctx;
};

/**
 * struct blk_mq_ctx - State for a software queue facing the submitting CPUs
 */
struct blk_mq_ctx {
	struct {
		spinlock_t lock;
		struct list_head rq_lists[HCTX_MAX_TYPES];
	} ____cacheline_aligned_in_smp;

	unsigned int cpu;
	unsigned short index_hw[HCTX_MAX_TYPES];
	struct blk_mq_hw_ctx *hctxs[HCTX_MAX_TYPES];

	struct request_queue *queue;
	struct blk_mq_ctxs *ctxs;
	struct kobject kobj;
} ____cacheline_aligned_in_smp;

enum {
	BLK_MQ_NO_TAG		= -1U,
	BLK_MQ_TAG_MIN		= 1,
	BLK_MQ_TAG_MAX		= BLK_MQ_NO_TAG - 1,
};
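/*
 * BLK_MQ_NO_TAG doubles as the "no tag assigned" sentinel stored in rq->tag
 * and rq->internal_tag; see blk_mq_put_driver_tag() below.
 */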

typedef unsigned int __bitwise blk_insert_t;
#define BLK_MQ_INSERT_AT_HEAD	((__force blk_insert_t)0x01)

void blk_mq_submit_bio(struct bio *bio);
int blk_mq_poll(struct request_queue *q, blk_qc_t cookie, struct io_comp_batch *iob,
		unsigned int flags);
void blk_mq_exit_queue(struct request_queue *q);
int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr);
void blk_mq_wake_waiters(struct request_queue *q);
bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *,
			     unsigned int);
void blk_mq_flush_busy_ctxs(struct blk_mq_hw_ctx *hctx, struct list_head *list);
struct request *blk_mq_dequeue_from_ctx(struct blk_mq_hw_ctx *hctx,
					struct blk_mq_ctx *start);
void blk_mq_put_rq_ref(struct request *rq);

/*
 * Internal helpers for allocating/freeing the request map
 */
void blk_mq_free_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
		     unsigned int hctx_idx);
void blk_mq_free_rq_map(struct blk_mq_tags *tags);
struct blk_mq_tags *blk_mq_alloc_map_and_rqs(struct blk_mq_tag_set *set,
					     unsigned int hctx_idx, unsigned int depth);
void blk_mq_free_map_and_rqs(struct blk_mq_tag_set *set,
			     struct blk_mq_tags *tags,
			     unsigned int hctx_idx);

/*
 * CPU -> queue mappings
 */
extern int blk_mq_hw_queue_to_node(struct blk_mq_queue_map *qmap, unsigned int);

/*
 * blk_mq_map_queue_type() - map (hctx_type,cpu) to hardware queue
 * @q: request queue
 * @type: the hctx type index
 * @cpu: CPU
 */
static inline struct blk_mq_hw_ctx *blk_mq_map_queue_type(struct request_queue *q,
							  enum hctx_type type,
							  unsigned int cpu)
{
	return xa_load(&q->hctx_table, q->tag_set->map[type].mq_map[cpu]);
}

static inline enum hctx_type blk_mq_get_hctx_type(blk_opf_t opf)
{
	enum hctx_type type = HCTX_TYPE_DEFAULT;

	/*
	 * The caller ensures that polling is enabled if REQ_POLLED is set.
	 */
	if (opf & REQ_POLLED)
		type = HCTX_TYPE_POLL;
	else if ((opf & REQ_OP_MASK) == REQ_OP_READ)
		type = HCTX_TYPE_READ;
	return type;
}
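/*
 * Illustration of the precedence above: a polled read maps to HCTX_TYPE_POLL
 * (REQ_POLLED wins over the op check), a plain read to HCTX_TYPE_READ, and
 * writes and everything else to HCTX_TYPE_DEFAULT. Queues without a
 * dedicated poll or read map fall back to the default hardware queue for
 * those types when ctx->hctxs[] is built.
 */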

/*
 * blk_mq_map_queue() - map (opf,ctx) to hardware queue
 * @q: request queue
 * @opf: operation type (REQ_OP_*) and flags (e.g. REQ_POLLED).
 * @ctx: software queue cpu ctx
 */
static inline struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q,
						     blk_opf_t opf,
						     struct blk_mq_ctx *ctx)
{
	return ctx->hctxs[blk_mq_get_hctx_type(opf)];
}
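/*
 * A minimal lookup sketch combining the helpers above (illustrative, not a
 * verbatim caller):
 *
 *	struct blk_mq_ctx *ctx = blk_mq_get_ctx(q);
 *	struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, bio->bi_opf, ctx);
 */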

/*
 * sysfs helpers
 */
extern void blk_mq_sysfs_init(struct request_queue *q);
extern void blk_mq_sysfs_deinit(struct request_queue *q);
int blk_mq_sysfs_register(struct gendisk *disk);
void blk_mq_sysfs_unregister(struct gendisk *disk);
int blk_mq_sysfs_register_hctxs(struct request_queue *q);
void blk_mq_sysfs_unregister_hctxs(struct request_queue *q);
extern void blk_mq_hctx_kobj_init(struct blk_mq_hw_ctx *hctx);
void blk_mq_free_plug_rqs(struct blk_plug *plug);
void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule);

void blk_mq_cancel_work_sync(struct request_queue *q);

void blk_mq_release(struct request_queue *q);

static inline struct blk_mq_ctx *__blk_mq_get_ctx(struct request_queue *q,
						  unsigned int cpu)
{
	return per_cpu_ptr(q->queue_ctx, cpu);
}

/*
 * This assumes per-cpu software queueing queues. They could be per-node
 * as well, for instance. For now this is hardcoded as-is. Note that we don't
 * care about preemption, since we know the ctx's are persistent. This does
 * mean that we can't rely on ctx always matching the currently running CPU.
 */
static inline struct blk_mq_ctx *blk_mq_get_ctx(struct request_queue *q)
{
	return __blk_mq_get_ctx(q, raw_smp_processor_id());
}

struct blk_mq_alloc_data {
	/* input parameter */
	struct request_queue *q;
	blk_mq_req_flags_t flags;
	unsigned int shallow_depth;
	blk_opf_t cmd_flags;
	req_flags_t rq_flags;

	/* allocate multiple requests/tags in one go */
	unsigned int nr_tags;
	struct request **cached_rq;

	/* input & output parameter */
	struct blk_mq_ctx *ctx;
	struct blk_mq_hw_ctx *hctx;
};

struct blk_mq_tags *blk_mq_init_tags(unsigned int nr_tags,
		unsigned int reserved_tags, int node, int alloc_policy);
void blk_mq_free_tags(struct blk_mq_tags *tags);
int blk_mq_init_bitmaps(struct sbitmap_queue *bitmap_tags,
		struct sbitmap_queue *breserved_tags, unsigned int queue_depth,
		unsigned int reserved, int node, int alloc_policy);

unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data);
unsigned long blk_mq_get_tags(struct blk_mq_alloc_data *data, int nr_tags,
		unsigned int *offset);
void blk_mq_put_tag(struct blk_mq_tags *tags, struct blk_mq_ctx *ctx,
		unsigned int tag);
void blk_mq_put_tags(struct blk_mq_tags *tags, int *tag_array, int nr_tags);
int blk_mq_tag_update_depth(struct blk_mq_hw_ctx *hctx,
		struct blk_mq_tags **tags, unsigned int depth, bool can_grow);
void blk_mq_tag_resize_shared_tags(struct blk_mq_tag_set *set,
		unsigned int size);
void blk_mq_tag_update_sched_shared_tags(struct request_queue *q);

void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags, bool);
void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_tag_iter_fn *fn,
		void *priv);
void blk_mq_all_tag_iter(struct blk_mq_tags *tags, busy_tag_iter_fn *fn,
		void *priv);

static inline struct sbq_wait_state *bt_wait_ptr(struct sbitmap_queue *bt,
						 struct blk_mq_hw_ctx *hctx)
{
	if (!hctx)
		return &bt->ws[0];
	return sbq_wait_ptr(bt, &hctx->wait_index);
}

void __blk_mq_tag_busy(struct blk_mq_hw_ctx *);
void __blk_mq_tag_idle(struct blk_mq_hw_ctx *);

static inline void blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx)
{
	if (hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED)
		__blk_mq_tag_busy(hctx);
}

static inline void blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
{
	if (hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED)
		__blk_mq_tag_idle(hctx);
}

static inline bool blk_mq_tag_is_reserved(struct blk_mq_tags *tags,
					  unsigned int tag)
{
	return tag < tags->nr_reserved_tags;
}

static inline bool blk_mq_is_shared_tags(unsigned int flags)
{
	return flags & BLK_MQ_F_TAG_HCTX_SHARED;
}

static inline struct blk_mq_tags *blk_mq_tags_from_data(struct blk_mq_alloc_data *data)
{
	if (data->rq_flags & RQF_SCHED_TAGS)
		return data->hctx->sched_tags;
	return data->hctx->tags;
}

static inline bool blk_mq_hctx_stopped(struct blk_mq_hw_ctx *hctx)
{
	return test_bit(BLK_MQ_S_STOPPED, &hctx->state);
}

static inline bool blk_mq_hw_queue_mapped(struct blk_mq_hw_ctx *hctx)
{
	return hctx->nr_ctx && hctx->tags;
}

unsigned int blk_mq_in_flight(struct request_queue *q,
		struct block_device *part);
void blk_mq_in_flight_rw(struct request_queue *q, struct block_device *part,
		unsigned int inflight[2]);

static inline void blk_mq_put_dispatch_budget(struct request_queue *q,
					      int budget_token)
{
	if (q->mq_ops->put_budget)
		q->mq_ops->put_budget(q, budget_token);
}

static inline int blk_mq_get_dispatch_budget(struct request_queue *q)
{
	if (q->mq_ops->get_budget)
		return q->mq_ops->get_budget(q);
	return 0;
}

static inline void blk_mq_set_rq_budget_token(struct request *rq, int token)
{
	if (token < 0)
		return;

	if (rq->q->mq_ops->set_rq_budget_token)
		rq->q->mq_ops->set_rq_budget_token(rq, token);
}

static inline int blk_mq_get_rq_budget_token(struct request *rq)
{
	if (rq->q->mq_ops->get_rq_budget_token)
		return rq->q->mq_ops->get_rq_budget_token(rq);
	return -1;
}
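/*
 * Dispatch-side pairing of the budget helpers, as a minimal sketch (error
 * handling elided; the ordering mirrors the dispatch paths):
 *
 *	budget_token = blk_mq_get_dispatch_budget(q);
 *	if (budget_token < 0)
 *		return false;			// driver is out of budget
 *	blk_mq_set_rq_budget_token(rq, budget_token);
 *	...
 *	if (dispatch_failed)			// give the budget back
 *		blk_mq_put_dispatch_budget(q, blk_mq_get_rq_budget_token(rq));
 */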

static inline void __blk_mq_add_active_requests(struct blk_mq_hw_ctx *hctx,
						int val)
{
	if (blk_mq_is_shared_tags(hctx->flags))
		atomic_add(val, &hctx->queue->nr_active_requests_shared_tags);
	else
		atomic_add(val, &hctx->nr_active);
}

static inline void __blk_mq_inc_active_requests(struct blk_mq_hw_ctx *hctx)
{
	__blk_mq_add_active_requests(hctx, 1);
}

static inline void __blk_mq_sub_active_requests(struct blk_mq_hw_ctx *hctx,
						int val)
{
	if (blk_mq_is_shared_tags(hctx->flags))
		atomic_sub(val, &hctx->queue->nr_active_requests_shared_tags);
	else
		atomic_sub(val, &hctx->nr_active);
}

static inline void __blk_mq_dec_active_requests(struct blk_mq_hw_ctx *hctx)
{
	__blk_mq_sub_active_requests(hctx, 1);
}

static inline void blk_mq_add_active_requests(struct blk_mq_hw_ctx *hctx,
					      int val)
{
	if (hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED)
		__blk_mq_add_active_requests(hctx, val);
}

static inline void blk_mq_inc_active_requests(struct blk_mq_hw_ctx *hctx)
{
	if (hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED)
		__blk_mq_inc_active_requests(hctx);
}

static inline void blk_mq_sub_active_requests(struct blk_mq_hw_ctx *hctx,
					      int val)
{
	if (hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED)
		__blk_mq_sub_active_requests(hctx, val);
}

static inline void blk_mq_dec_active_requests(struct blk_mq_hw_ctx *hctx)
{
	if (hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED)
		__blk_mq_dec_active_requests(hctx);
}

static inline int __blk_mq_active_requests(struct blk_mq_hw_ctx *hctx)
{
	if (blk_mq_is_shared_tags(hctx->flags))
		return atomic_read(&hctx->queue->nr_active_requests_shared_tags);
	return atomic_read(&hctx->nr_active);
}

static inline void __blk_mq_put_driver_tag(struct blk_mq_hw_ctx *hctx,
					   struct request *rq)
{
	blk_mq_dec_active_requests(hctx);
	blk_mq_put_tag(hctx->tags, rq->mq_ctx, rq->tag);
	rq->tag = BLK_MQ_NO_TAG;
}

static inline void blk_mq_put_driver_tag(struct request *rq)
{
	if (rq->tag == BLK_MQ_NO_TAG || rq->internal_tag == BLK_MQ_NO_TAG)
		return;

	__blk_mq_put_driver_tag(rq->mq_hctx, rq);
}

bool __blk_mq_alloc_driver_tag(struct request *rq);

static inline bool blk_mq_get_driver_tag(struct request *rq)
{
	if (rq->tag == BLK_MQ_NO_TAG && !__blk_mq_alloc_driver_tag(rq))
		return false;

	return true;
}
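/*
 * Note: with an I/O scheduler attached, a request initially holds only an
 * internal (scheduler) tag; blk_mq_get_driver_tag() acquires the hardware
 * tag lazily at dispatch time, and blk_mq_put_driver_tag() returns it if
 * the request must be requeued, leaving rq->tag == BLK_MQ_NO_TAG again.
 */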

static inline void blk_mq_clear_mq_map(struct blk_mq_queue_map *qmap)
{
	int cpu;

	for_each_possible_cpu(cpu)
		qmap->mq_map[cpu] = 0;
}

/*
 * blk_mq_plug() - Get caller context plug
 * @bio: the bio being submitted by the caller context
 *
 * Plugging, by design, may delay the insertion of BIOs into the elevator in
 * order to increase BIO merging opportunities. This however can cause BIO
 * insertion order to change from the order in which submit_bio() is being
 * executed in the case of multiple contexts concurrently issuing BIOs to a
 * device, even if these contexts are synchronized to tightly control BIO
 * issuing order. While this is not a problem with regular block devices,
 * this ordering change can cause write BIO failures with zoned block devices
 * as these require sequential write patterns to zones. Prevent this from
 * happening by ignoring the plug state of a BIO issuing context if it is for
 * a zoned block device and the BIO to plug is a write operation.
 *
 * Return current->plug if the bio can be plugged and NULL otherwise
 */
static inline struct blk_plug *blk_mq_plug(struct bio *bio)
{
	/* Zoned block device write operation case: do not plug the BIO */
	if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) &&
	    bdev_op_is_zoned_write(bio->bi_bdev, bio_op(bio)))
		return NULL;

	/*
	 * For regular block devices or read operations, use the context plug
	 * which may be NULL if blk_start_plug() was not executed.
	 */
	return current->plug;
}
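/*
 * Submission-side usage is a one-liner; a simplified sketch of the pattern
 * in blk_mq_submit_bio():
 *
 *	struct blk_plug *plug = blk_mq_plug(bio);
 *
 * A non-NULL plug batches the request on the plug list for later flushing
 * via blk_mq_flush_plug_list(); a NULL plug means the request is inserted
 * and dispatched directly.
 */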

/* Free all requests on the list */
static inline void blk_mq_free_requests(struct list_head *list)
{
	while (!list_empty(list)) {
		struct request *rq = list_entry_rq(list->next);

		list_del_init(&rq->queuelist);
		blk_mq_free_request(rq);
	}
}

/*
 * For shared tag users, we track the number of currently active users
 * and attempt to provide a fair share of the tag depth for each of them.
 */
static inline bool hctx_may_queue(struct blk_mq_hw_ctx *hctx,
				  struct sbitmap_queue *bt)
{
	unsigned int depth, users;

	if (!hctx || !(hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED))
		return true;

	/*
	 * Don't try dividing an ant
	 */
	if (bt->sb.depth == 1)
		return true;

	if (blk_mq_is_shared_tags(hctx->flags)) {
		struct request_queue *q = hctx->queue;

		if (!test_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags))
			return true;
	} else {
		if (!test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
			return true;
	}

	users = READ_ONCE(hctx->tags->active_queues);
	if (!users)
		return true;

	/*
	 * Allow at least some tags
	 */
	depth = max((bt->sb.depth + users - 1) / users, 4U);
	return __blk_mq_active_requests(hctx) < depth;
}
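/*
 * Worked example with illustrative numbers: for a shared bitmap of depth 64
 * and 3 active users, each user may have up to
 * max(DIV_ROUND_UP(64, 3), 4) = max(22, 4) = 22 requests in flight; with 64
 * active users the floor of 4 tags keeps any single user from being starved.
 */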

/* run the code block in @dispatch_ops with rcu/srcu read lock held */
#define __blk_mq_run_dispatch_ops(q, check_sleep, dispatch_ops)	\
do {								\
	if ((q)->tag_set->flags & BLK_MQ_F_BLOCKING) {		\
		struct blk_mq_tag_set *__tag_set = (q)->tag_set; \
		int srcu_idx;					\
								\
		might_sleep_if(check_sleep);			\
		srcu_idx = srcu_read_lock(__tag_set->srcu);	\
		(dispatch_ops);					\
		srcu_read_unlock(__tag_set->srcu, srcu_idx);	\
	} else {						\
		rcu_read_lock();				\
		(dispatch_ops);					\
		rcu_read_unlock();				\
	}							\
} while (0)

#define blk_mq_run_dispatch_ops(q, dispatch_ops)		\
	__blk_mq_run_dispatch_ops(q, true, dispatch_ops)

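/*
 * Example (a sketch patterned on the dispatch paths; the callee shown is
 * illustrative of real users, not prescribed by this header):
 *
 *	blk_mq_run_dispatch_ops(q,
 *			blk_mq_sched_dispatch_requests(hctx));
 *
 * The __ variant takes check_sleep to suppress the might_sleep_if() debug
 * check when the caller may be in a context that cannot sleep.
 */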
#endif