1 | /* SPDX-License-Identifier: GPL-2.0 */ |
2 | #ifndef _BLK_CGROUP_PRIVATE_H |
3 | #define _BLK_CGROUP_PRIVATE_H |
4 | /* |
5 | * block cgroup private header |
6 | * |
7 | * Based on ideas and code from CFQ, CFS and BFQ: |
8 | * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk> |
9 | * |
10 | * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it> |
11 | * Paolo Valente <paolo.valente@unimore.it> |
12 | * |
13 | * Copyright (C) 2009 Vivek Goyal <vgoyal@redhat.com> |
14 | * Nauman Rafique <nauman@google.com> |
15 | */ |
16 | |
17 | #include <linux/blk-cgroup.h> |
18 | #include <linux/cgroup.h> |
19 | #include <linux/kthread.h> |
20 | #include <linux/blk-mq.h> |
21 | #include <linux/llist.h> |
22 | |
23 | struct blkcg_gq; |
24 | struct blkg_policy_data; |
25 | |
26 | |
27 | /* percpu_counter batch for blkg_[rw]stats, per-cpu drift doesn't matter */ |
28 | #define BLKG_STAT_CPU_BATCH (INT_MAX / 2) |
29 | |
30 | #ifdef CONFIG_BLK_CGROUP |
31 | |
32 | enum blkg_iostat_type { |
33 | BLKG_IOSTAT_READ, |
34 | BLKG_IOSTAT_WRITE, |
35 | BLKG_IOSTAT_DISCARD, |
36 | |
37 | BLKG_IOSTAT_NR, |
38 | }; |
39 | |
40 | struct blkg_iostat { |
41 | u64 bytes[BLKG_IOSTAT_NR]; |
42 | u64 ios[BLKG_IOSTAT_NR]; |
43 | }; |
44 | |
45 | struct blkg_iostat_set { |
46 | struct u64_stats_sync sync; |
47 | struct blkcg_gq *blkg; |
48 | struct llist_node lnode; |
49 | int lqueued; /* queued in llist */ |
50 | struct blkg_iostat cur; |
51 | struct blkg_iostat last; |
52 | }; |
53 | |
54 | /* association between a blk cgroup and a request queue */ |
55 | struct blkcg_gq { |
56 | /* Pointer to the associated request_queue */ |
57 | struct request_queue *q; |
58 | struct list_head q_node; |
59 | struct hlist_node blkcg_node; |
60 | struct blkcg *blkcg; |
61 | |
62 | /* all non-root blkcg_gq's are guaranteed to have access to parent */ |
63 | struct blkcg_gq *parent; |
64 | |
65 | /* reference count */ |
66 | struct percpu_ref refcnt; |
67 | |
68 | /* is this blkg online? protected by both blkcg and q locks */ |
69 | bool online; |
70 | |
71 | struct blkg_iostat_set __percpu *iostat_cpu; |
72 | struct blkg_iostat_set iostat; |
73 | |
74 | struct blkg_policy_data *pd[BLKCG_MAX_POLS]; |
75 | #ifdef CONFIG_BLK_CGROUP_PUNT_BIO |
76 | spinlock_t async_bio_lock; |
77 | struct bio_list async_bios; |
78 | #endif |
79 | union { |
80 | struct work_struct async_bio_work; |
81 | struct work_struct free_work; |
82 | }; |
83 | |
84 | atomic_t use_delay; |
85 | atomic64_t delay_nsec; |
86 | atomic64_t delay_start; |
87 | u64 last_delay; |
88 | int last_use; |
89 | |
90 | struct rcu_head rcu_head; |
91 | }; |
92 | |
93 | struct blkcg { |
94 | struct cgroup_subsys_state css; |
95 | spinlock_t lock; |
96 | refcount_t online_pin; |
97 | |
98 | struct radix_tree_root blkg_tree; |
99 | struct blkcg_gq __rcu *blkg_hint; |
100 | struct hlist_head blkg_list; |
101 | |
102 | struct blkcg_policy_data *cpd[BLKCG_MAX_POLS]; |
103 | |
104 | struct list_head all_blkcgs_node; |
105 | |
106 | /* |
107 | * List of updated percpu blkg_iostat_set's since the last flush. |
108 | */ |
109 | struct llist_head __percpu *lhead; |
110 | |
111 | #ifdef CONFIG_BLK_CGROUP_FC_APPID |
112 | char fc_app_id[FC_APPID_LEN]; |
113 | #endif |
114 | #ifdef CONFIG_CGROUP_WRITEBACK |
115 | struct list_head cgwb_list; |
116 | #endif |
117 | }; |
118 | |
119 | static inline struct blkcg *css_to_blkcg(struct cgroup_subsys_state *css) |
120 | { |
121 | return css ? container_of(css, struct blkcg, css) : NULL; |
122 | } |
123 | |
124 | /* |
 * A blkcg_gq (blkg) is an association between a block cgroup (blkcg) and a
126 | * request_queue (q). This is used by blkcg policies which need to track |
127 | * information per blkcg - q pair. |
128 | * |
129 | * There can be multiple active blkcg policies and each blkg:policy pair is |
130 | * represented by a blkg_policy_data which is allocated and freed by each |
 * policy's pd_alloc/free_fn() methods. A policy can allocate a private
 * data area by allocating a larger data structure which embeds
 * blkg_policy_data at the beginning.
134 | */ |
135 | struct blkg_policy_data { |
136 | /* the blkg and policy id this per-policy data belongs to */ |
137 | struct blkcg_gq *blkg; |
138 | int plid; |
139 | bool online; |
140 | }; |
141 | |
142 | /* |
 * Policies that need to keep per-blkcg data which is independent of any
 * request_queue associated with it should implement cpd_alloc/free_fn()
 * methods. A policy can allocate a private data area by allocating a
 * larger data structure which embeds blkcg_policy_data at the beginning.
147 | * cpd_init() is invoked to let each policy handle per-blkcg data. |
148 | */ |
149 | struct blkcg_policy_data { |
150 | /* the blkcg and policy id this per-policy data belongs to */ |
151 | struct blkcg *blkcg; |
152 | int plid; |
153 | }; |
154 | |
155 | typedef struct blkcg_policy_data *(blkcg_pol_alloc_cpd_fn)(gfp_t gfp); |
156 | typedef void (blkcg_pol_init_cpd_fn)(struct blkcg_policy_data *cpd); |
157 | typedef void (blkcg_pol_free_cpd_fn)(struct blkcg_policy_data *cpd); |
158 | typedef void (blkcg_pol_bind_cpd_fn)(struct blkcg_policy_data *cpd); |
159 | typedef struct blkg_policy_data *(blkcg_pol_alloc_pd_fn)(struct gendisk *disk, |
160 | struct blkcg *blkcg, gfp_t gfp); |
161 | typedef void (blkcg_pol_init_pd_fn)(struct blkg_policy_data *pd); |
162 | typedef void (blkcg_pol_online_pd_fn)(struct blkg_policy_data *pd); |
163 | typedef void (blkcg_pol_offline_pd_fn)(struct blkg_policy_data *pd); |
164 | typedef void (blkcg_pol_free_pd_fn)(struct blkg_policy_data *pd); |
165 | typedef void (blkcg_pol_reset_pd_stats_fn)(struct blkg_policy_data *pd); |
166 | typedef void (blkcg_pol_stat_pd_fn)(struct blkg_policy_data *pd, |
167 | struct seq_file *s); |
168 | |
169 | struct blkcg_policy { |
170 | int plid; |
171 | /* cgroup files for the policy */ |
172 | struct cftype *dfl_cftypes; |
173 | struct cftype *legacy_cftypes; |
174 | |
175 | /* operations */ |
176 | blkcg_pol_alloc_cpd_fn *cpd_alloc_fn; |
177 | blkcg_pol_free_cpd_fn *cpd_free_fn; |
178 | |
179 | blkcg_pol_alloc_pd_fn *pd_alloc_fn; |
180 | blkcg_pol_init_pd_fn *pd_init_fn; |
181 | blkcg_pol_online_pd_fn *pd_online_fn; |
182 | blkcg_pol_offline_pd_fn *pd_offline_fn; |
183 | blkcg_pol_free_pd_fn *pd_free_fn; |
184 | blkcg_pol_reset_pd_stats_fn *pd_reset_stats_fn; |
185 | blkcg_pol_stat_pd_fn *pd_stat_fn; |
186 | }; |
187 | |
188 | extern struct blkcg blkcg_root; |
189 | extern bool blkcg_debug_stats; |
190 | |
191 | int blkcg_init_disk(struct gendisk *disk); |
192 | void blkcg_exit_disk(struct gendisk *disk); |
193 | |
194 | /* Blkio controller policy registration */ |
195 | int blkcg_policy_register(struct blkcg_policy *pol); |
196 | void blkcg_policy_unregister(struct blkcg_policy *pol); |
197 | int blkcg_activate_policy(struct gendisk *disk, const struct blkcg_policy *pol); |
198 | void blkcg_deactivate_policy(struct gendisk *disk, |
199 | const struct blkcg_policy *pol); |
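
/*
 * Registration sketched for a hypothetical "foo" policy (the foo_*
 * callbacks are assumptions, not in-tree code).  blkcg_policy_register()
 * assigns ->plid; per-disk activation is done separately through
 * blkcg_activate_policy():
 *
 *	static struct blkcg_policy blkcg_policy_foo = {
 *		.pd_alloc_fn	= foo_pd_alloc,
 *		.pd_init_fn	= foo_pd_init,
 *		.pd_free_fn	= foo_pd_free,
 *	};
 *
 *	static int __init foo_init(void)
 *	{
 *		return blkcg_policy_register(&blkcg_policy_foo);
 *	}
 */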
200 | |
201 | const char *blkg_dev_name(struct blkcg_gq *blkg); |
202 | void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg, |
203 | u64 (*prfill)(struct seq_file *, |
204 | struct blkg_policy_data *, int), |
205 | const struct blkcg_policy *pol, int data, |
206 | bool show_total); |
207 | u64 __blkg_prfill_u64(struct seq_file *sf, struct blkg_policy_data *pd, u64 v); |
208 | |
209 | struct blkg_conf_ctx { |
210 | char *input; |
211 | char *body; |
212 | struct block_device *bdev; |
213 | struct blkcg_gq *blkg; |
214 | }; |
215 | |
216 | void blkg_conf_init(struct blkg_conf_ctx *ctx, char *input); |
217 | int blkg_conf_open_bdev(struct blkg_conf_ctx *ctx); |
218 | int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol, |
219 | struct blkg_conf_ctx *ctx); |
220 | void blkg_conf_exit(struct blkg_conf_ctx *ctx); |
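
/*
 * Typical flow for writing a per-device config, sketched against the API
 * above (foo_apply() and blkcg_policy_foo are hypothetical).  On success
 * blkg_conf_prep() leaves ctx.blkg and ctx.body valid until
 * blkg_conf_exit():
 *
 *	struct blkg_conf_ctx ctx;
 *	int ret;
 *
 *	blkg_conf_init(&ctx, buf);
 *	ret = blkg_conf_prep(blkcg, &blkcg_policy_foo, &ctx);
 *	if (!ret)
 *		ret = foo_apply(ctx.blkg, ctx.body);
 *	blkg_conf_exit(&ctx);
 */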
221 | |
222 | /** |
223 | * bio_issue_as_root_blkg - see if this bio needs to be issued as root blkg |
 * @bio: bio of interest
 *
 * Return: true if this bio needs to be submitted with the root blkg context.
225 | * |
226 | * In order to avoid priority inversions we sometimes need to issue a bio as if |
227 | * it were attached to the root blkg, and then backcharge to the actual owning |
228 | * blkg. The idea is we do bio_blkcg_css() to look up the actual context for |
229 | * the bio and attach the appropriate blkg to the bio. Then we call this helper |
 * and, if it returns true, run with the root blkg for that queue and then do
 * any backcharging to the originating cgroup once the io is complete.
232 | */ |
233 | static inline bool bio_issue_as_root_blkg(struct bio *bio) |
234 | { |
235 | return (bio->bi_opf & (REQ_META | REQ_SWAP)) != 0; |
236 | } |
237 | |
238 | /** |
239 | * blkg_lookup - lookup blkg for the specified blkcg - q pair |
240 | * @blkcg: blkcg of interest |
241 | * @q: request_queue of interest |
242 | * |
 * Lookup blkg for the @blkcg - @q pair.
 *
 * Must be called in an RCU critical section.
246 | */ |
247 | static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, |
248 | struct request_queue *q) |
249 | { |
250 | struct blkcg_gq *blkg; |
251 | |
252 | WARN_ON_ONCE(!rcu_read_lock_held()); |
253 | |
254 | if (blkcg == &blkcg_root) |
255 | return q->root_blkg; |
256 | |
257 | blkg = rcu_dereference(blkcg->blkg_hint); |
258 | if (blkg && blkg->q == q) |
259 | return blkg; |
260 | |
261 | blkg = radix_tree_lookup(&blkcg->blkg_tree, q->id); |
262 | if (blkg && blkg->q != q) |
263 | blkg = NULL; |
264 | return blkg; |
265 | } |
266 | |
267 | /** |
 * blkg_to_pd - get policy private data
269 | * @blkg: blkg of interest |
270 | * @pol: policy of interest |
271 | * |
272 | * Return pointer to private data associated with the @blkg-@pol pair. |
273 | */ |
274 | static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg, |
275 | struct blkcg_policy *pol) |
276 | { |
277 | return blkg ? blkg->pd[pol->plid] : NULL; |
278 | } |
279 | |
280 | static inline struct blkcg_policy_data *blkcg_to_cpd(struct blkcg *blkcg, |
281 | struct blkcg_policy *pol) |
282 | { |
283 | return blkcg ? blkcg->cpd[pol->plid] : NULL; |
284 | } |
285 | |
286 | /** |
 * pd_to_blkg - get blkg associated with policy private data
288 | * @pd: policy private data of interest |
289 | * |
290 | * @pd is policy private data. Determine the blkg it's associated with. |
291 | */ |
292 | static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd) |
293 | { |
294 | return pd ? pd->blkg : NULL; |
295 | } |
296 | |
297 | static inline struct blkcg *cpd_to_blkcg(struct blkcg_policy_data *cpd) |
298 | { |
299 | return cpd ? cpd->blkcg : NULL; |
300 | } |
301 | |
302 | /** |
303 | * blkg_path - format cgroup path of blkg |
304 | * @blkg: blkg of interest |
305 | * @buf: target buffer |
306 | * @buflen: target buffer length |
307 | * |
308 | * Format the path of the cgroup of @blkg into @buf. |
309 | */ |
310 | static inline int blkg_path(struct blkcg_gq *blkg, char *buf, int buflen) |
311 | { |
	return cgroup_path(blkg->blkcg->css.cgroup, buf, buflen);
313 | } |
314 | |
315 | /** |
316 | * blkg_get - get a blkg reference |
317 | * @blkg: blkg to get |
318 | * |
319 | * The caller should be holding an existing reference. |
320 | */ |
321 | static inline void blkg_get(struct blkcg_gq *blkg) |
322 | { |
	percpu_ref_get(&blkg->refcnt);
324 | } |
325 | |
326 | /** |
327 | * blkg_tryget - try and get a blkg reference |
328 | * @blkg: blkg to get |
329 | * |
330 | * This is for use when doing an RCU lookup of the blkg. We may be in the midst |
331 | * of freeing this blkg, so we can only use it if the refcnt is not zero. |
332 | */ |
333 | static inline bool blkg_tryget(struct blkcg_gq *blkg) |
334 | { |
	return blkg && percpu_ref_tryget(&blkg->refcnt);
336 | } |
337 | |
338 | /** |
339 | * blkg_put - put a blkg reference |
340 | * @blkg: blkg to put |
341 | */ |
342 | static inline void blkg_put(struct blkcg_gq *blkg) |
343 | { |
	percpu_ref_put(&blkg->refcnt);
345 | } |
346 | |
347 | /** |
348 | * blkg_for_each_descendant_pre - pre-order walk of a blkg's descendants |
349 | * @d_blkg: loop cursor pointing to the current descendant |
350 | * @pos_css: used for iteration |
351 | * @p_blkg: target blkg to walk descendants of |
352 | * |
 * Walk @d_blkg through the descendants of @p_blkg. Must be used with RCU
 * read locked. If called under either blkcg or queue lock, the iteration
 * is guaranteed to include all and only online blkgs. The caller may
 * update @pos_css by calling css_rightmost_descendant() to skip a subtree.
 * @p_blkg is included in the iteration and is the first node to be visited.
358 | */ |
359 | #define blkg_for_each_descendant_pre(d_blkg, pos_css, p_blkg) \ |
360 | css_for_each_descendant_pre((pos_css), &(p_blkg)->blkcg->css) \ |
361 | if (((d_blkg) = blkg_lookup(css_to_blkcg(pos_css), \ |
362 | (p_blkg)->q))) |
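
/*
 * e.g. propagating a config change down the hierarchy (sketch; foo_update()
 * is hypothetical and p_blkg is the parent blkg):
 *
 *	struct cgroup_subsys_state *pos_css;
 *	struct blkcg_gq *blkg;
 *
 *	rcu_read_lock();
 *	blkg_for_each_descendant_pre(blkg, pos_css, p_blkg)
 *		foo_update(blkg);
 *	rcu_read_unlock();
 */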
363 | |
364 | /** |
365 | * blkg_for_each_descendant_post - post-order walk of a blkg's descendants |
366 | * @d_blkg: loop cursor pointing to the current descendant |
367 | * @pos_css: used for iteration |
368 | * @p_blkg: target blkg to walk descendants of |
369 | * |
370 | * Similar to blkg_for_each_descendant_pre() but performs post-order |
 * traversal instead. Synchronization rules are the same. @p_blkg is
 * included in the iteration and is the last node to be visited.
373 | */ |
374 | #define blkg_for_each_descendant_post(d_blkg, pos_css, p_blkg) \ |
375 | css_for_each_descendant_post((pos_css), &(p_blkg)->blkcg->css) \ |
376 | if (((d_blkg) = blkg_lookup(css_to_blkcg(pos_css), \ |
377 | (p_blkg)->q))) |
378 | |
379 | static inline void blkcg_bio_issue_init(struct bio *bio) |
380 | { |
	bio_issue_init(&bio->bi_issue, bio_sectors(bio));
382 | } |
383 | |
384 | static inline void blkcg_use_delay(struct blkcg_gq *blkg) |
385 | { |
386 | if (WARN_ON_ONCE(atomic_read(&blkg->use_delay) < 0)) |
387 | return; |
	if (atomic_add_return(1, &blkg->use_delay) == 1)
		atomic_inc(&blkg->blkcg->css.cgroup->congestion_count);
390 | } |
391 | |
392 | static inline int blkcg_unuse_delay(struct blkcg_gq *blkg) |
393 | { |
	int old = atomic_read(&blkg->use_delay);
395 | |
396 | if (WARN_ON_ONCE(old < 0)) |
397 | return 0; |
398 | if (old == 0) |
399 | return 0; |
400 | |
401 | /* |
402 | * We do this song and dance because we can race with somebody else |
403 | * adding or removing delay. If we just did an atomic_dec we'd end up |
404 | * negative and we'd already be in trouble. We need to subtract 1 and |
405 | * then check to see if we were the last delay so we can drop the |
406 | * congestion count on the cgroup. |
407 | */ |
	while (old && !atomic_try_cmpxchg(&blkg->use_delay, &old, old - 1))
409 | ; |
410 | |
411 | if (old == 0) |
412 | return 0; |
413 | if (old == 1) |
		atomic_dec(&blkg->blkcg->css.cgroup->congestion_count);
415 | return 1; |
416 | } |
417 | |
418 | /** |
419 | * blkcg_set_delay - Enable allocator delay mechanism with the specified delay amount |
420 | * @blkg: target blkg |
421 | * @delay: delay duration in nsecs |
422 | * |
423 | * When enabled with this function, the delay is not decayed and must be |
424 | * explicitly cleared with blkcg_clear_delay(). Must not be mixed with |
425 | * blkcg_[un]use_delay() and blkcg_add_delay() usages. |
426 | */ |
427 | static inline void blkcg_set_delay(struct blkcg_gq *blkg, u64 delay) |
428 | { |
	int old = atomic_read(&blkg->use_delay);
430 | |
431 | /* We only want 1 person setting the congestion count for this blkg. */ |
	if (!old && atomic_try_cmpxchg(&blkg->use_delay, &old, -1))
		atomic_inc(&blkg->blkcg->css.cgroup->congestion_count);

	atomic64_set(&blkg->delay_nsec, delay);
436 | } |
437 | |
438 | /** |
439 | * blkcg_clear_delay - Disable allocator delay mechanism |
440 | * @blkg: target blkg |
441 | * |
442 | * Disable use_delay mechanism. See blkcg_set_delay(). |
443 | */ |
444 | static inline void blkcg_clear_delay(struct blkcg_gq *blkg) |
445 | { |
	int old = atomic_read(&blkg->use_delay);
447 | |
448 | /* We only want 1 person clearing the congestion count for this blkg. */ |
	if (old && atomic_try_cmpxchg(&blkg->use_delay, &old, 0))
		atomic_dec(&blkg->blkcg->css.cgroup->congestion_count);
451 | } |
452 | |
453 | /** |
454 | * blk_cgroup_mergeable - Determine whether to allow or disallow merges |
455 | * @rq: request to merge into |
456 | * @bio: bio to merge |
457 | * |
458 | * @bio and @rq should belong to the same cgroup and their issue_as_root should |
459 | * match. The latter is necessary as we don't want to throttle e.g. a metadata |
460 | * update because it happens to be next to a regular IO. |
461 | */ |
462 | static inline bool blk_cgroup_mergeable(struct request *rq, struct bio *bio) |
463 | { |
464 | return rq->bio->bi_blkg == bio->bi_blkg && |
		bio_issue_as_root_blkg(rq->bio) == bio_issue_as_root_blkg(bio);
466 | } |
467 | |
468 | void blk_cgroup_bio_start(struct bio *bio); |
469 | void blkcg_add_delay(struct blkcg_gq *blkg, u64 now, u64 delta); |
470 | #else /* CONFIG_BLK_CGROUP */ |
471 | |
472 | struct blkg_policy_data { |
473 | }; |
474 | |
475 | struct blkcg_policy_data { |
476 | }; |
477 | |
478 | struct blkcg_policy { |
479 | }; |
480 | |
481 | struct blkcg { |
482 | }; |
483 | |
static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg,
					   struct request_queue *q) { return NULL; }
485 | static inline int blkcg_init_disk(struct gendisk *disk) { return 0; } |
486 | static inline void blkcg_exit_disk(struct gendisk *disk) { } |
487 | static inline int blkcg_policy_register(struct blkcg_policy *pol) { return 0; } |
488 | static inline void blkcg_policy_unregister(struct blkcg_policy *pol) { } |
489 | static inline int blkcg_activate_policy(struct gendisk *disk, |
490 | const struct blkcg_policy *pol) { return 0; } |
491 | static inline void blkcg_deactivate_policy(struct gendisk *disk, |
492 | const struct blkcg_policy *pol) { } |
493 | |
494 | static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg, |
495 | struct blkcg_policy *pol) { return NULL; } |
496 | static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd) { return NULL; } |
static inline int blkg_path(struct blkcg_gq *blkg, char *buf, int buflen) { return 0; }
498 | static inline void blkg_get(struct blkcg_gq *blkg) { } |
499 | static inline void blkg_put(struct blkcg_gq *blkg) { } |
500 | static inline void blkcg_bio_issue_init(struct bio *bio) { } |
501 | static inline void blk_cgroup_bio_start(struct bio *bio) { } |
502 | static inline bool blk_cgroup_mergeable(struct request *rq, struct bio *bio) { return true; } |
503 | |
504 | #define blk_queue_for_each_rl(rl, q) \ |
505 | for ((rl) = &(q)->root_rl; (rl); (rl) = NULL) |
506 | |
507 | #endif /* CONFIG_BLK_CGROUP */ |
508 | |
509 | #endif /* _BLK_CGROUP_PRIVATE_H */ |
510 | |