// SPDX-License-Identifier: GPL-2.0
/*
 * Block rq-qos policy for assigning an I/O priority class to requests.
 *
 * Using an rq-qos policy for assigning I/O priority class has two advantages
 * over using the ioprio_set() system call:
 *
 * - This policy is cgroup based so it has all the advantages of cgroups.
 * - While ioprio_set() does not affect page cache writeback I/O, this rq-qos
 *   controller affects page cache writeback I/O for filesystems that support
 *   associating a cgroup with writeback I/O. See also
 *   Documentation/admin-guide/cgroup-v2.rst.
 */
14 | |
15 | #include <linux/blk-mq.h> |
16 | #include <linux/blk_types.h> |
17 | #include <linux/kernel.h> |
18 | #include <linux/module.h> |
19 | #include "blk-cgroup.h" |
20 | #include "blk-ioprio.h" |
21 | #include "blk-rq-qos.h" |
22 | |
/**
 * enum prio_policy - I/O priority class policy.
 * @POLICY_NO_CHANGE: (default) do not modify the I/O priority class.
 * @POLICY_PROMOTE_TO_RT: change any class other than IOPRIO_CLASS_RT into
 *		IOPRIO_CLASS_RT.
 * @POLICY_RESTRICT_TO_BE: change IOPRIO_CLASS_NONE and IOPRIO_CLASS_RT into
 *		IOPRIO_CLASS_BE.
 * @POLICY_ALL_TO_IDLE: change the I/O priority class into IOPRIO_CLASS_IDLE.
 * @POLICY_NONE_TO_RT: an alias for POLICY_PROMOTE_TO_RT.
 *
 * The numeric values are spelled out because they are also used as indices
 * into the policy name table, and because POLICY_RESTRICT_TO_BE and
 * POLICY_ALL_TO_IDLE are handed to IOPRIO_PRIO_VALUE() as the class argument.
 * They are therefore expected to equal IOPRIO_CLASS_BE and IOPRIO_CLASS_IDLE
 * respectively.
 *
 * See also <linux/ioprio.h>.
 */
enum prio_policy {
	POLICY_NO_CHANGE	= 0,
	POLICY_PROMOTE_TO_RT	= 1,
	POLICY_RESTRICT_TO_BE	= 2,
	POLICY_ALL_TO_IDLE	= 3,
	POLICY_NONE_TO_RT	= 4,
};
41 | |
42 | static const char *policy_name[] = { |
43 | [POLICY_NO_CHANGE] = "no-change" , |
44 | [POLICY_PROMOTE_TO_RT] = "promote-to-rt" , |
45 | [POLICY_RESTRICT_TO_BE] = "restrict-to-be" , |
46 | [POLICY_ALL_TO_IDLE] = "idle" , |
47 | [POLICY_NONE_TO_RT] = "none-to-rt" , |
48 | }; |
49 | |
50 | static struct blkcg_policy ioprio_policy; |
51 | |
52 | /** |
53 | * struct ioprio_blkg - Per (cgroup, request queue) data. |
54 | * @pd: blkg_policy_data structure. |
55 | */ |
56 | struct ioprio_blkg { |
57 | struct blkg_policy_data pd; |
58 | }; |
59 | |
60 | /** |
61 | * struct ioprio_blkcg - Per cgroup data. |
62 | * @cpd: blkcg_policy_data structure. |
63 | * @prio_policy: One of the IOPRIO_CLASS_* values. See also <linux/ioprio.h>. |
64 | */ |
65 | struct ioprio_blkcg { |
66 | struct blkcg_policy_data cpd; |
67 | enum prio_policy prio_policy; |
68 | }; |
69 | |
70 | static inline struct ioprio_blkg *pd_to_ioprio(struct blkg_policy_data *pd) |
71 | { |
72 | return pd ? container_of(pd, struct ioprio_blkg, pd) : NULL; |
73 | } |
74 | |
75 | static struct ioprio_blkcg *blkcg_to_ioprio_blkcg(struct blkcg *blkcg) |
76 | { |
77 | return container_of(blkcg_to_cpd(blkcg, &ioprio_policy), |
78 | struct ioprio_blkcg, cpd); |
79 | } |
80 | |
/* Resolve the ioprio per-cgroup data from a cgroup subsystem state. */
static struct ioprio_blkcg *
ioprio_blkcg_from_css(struct cgroup_subsys_state *css)
{
	struct blkcg *blkcg = css_to_blkcg(css);

	return blkcg_to_ioprio_blkcg(blkcg);
}
86 | |
87 | static struct ioprio_blkcg *ioprio_blkcg_from_bio(struct bio *bio) |
88 | { |
89 | struct blkg_policy_data *pd = blkg_to_pd(blkg: bio->bi_blkg, pol: &ioprio_policy); |
90 | |
91 | if (!pd) |
92 | return NULL; |
93 | |
94 | return blkcg_to_ioprio_blkcg(blkcg: pd->blkg->blkcg); |
95 | } |
96 | |
97 | static int ioprio_show_prio_policy(struct seq_file *sf, void *v) |
98 | { |
99 | struct ioprio_blkcg *blkcg = ioprio_blkcg_from_css(css: seq_css(seq: sf)); |
100 | |
101 | seq_printf(m: sf, fmt: "%s\n" , policy_name[blkcg->prio_policy]); |
102 | return 0; |
103 | } |
104 | |
105 | static ssize_t ioprio_set_prio_policy(struct kernfs_open_file *of, char *buf, |
106 | size_t nbytes, loff_t off) |
107 | { |
108 | struct ioprio_blkcg *blkcg = ioprio_blkcg_from_css(css: of_css(of)); |
109 | int ret; |
110 | |
111 | if (off != 0) |
112 | return -EIO; |
113 | /* kernfs_fop_write_iter() terminates 'buf' with '\0'. */ |
114 | ret = sysfs_match_string(policy_name, buf); |
115 | if (ret < 0) |
116 | return ret; |
117 | blkcg->prio_policy = ret; |
118 | return nbytes; |
119 | } |
120 | |
121 | static struct blkg_policy_data * |
122 | ioprio_alloc_pd(struct gendisk *disk, struct blkcg *blkcg, gfp_t gfp) |
123 | { |
124 | struct ioprio_blkg *ioprio_blkg; |
125 | |
126 | ioprio_blkg = kzalloc(size: sizeof(*ioprio_blkg), flags: gfp); |
127 | if (!ioprio_blkg) |
128 | return NULL; |
129 | |
130 | return &ioprio_blkg->pd; |
131 | } |
132 | |
/* pd_free_fn callback: release the ioprio_blkg that embeds @pd. */
static void ioprio_free_pd(struct blkg_policy_data *pd)
{
	kfree(pd_to_ioprio(pd));
}
139 | |
140 | static struct blkcg_policy_data *ioprio_alloc_cpd(gfp_t gfp) |
141 | { |
142 | struct ioprio_blkcg *blkcg; |
143 | |
144 | blkcg = kzalloc(size: sizeof(*blkcg), flags: gfp); |
145 | if (!blkcg) |
146 | return NULL; |
147 | blkcg->prio_policy = POLICY_NO_CHANGE; |
148 | return &blkcg->cpd; |
149 | } |
150 | |
151 | static void ioprio_free_cpd(struct blkcg_policy_data *cpd) |
152 | { |
153 | struct ioprio_blkcg *blkcg = container_of(cpd, typeof(*blkcg), cpd); |
154 | |
155 | kfree(objp: blkcg); |
156 | } |
157 | |
/*
 * Initializer list shared by the cftype arrays below: the "prio.class" file
 * with its show and write handlers, followed by the empty terminating entry.
 */
#define IOPRIO_ATTRS						\
	{							\
		.name		= "prio.class",			\
		.seq_show	= ioprio_show_prio_policy,	\
		.write		= ioprio_set_prio_policy,	\
	},							\
	{ } /* sentinel */
165 | |
166 | /* cgroup v2 attributes */ |
167 | static struct cftype ioprio_files[] = { |
168 | IOPRIO_ATTRS |
169 | }; |
170 | |
171 | /* cgroup v1 attributes */ |
172 | static struct cftype ioprio_legacy_files[] = { |
173 | IOPRIO_ATTRS |
174 | }; |
175 | |
176 | static struct blkcg_policy ioprio_policy = { |
177 | .dfl_cftypes = ioprio_files, |
178 | .legacy_cftypes = ioprio_legacy_files, |
179 | |
180 | .cpd_alloc_fn = ioprio_alloc_cpd, |
181 | .cpd_free_fn = ioprio_free_cpd, |
182 | |
183 | .pd_alloc_fn = ioprio_alloc_pd, |
184 | .pd_free_fn = ioprio_free_pd, |
185 | }; |
186 | |
187 | void blkcg_set_ioprio(struct bio *bio) |
188 | { |
189 | struct ioprio_blkcg *blkcg = ioprio_blkcg_from_bio(bio); |
190 | u16 prio; |
191 | |
192 | if (!blkcg || blkcg->prio_policy == POLICY_NO_CHANGE) |
193 | return; |
194 | |
195 | if (blkcg->prio_policy == POLICY_PROMOTE_TO_RT || |
196 | blkcg->prio_policy == POLICY_NONE_TO_RT) { |
197 | /* |
198 | * For RT threads, the default priority level is 4 because |
199 | * task_nice is 0. By promoting non-RT io-priority to RT-class |
200 | * and default level 4, those requests that are already |
201 | * RT-class but need a higher io-priority can use ioprio_set() |
202 | * to achieve this. |
203 | */ |
204 | if (IOPRIO_PRIO_CLASS(bio->bi_ioprio) != IOPRIO_CLASS_RT) |
205 | bio->bi_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_RT, 4); |
206 | return; |
207 | } |
208 | |
209 | /* |
210 | * Except for IOPRIO_CLASS_NONE, higher I/O priority numbers |
211 | * correspond to a lower priority. Hence, the max_t() below selects |
212 | * the lower priority of bi_ioprio and the cgroup I/O priority class. |
213 | * If the bio I/O priority equals IOPRIO_CLASS_NONE, the cgroup I/O |
214 | * priority is assigned to the bio. |
215 | */ |
216 | prio = max_t(u16, bio->bi_ioprio, |
217 | IOPRIO_PRIO_VALUE(blkcg->prio_policy, 0)); |
218 | if (prio > bio->bi_ioprio) |
219 | bio->bi_ioprio = prio; |
220 | } |
221 | |
222 | void blk_ioprio_exit(struct gendisk *disk) |
223 | { |
224 | blkcg_deactivate_policy(disk, pol: &ioprio_policy); |
225 | } |
226 | |
227 | int blk_ioprio_init(struct gendisk *disk) |
228 | { |
229 | return blkcg_activate_policy(disk, pol: &ioprio_policy); |
230 | } |
231 | |
232 | static int __init ioprio_init(void) |
233 | { |
234 | return blkcg_policy_register(pol: &ioprio_policy); |
235 | } |
236 | |
237 | static void __exit ioprio_exit(void) |
238 | { |
239 | blkcg_policy_unregister(pol: &ioprio_policy); |
240 | } |
241 | |
/* Hook policy registration and unregistration into module load and unload. */
module_init(ioprio_init);
module_exit(ioprio_exit);
244 | |