// SPDX-License-Identifier: GPL-2.0
#include <linux/cgroup.h>
#include <linux/sched.h>
#include <linux/sched/task.h>
#include <linux/sched/signal.h>

#include "cgroup-internal.h"

#include <trace/events/cgroup.h>

/*
 * Update the CGRP_FROZEN bit in cgrp->flags.
 * Return true if the flag was updated; false if it already had the
 * desired value.
 */
static bool cgroup_update_frozen_flag(struct cgroup *cgrp, bool frozen)
{
	lockdep_assert_held(&css_set_lock);

	/* Already there? */
	if (test_bit(CGRP_FROZEN, &cgrp->flags) == frozen)
		return false;

	if (frozen)
		set_bit(CGRP_FROZEN, &cgrp->flags);
	else
		clear_bit(CGRP_FROZEN, &cgrp->flags);

	cgroup_file_notify(&cgrp->events_file);
	TRACE_CGROUP_PATH(notify_frozen, cgrp, frozen);
	return true;
}

/*
 * Propagate the cgroup frozen state upwards through the cgroup tree.
 */
static void cgroup_propagate_frozen(struct cgroup *cgrp, bool frozen)
{
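	/*
	 * desc counts how many cgroups below the current ancestor have
	 * changed their CGRP_FROZEN state: the original cgroup plus any
	 * intermediate ancestors flipped by this loop. Each level's
	 * nr_frozen_descendants is adjusted by that amount.
	 */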
	int desc = 1;

	/*
	 * If the new state is frozen, some freezing ancestor cgroups may
	 * change their state too, depending on whether all their
	 * descendants are frozen.
	 *
	 * Otherwise, all ancestor cgroups are forced into the non-frozen
	 * state.
	 */
	while ((cgrp = cgroup_parent(cgrp))) {
		if (frozen) {
			cgrp->freezer.nr_frozen_descendants += desc;
			if (!test_bit(CGRP_FREEZE, &cgrp->flags) ||
			    (cgrp->freezer.nr_frozen_descendants !=
			     cgrp->nr_descendants))
				continue;
		} else {
			cgrp->freezer.nr_frozen_descendants -= desc;
		}

		if (cgroup_update_frozen_flag(cgrp, frozen))
			desc++;
	}
}

/*
 * Revisit the cgroup frozen state.
 * Check whether the cgroup is really frozen and perform all necessary
 * state transitions.
 */
void cgroup_update_frozen(struct cgroup *cgrp)
{
	bool frozen;

	/*
	 * If the cgroup has to be frozen (CGRP_FREEZE bit set),
	 * and all tasks are frozen and/or stopped, let's consider
	 * the cgroup frozen. Otherwise it's not frozen.
	 */
	frozen = test_bit(CGRP_FREEZE, &cgrp->flags) &&
		 cgrp->freezer.nr_frozen_tasks == __cgroup_task_count(cgrp);

	/* If the flag was updated, update the state of ancestor cgroups. */
	if (cgroup_update_frozen_flag(cgrp, frozen))
		cgroup_propagate_frozen(cgrp, frozen);
}

/*
 * Increment cgroup's nr_frozen_tasks.
 */
static void cgroup_inc_frozen_cnt(struct cgroup *cgrp)
{
	cgrp->freezer.nr_frozen_tasks++;
}

/*
 * Decrement cgroup's nr_frozen_tasks.
 */
static void cgroup_dec_frozen_cnt(struct cgroup *cgrp)
{
	cgrp->freezer.nr_frozen_tasks--;
	WARN_ON_ONCE(cgrp->freezer.nr_frozen_tasks < 0);
}

/*
 * Enter frozen/stopped state, if not yet there. Update cgroup's counters,
 * and revisit the state of the cgroup, if necessary.
 */
void cgroup_enter_frozen(void)
{
	struct cgroup *cgrp;

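	/*
	 * ->frozen is modified only by the task itself, so it can be
	 * checked here without taking css_set_lock.
	 */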
	if (current->frozen)
		return;

	spin_lock_irq(&css_set_lock);
	current->frozen = true;
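	/* The freezer is cgroup v2 only, hence the default hierarchy. */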
	cgrp = task_dfl_cgroup(current);
	cgroup_inc_frozen_cnt(cgrp);
	cgroup_update_frozen(cgrp);
	spin_unlock_irq(&css_set_lock);
}

/*
 * Conditionally leave frozen/stopped state. Update cgroup's counters,
 * and revisit the state of the cgroup, if necessary.
 *
 * If always_leave is not set and the cgroup is freezing, we're racing
 * with the freezer. In this case, don't drop the frozen counter, to
 * avoid a transient switch to the unfrozen state.
 */
void cgroup_leave_frozen(bool always_leave)
{
	struct cgroup *cgrp;

	spin_lock_irq(&css_set_lock);
	cgrp = task_dfl_cgroup(current);
	if (always_leave || !test_bit(CGRP_FREEZE, &cgrp->flags)) {
		cgroup_dec_frozen_cnt(cgrp);
		cgroup_update_frozen(cgrp);
		WARN_ON_ONCE(!current->frozen);
		current->frozen = false;
	} else if (!(current->jobctl & JOBCTL_TRAP_FREEZE)) {
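		/*
		 * We're racing with the freezer: keep the frozen counter
		 * and send the task back into the signal delivery path,
		 * where the JOBCTL_TRAP_FREEZE trap will return it to the
		 * frozen state.
		 */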
		spin_lock(&current->sighand->siglock);
		current->jobctl |= JOBCTL_TRAP_FREEZE;
		set_thread_flag(TIF_SIGPENDING);
		spin_unlock(&current->sighand->siglock);
	}
	spin_unlock_irq(&css_set_lock);
}

/*
 * Freeze or unfreeze the task by setting or clearing the JOBCTL_TRAP_FREEZE
 * jobctl bit.
 */
static void cgroup_freeze_task(struct task_struct *task, bool freeze)
{
	unsigned long flags;

	/* If the task is about to die, don't bother with freezing it. */
	if (!lock_task_sighand(task, &flags))
		return;

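	/*
	 * Freezing: kick the task into the signal delivery path, where it
	 * will notice JOBCTL_TRAP_FREEZE and enter the frozen state.
	 * Thawing: a plain wakeup is enough to leave the freezer trap.
	 */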
	if (freeze) {
		task->jobctl |= JOBCTL_TRAP_FREEZE;
		signal_wake_up(task, false);
	} else {
		task->jobctl &= ~JOBCTL_TRAP_FREEZE;
		wake_up_process(task);
	}

	unlock_task_sighand(task, &flags);
}

/*
 * Freeze or unfreeze all tasks in the given cgroup.
 */
static void cgroup_do_freeze(struct cgroup *cgrp, bool freeze)
{
	struct css_task_iter it;
	struct task_struct *task;

	lockdep_assert_held(&cgroup_mutex);

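	/*
	 * Update CGRP_FREEZE under css_set_lock, so that concurrently
	 * migrating tasks and tasks leaving the frozen state observe a
	 * consistent value.
	 */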
	spin_lock_irq(&css_set_lock);
	if (freeze)
		set_bit(CGRP_FREEZE, &cgrp->flags);
	else
		clear_bit(CGRP_FREEZE, &cgrp->flags);
	spin_unlock_irq(&css_set_lock);

	if (freeze)
		TRACE_CGROUP_PATH(freeze, cgrp);
	else
		TRACE_CGROUP_PATH(unfreeze, cgrp);

	css_task_iter_start(&cgrp->self, 0, &it);
	while ((task = css_task_iter_next(&it))) {
		/*
		 * Ignore kernel threads here. Freezing cgroups containing
		 * kthreads isn't supported.
		 */
		if (task->flags & PF_KTHREAD)
			continue;
		cgroup_freeze_task(task, freeze);
	}
	css_task_iter_end(&it);

	/*
	 * Cgroup state should be revisited here to cover empty leaf cgroups
	 * and cgroups whose descendants are already in the desired state.
	 */
	spin_lock_irq(&css_set_lock);
	if (cgrp->nr_descendants == cgrp->freezer.nr_frozen_descendants)
		cgroup_update_frozen(cgrp);
	spin_unlock_irq(&css_set_lock);
}

/*
 * Adjust the task state (freeze or unfreeze) and revisit the state of
 * source and destination cgroups.
 */
void cgroup_freezer_migrate_task(struct task_struct *task,
				 struct cgroup *src, struct cgroup *dst)
{
	lockdep_assert_held(&css_set_lock);

	/*
	 * Kernel threads are not supposed to be frozen at all.
	 */
	if (task->flags & PF_KTHREAD)
		return;

	/*
	 * There is nothing to do if neither the source nor the destination
	 * cgroup is freezing and the task is not frozen.
	 */
	if (!test_bit(CGRP_FREEZE, &src->flags) &&
	    !test_bit(CGRP_FREEZE, &dst->flags) &&
	    !task->frozen)
		return;

	/*
	 * Adjust the counters of freezing and frozen tasks.
	 * Note that if the task is frozen but the destination cgroup is not
	 * frozen, we bump both counters to keep them balanced.
	 */
	if (task->frozen) {
		cgroup_inc_frozen_cnt(dst);
		cgroup_dec_frozen_cnt(src);
	}
	cgroup_update_frozen(dst);
	cgroup_update_frozen(src);

	/*
	 * Force the task to the desired state.
	 */
	cgroup_freeze_task(task, test_bit(CGRP_FREEZE, &dst->flags));
}

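/*
 * Freeze or unfreeze the whole subtree rooted at @cgrp. This is the
 * backend for writes to the cgroup.freeze interface file. A cgroup is
 * effectively frozen if it or any of its ancestors is frozen, which is
 * what the e_freeze recalculation below implements.
 */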
void cgroup_freeze(struct cgroup *cgrp, bool freeze)
{
	struct cgroup_subsys_state *css;
	struct cgroup *parent;
	struct cgroup *dsct;
	bool applied = false;
	bool old_e;

	lockdep_assert_held(&cgroup_mutex);

	/*
	 * Nothing changed? Just exit.
	 */
	if (cgrp->freezer.freeze == freeze)
		return;

	cgrp->freezer.freeze = freeze;

	/*
	 * Propagate the change down the cgroup tree.
	 */
	css_for_each_descendant_pre(css, &cgrp->self) {
		dsct = css->cgroup;

		if (cgroup_is_dead(dsct))
			continue;

		/*
		 * A cgroup's effective freeze state (e_freeze) depends on
		 * its parent's e_freeze and its own freeze setting. If the
		 * effective state hasn't changed, its children can't be
		 * affected either, so skip the whole subtree.
		 */
		old_e = dsct->freezer.e_freeze;
		parent = cgroup_parent(dsct);
		dsct->freezer.e_freeze = (dsct->freezer.freeze ||
					  parent->freezer.e_freeze);
		if (dsct->freezer.e_freeze == old_e) {
			css = css_rightmost_descendant(css);
			continue;
		}

		/*
		 * Change the actual state: freeze or unfreeze.
		 */
		cgroup_do_freeze(dsct, freeze);
		applied = true;
	}

	/*
	 * Even if the actual state hasn't changed, let's notify the user.
	 * The state can be enforced by an ancestor cgroup: the cgroup
	 * can already be in the desired state or it can be locked in the
	 * opposite state, so that the transition will never happen.
	 * In both cases it's better to notify the user that there is
	 * nothing to wait for.
	 */
	if (!applied) {
		TRACE_CGROUP_PATH(notify_frozen, cgrp,
				  test_bit(CGRP_FROZEN, &cgrp->flags));
		cgroup_file_notify(&cgrp->events_file);
	}
}