// SPDX-License-Identifier: GPL-2.0
#include <linux/cgroup.h>
#include <linux/sched.h>
#include <linux/sched/task.h>
#include <linux/sched/signal.h>

#include "cgroup-internal.h"

#include <trace/events/cgroup.h>

/*
 * Propagate the cgroup frozen state up the cgroup tree.
 */
static void cgroup_propagate_frozen(struct cgroup *cgrp, bool frozen)
{
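	/*
	 * desc is the number of descendants whose frozen state has changed:
	 * the cgroup we started from plus every ancestor that flips state in
	 * the loop below, since a flipped ancestor itself becomes one more
	 * (un)frozen descendant for the ancestors above it.
	 */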
	int desc = 1;

	/*
	 * If the new state is frozen, some freezing ancestor cgroups may change
	 * their state too, depending on if all their descendants are frozen.
	 *
	 * Otherwise, all ancestor cgroups are forced into the non-frozen state.
	 */
	while ((cgrp = cgroup_parent(cgrp))) {
		if (frozen) {
			cgrp->freezer.nr_frozen_descendants += desc;
			if (!test_bit(CGRP_FROZEN, &cgrp->flags) &&
			    test_bit(CGRP_FREEZE, &cgrp->flags) &&
			    cgrp->freezer.nr_frozen_descendants ==
			    cgrp->nr_descendants) {
				set_bit(CGRP_FROZEN, &cgrp->flags);
				cgroup_file_notify(&cgrp->events_file);
				TRACE_CGROUP_PATH(notify_frozen, cgrp, 1);
				desc++;
			}
		} else {
			cgrp->freezer.nr_frozen_descendants -= desc;
			if (test_bit(CGRP_FROZEN, &cgrp->flags)) {
				clear_bit(CGRP_FROZEN, &cgrp->flags);
				cgroup_file_notify(&cgrp->events_file);
				TRACE_CGROUP_PATH(notify_frozen, cgrp, 0);
				desc++;
			}
		}
	}
}

/*
 * Revisit the cgroup frozen state.
 * Checks if the cgroup is really frozen and performs all state transitions.
 */
void cgroup_update_frozen(struct cgroup *cgrp)
{
	bool frozen;

	lockdep_assert_held(&css_set_lock);

	/*
	 * If the cgroup has to be frozen (CGRP_FREEZE bit set),
	 * and all tasks are frozen and/or stopped, let's consider
	 * the cgroup frozen. Otherwise it's not frozen.
	 */
	frozen = test_bit(CGRP_FREEZE, &cgrp->flags) &&
		 cgrp->freezer.nr_frozen_tasks == __cgroup_task_count(cgrp);

	if (frozen) {
		/* Already there? */
		if (test_bit(CGRP_FROZEN, &cgrp->flags))
			return;

		set_bit(CGRP_FROZEN, &cgrp->flags);
	} else {
		/* Already there? */
		if (!test_bit(CGRP_FROZEN, &cgrp->flags))
			return;

		clear_bit(CGRP_FROZEN, &cgrp->flags);
	}
	cgroup_file_notify(&cgrp->events_file);
	TRACE_CGROUP_PATH(notify_frozen, cgrp, frozen);

	/* Update the state of ancestor cgroups. */
	cgroup_propagate_frozen(cgrp, frozen);
}

/*
 * Increment cgroup's nr_frozen_tasks.
 */
static void cgroup_inc_frozen_cnt(struct cgroup *cgrp)
{
	cgrp->freezer.nr_frozen_tasks++;
}

/*
 * Decrement cgroup's nr_frozen_tasks.
 */
static void cgroup_dec_frozen_cnt(struct cgroup *cgrp)
{
	cgrp->freezer.nr_frozen_tasks--;
	WARN_ON_ONCE(cgrp->freezer.nr_frozen_tasks < 0);
}

/*
 * Enter frozen/stopped state, if not yet there. Update cgroup's counters,
 * and revisit the state of the cgroup, if necessary.
 */
void cgroup_enter_frozen(void)
{
	struct cgroup *cgrp;

	if (current->frozen)
		return;

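	/*
	 * Account the task as frozen on its cgroup on the default hierarchy
	 * and re-evaluate the cgroup state: this task may be the last one
	 * needed to complete the cgroup's transition to the frozen state.
	 */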
	spin_lock_irq(&css_set_lock);
	current->frozen = true;
	cgrp = task_dfl_cgroup(current);
	cgroup_inc_frozen_cnt(cgrp);
	cgroup_update_frozen(cgrp);
	spin_unlock_irq(&css_set_lock);
}

/*
 * Conditionally leave frozen/stopped state. Update cgroup's counters,
 * and revisit the state of the cgroup, if necessary.
 *
 * If always_leave is not set, and the cgroup is freezing,
 * we're racing with the cgroup freezing. In this case, we don't
 * drop the frozen counter to avoid a transient switch to
 * the unfrozen state.
 */
void cgroup_leave_frozen(bool always_leave)
{
	struct cgroup *cgrp;

	spin_lock_irq(&css_set_lock);
	cgrp = task_dfl_cgroup(current);
	if (always_leave || !test_bit(CGRP_FREEZE, &cgrp->flags)) {
		cgroup_dec_frozen_cnt(cgrp);
		cgroup_update_frozen(cgrp);
		WARN_ON_ONCE(!current->frozen);
		current->frozen = false;
	} else if (!(current->jobctl & JOBCTL_TRAP_FREEZE)) {
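		/*
		 * The cgroup is still freezing and we are not forced to
		 * leave: stay accounted as frozen and make the task take
		 * the freezer trap again by raising JOBCTL_TRAP_FREEZE and
		 * faking a pending signal.
		 */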
		spin_lock(&current->sighand->siglock);
		current->jobctl |= JOBCTL_TRAP_FREEZE;
		set_thread_flag(TIF_SIGPENDING);
		spin_unlock(&current->sighand->siglock);
	}
	spin_unlock_irq(&css_set_lock);
}

/*
 * Freeze or unfreeze the task by setting or clearing the JOBCTL_TRAP_FREEZE
 * jobctl bit.
 */
static void cgroup_freeze_task(struct task_struct *task, bool freeze)
{
	unsigned long flags;

	/* If the task is about to die, don't bother with freezing it. */
	if (!lock_task_sighand(task, &flags))
		return;

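	/*
	 * To freeze, kick the task into the signal delivery path, where it
	 * will find JOBCTL_TRAP_FREEZE and enter the freezer trap. To
	 * unfreeze, a plain wake-up is enough to let it leave the trap.
	 */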
	if (freeze) {
		task->jobctl |= JOBCTL_TRAP_FREEZE;
		signal_wake_up(task, false);
	} else {
		task->jobctl &= ~JOBCTL_TRAP_FREEZE;
		wake_up_process(task);
	}

	unlock_task_sighand(task, &flags);
}

/*
 * Freeze or unfreeze all tasks in the given cgroup.
 */
static void cgroup_do_freeze(struct cgroup *cgrp, bool freeze)
{
	struct css_task_iter it;
	struct task_struct *task;

	lockdep_assert_held(&cgroup_mutex);

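	/*
	 * Flip CGRP_FREEZE under css_set_lock first, so that tasks migrated
	 * into the cgroup or passing through cgroup_leave_frozen() observe
	 * the new desired state before the existing tasks are kicked below.
	 */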
	spin_lock_irq(&css_set_lock);
	if (freeze)
		set_bit(CGRP_FREEZE, &cgrp->flags);
	else
		clear_bit(CGRP_FREEZE, &cgrp->flags);
	spin_unlock_irq(&css_set_lock);

	if (freeze)
		TRACE_CGROUP_PATH(freeze, cgrp);
	else
		TRACE_CGROUP_PATH(unfreeze, cgrp);

	css_task_iter_start(&cgrp->self, 0, &it);
	while ((task = css_task_iter_next(&it))) {
		/*
		 * Ignore kernel threads here. Freezing cgroups containing
		 * kthreads isn't supported.
		 */
		if (task->flags & PF_KTHREAD)
			continue;
		cgroup_freeze_task(task, freeze);
	}
	css_task_iter_end(&it);

	/*
	 * Cgroup state should be revisited here to cover empty leaf cgroups
	 * and cgroups whose descendants are already in the desired state.
	 */
	spin_lock_irq(&css_set_lock);
	if (cgrp->nr_descendants == cgrp->freezer.nr_frozen_descendants)
		cgroup_update_frozen(cgrp);
	spin_unlock_irq(&css_set_lock);
}

/*
 * Adjust the task state (freeze or unfreeze) and revisit the state of
 * source and destination cgroups.
 */
void cgroup_freezer_migrate_task(struct task_struct *task,
				 struct cgroup *src, struct cgroup *dst)
{
	lockdep_assert_held(&css_set_lock);

	/*
	 * Kernel threads are not supposed to be frozen at all.
	 */
	if (task->flags & PF_KTHREAD)
		return;

	/*
	 * It's not necessary to do changes if both of the src and dst cgroups
	 * are not freezing and the task is not frozen.
	 */
	if (!test_bit(CGRP_FREEZE, &src->flags) &&
	    !test_bit(CGRP_FREEZE, &dst->flags) &&
	    !task->frozen)
		return;

	/*
	 * Adjust counters of freezing and frozen tasks.
	 * Note that if the task is frozen but the destination cgroup is not,
	 * both counters are still adjusted to keep them balanced.
	 */
	if (task->frozen) {
		cgroup_inc_frozen_cnt(dst);
		cgroup_dec_frozen_cnt(src);
	}
	cgroup_update_frozen(dst);
	cgroup_update_frozen(src);

	/*
	 * Force the task to the desired state.
	 */
	cgroup_freeze_task(task, test_bit(CGRP_FREEZE, &dst->flags));
}

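/*
 * Propagate a new freeze setting (e.g. a write to the cgroup v2
 * "cgroup.freeze" interface file) down the cgroup subtree and apply it
 * where it actually changes the effective state.
 */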
void cgroup_freeze(struct cgroup *cgrp, bool freeze)
{
	struct cgroup_subsys_state *css;
	struct cgroup *dsct;
	bool applied = false;

	lockdep_assert_held(&cgroup_mutex);

	/*
	 * Nothing changed? Just exit.
	 */
	if (cgrp->freezer.freeze == freeze)
		return;

	cgrp->freezer.freeze = freeze;

	/*
	 * Propagate changes down the cgroup tree.
	 */
	css_for_each_descendant_pre(css, &cgrp->self) {
		dsct = css->cgroup;

		if (cgroup_is_dead(dsct))
			continue;

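		/*
		 * e_freeze is the effective freeze count: how many cgroups,
		 * this one included, request freezing for this cgroup via
		 * their own freeze setting. The cgroup must stay frozen as
		 * long as it is non-zero.
		 */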
		if (freeze) {
			dsct->freezer.e_freeze++;
			/*
			 * Already frozen because of ancestor's settings?
			 */
			if (dsct->freezer.e_freeze > 1)
				continue;
		} else {
			dsct->freezer.e_freeze--;
			/*
			 * Still frozen because of ancestor's settings?
			 */
			if (dsct->freezer.e_freeze > 0)
				continue;

			WARN_ON_ONCE(dsct->freezer.e_freeze < 0);
		}

		/*
		 * Do change actual state: freeze or unfreeze.
		 */
		cgroup_do_freeze(dsct, freeze);
		applied = true;
	}

	/*
	 * Even if the actual state hasn't changed, let's notify the user.
	 * The state can be enforced by an ancestor cgroup: the cgroup
	 * can already be in the desired state or it can be locked in the
	 * opposite state, so that the transition will never happen.
	 * In both cases it's better to notify the user that there is
	 * nothing to wait for.
	 */
	if (!applied) {
		TRACE_CGROUP_PATH(notify_frozen, cgrp,
				  test_bit(CGRP_FROZEN, &cgrp->flags));
		cgroup_file_notify(&cgrp->events_file);
	}
}