1 | /* |
2 | * cgroup_freezer.c - control group freezer subsystem |
3 | * |
4 | * Copyright IBM Corporation, 2007 |
5 | * |
6 | * Author : Cedric Le Goater <clg@fr.ibm.com> |
7 | * |
8 | * This program is free software; you can redistribute it and/or modify it |
9 | * under the terms of version 2.1 of the GNU Lesser General Public License |
10 | * as published by the Free Software Foundation. |
11 | * |
12 | * This program is distributed in the hope that it would be useful, but |
13 | * WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. |
15 | */ |
16 | |
17 | #include <linux/export.h> |
18 | #include <linux/slab.h> |
19 | #include <linux/cgroup.h> |
20 | #include <linux/fs.h> |
21 | #include <linux/uaccess.h> |
22 | #include <linux/freezer.h> |
23 | #include <linux/seq_file.h> |
24 | #include <linux/mutex.h> |
25 | #include <linux/cpu.h> |
26 | |
/*
 * Bits stored in freezer->state.
 *
 * A cgroup counts as freezing while at least one FREEZING bit is set.
 * FREEZING_SELF is raised when "FROZEN" is written to the cgroup's
 * freezer.state file and dropped again by "THAWED".  FREEZING_PARENT
 * mirrors the parent: it is raised while the parent freezer is FREEZING
 * for any reason, IOW while some ancestor has FREEZING_SELF set.
 */
enum freezer_state_flags {
	CGROUP_FREEZER_ONLINE	= 0x1,	/* freezer is fully online */
	CGROUP_FREEZING_SELF	= 0x2,	/* this freezer is freezing */
	CGROUP_FREEZING_PARENT	= 0x4,	/* the parent freezer is freezing */
	CGROUP_FROZEN		= 0x8,	/* this and its descendants frozen */

	/* union of the two FREEZING bits above */
	CGROUP_FREEZING		= CGROUP_FREEZING_PARENT | CGROUP_FREEZING_SELF,
};
43 | |
44 | struct freezer { |
45 | struct cgroup_subsys_state css; |
46 | unsigned int state; |
47 | }; |
48 | |
49 | static DEFINE_MUTEX(freezer_mutex); |
50 | |
51 | static inline struct freezer *css_freezer(struct cgroup_subsys_state *css) |
52 | { |
53 | return css ? container_of(css, struct freezer, css) : NULL; |
54 | } |
55 | |
56 | static inline struct freezer *task_freezer(struct task_struct *task) |
57 | { |
58 | return css_freezer(css: task_css(task, subsys_id: freezer_cgrp_id)); |
59 | } |
60 | |
61 | static struct freezer *parent_freezer(struct freezer *freezer) |
62 | { |
63 | return css_freezer(css: freezer->css.parent); |
64 | } |
65 | |
66 | bool cgroup_freezing(struct task_struct *task) |
67 | { |
68 | bool ret; |
69 | unsigned int state; |
70 | |
71 | rcu_read_lock(); |
72 | /* Check if the cgroup is still FREEZING, but not FROZEN. The extra |
73 | * !FROZEN check is required, because the FREEZING bit is not cleared |
74 | * when the state FROZEN is reached. |
75 | */ |
76 | state = task_freezer(task)->state; |
77 | ret = (state & CGROUP_FREEZING) && !(state & CGROUP_FROZEN); |
78 | rcu_read_unlock(); |
79 | |
80 | return ret; |
81 | } |
82 | |
83 | static const char *freezer_state_strs(unsigned int state) |
84 | { |
85 | if (state & CGROUP_FROZEN) |
86 | return "FROZEN" ; |
87 | if (state & CGROUP_FREEZING) |
88 | return "FREEZING" ; |
89 | return "THAWED" ; |
90 | }; |
91 | |
92 | static struct cgroup_subsys_state * |
93 | freezer_css_alloc(struct cgroup_subsys_state *parent_css) |
94 | { |
95 | struct freezer *freezer; |
96 | |
97 | freezer = kzalloc(size: sizeof(struct freezer), GFP_KERNEL); |
98 | if (!freezer) |
99 | return ERR_PTR(error: -ENOMEM); |
100 | |
101 | return &freezer->css; |
102 | } |
103 | |
104 | /** |
105 | * freezer_css_online - commit creation of a freezer css |
106 | * @css: css being created |
107 | * |
108 | * We're committing to creation of @css. Mark it online and inherit |
109 | * parent's freezing state while holding both parent's and our |
110 | * freezer->lock. |
111 | */ |
112 | static int freezer_css_online(struct cgroup_subsys_state *css) |
113 | { |
114 | struct freezer *freezer = css_freezer(css); |
115 | struct freezer *parent = parent_freezer(freezer); |
116 | |
117 | cpus_read_lock(); |
118 | mutex_lock(&freezer_mutex); |
119 | |
120 | freezer->state |= CGROUP_FREEZER_ONLINE; |
121 | |
122 | if (parent && (parent->state & CGROUP_FREEZING)) { |
123 | freezer->state |= CGROUP_FREEZING_PARENT | CGROUP_FROZEN; |
124 | static_branch_inc_cpuslocked(&freezer_active); |
125 | } |
126 | |
127 | mutex_unlock(lock: &freezer_mutex); |
128 | cpus_read_unlock(); |
129 | return 0; |
130 | } |
131 | |
132 | /** |
133 | * freezer_css_offline - initiate destruction of a freezer css |
134 | * @css: css being destroyed |
135 | * |
136 | * @css is going away. Mark it dead and decrement system_freezing_count if |
137 | * it was holding one. |
138 | */ |
139 | static void freezer_css_offline(struct cgroup_subsys_state *css) |
140 | { |
141 | struct freezer *freezer = css_freezer(css); |
142 | |
143 | cpus_read_lock(); |
144 | mutex_lock(&freezer_mutex); |
145 | |
146 | if (freezer->state & CGROUP_FREEZING) |
147 | static_branch_dec_cpuslocked(&freezer_active); |
148 | |
149 | freezer->state = 0; |
150 | |
151 | mutex_unlock(lock: &freezer_mutex); |
152 | cpus_read_unlock(); |
153 | } |
154 | |
/* Free the freezer that embeds @css. */
static void freezer_css_free(struct cgroup_subsys_state *css)
{
	struct freezer *freezer = css_freezer(css);

	kfree(freezer);
}
159 | |
160 | /* |
161 | * Tasks can be migrated into a different freezer anytime regardless of its |
162 | * current state. freezer_attach() is responsible for making new tasks |
163 | * conform to the current state. |
164 | * |
165 | * Freezer state changes and task migration are synchronized via |
166 | * @freezer->lock. freezer_attach() makes the new tasks conform to the |
167 | * current state and all following state changes can see the new tasks. |
168 | */ |
169 | static void freezer_attach(struct cgroup_taskset *tset) |
170 | { |
171 | struct task_struct *task; |
172 | struct cgroup_subsys_state *new_css; |
173 | |
174 | mutex_lock(&freezer_mutex); |
175 | |
176 | /* |
177 | * Make the new tasks conform to the current state of @new_css. |
178 | * For simplicity, when migrating any task to a FROZEN cgroup, we |
179 | * revert it to FREEZING and let update_if_frozen() determine the |
180 | * correct state later. |
181 | * |
182 | * Tasks in @tset are on @new_css but may not conform to its |
183 | * current state before executing the following - !frozen tasks may |
184 | * be visible in a FROZEN cgroup and frozen tasks in a THAWED one. |
185 | */ |
186 | cgroup_taskset_for_each(task, new_css, tset) { |
187 | struct freezer *freezer = css_freezer(css: new_css); |
188 | |
189 | if (!(freezer->state & CGROUP_FREEZING)) { |
190 | __thaw_task(t: task); |
191 | } else { |
192 | freeze_task(p: task); |
193 | |
194 | /* clear FROZEN and propagate upwards */ |
195 | while (freezer && (freezer->state & CGROUP_FROZEN)) { |
196 | freezer->state &= ~CGROUP_FROZEN; |
197 | freezer = parent_freezer(freezer); |
198 | } |
199 | } |
200 | } |
201 | |
202 | mutex_unlock(lock: &freezer_mutex); |
203 | } |
204 | |
205 | /** |
206 | * freezer_fork - cgroup post fork callback |
207 | * @task: a task which has just been forked |
208 | * |
209 | * @task has just been created and should conform to the current state of |
210 | * the cgroup_freezer it belongs to. This function may race against |
211 | * freezer_attach(). Losing to freezer_attach() means that we don't have |
212 | * to do anything as freezer_attach() will put @task into the appropriate |
213 | * state. |
214 | */ |
215 | static void freezer_fork(struct task_struct *task) |
216 | { |
217 | struct freezer *freezer; |
218 | |
219 | /* |
220 | * The root cgroup is non-freezable, so we can skip locking the |
221 | * freezer. This is safe regardless of race with task migration. |
222 | * If we didn't race or won, skipping is obviously the right thing |
223 | * to do. If we lost and root is the new cgroup, noop is still the |
224 | * right thing to do. |
225 | */ |
226 | if (task_css_is_root(task, subsys_id: freezer_cgrp_id)) |
227 | return; |
228 | |
229 | mutex_lock(&freezer_mutex); |
230 | rcu_read_lock(); |
231 | |
232 | freezer = task_freezer(task); |
233 | if (freezer->state & CGROUP_FREEZING) |
234 | freeze_task(p: task); |
235 | |
236 | rcu_read_unlock(); |
237 | mutex_unlock(lock: &freezer_mutex); |
238 | } |
239 | |
240 | /** |
241 | * update_if_frozen - update whether a cgroup finished freezing |
242 | * @css: css of interest |
243 | * |
244 | * Once FREEZING is initiated, transition to FROZEN is lazily updated by |
245 | * calling this function. If the current state is FREEZING but not FROZEN, |
246 | * this function checks whether all tasks of this cgroup and the descendant |
247 | * cgroups finished freezing and, if so, sets FROZEN. |
248 | * |
249 | * The caller is responsible for grabbing RCU read lock and calling |
250 | * update_if_frozen() on all descendants prior to invoking this function. |
251 | * |
252 | * Task states and freezer state might disagree while tasks are being |
253 | * migrated into or out of @css, so we can't verify task states against |
254 | * @freezer state here. See freezer_attach() for details. |
255 | */ |
256 | static void update_if_frozen(struct cgroup_subsys_state *css) |
257 | { |
258 | struct freezer *freezer = css_freezer(css); |
259 | struct cgroup_subsys_state *pos; |
260 | struct css_task_iter it; |
261 | struct task_struct *task; |
262 | |
263 | lockdep_assert_held(&freezer_mutex); |
264 | |
265 | if (!(freezer->state & CGROUP_FREEZING) || |
266 | (freezer->state & CGROUP_FROZEN)) |
267 | return; |
268 | |
269 | /* are all (live) children frozen? */ |
270 | rcu_read_lock(); |
271 | css_for_each_child(pos, css) { |
272 | struct freezer *child = css_freezer(css: pos); |
273 | |
274 | if ((child->state & CGROUP_FREEZER_ONLINE) && |
275 | !(child->state & CGROUP_FROZEN)) { |
276 | rcu_read_unlock(); |
277 | return; |
278 | } |
279 | } |
280 | rcu_read_unlock(); |
281 | |
282 | /* are all tasks frozen? */ |
283 | css_task_iter_start(css, flags: 0, it: &it); |
284 | |
285 | while ((task = css_task_iter_next(it: &it))) { |
286 | if (freezing(p: task) && !frozen(p: task)) |
287 | goto out_iter_end; |
288 | } |
289 | |
290 | freezer->state |= CGROUP_FROZEN; |
291 | out_iter_end: |
292 | css_task_iter_end(it: &it); |
293 | } |
294 | |
295 | static int freezer_read(struct seq_file *m, void *v) |
296 | { |
297 | struct cgroup_subsys_state *css = seq_css(seq: m), *pos; |
298 | |
299 | mutex_lock(&freezer_mutex); |
300 | rcu_read_lock(); |
301 | |
302 | /* update states bottom-up */ |
303 | css_for_each_descendant_post(pos, css) { |
304 | if (!css_tryget_online(css: pos)) |
305 | continue; |
306 | rcu_read_unlock(); |
307 | |
308 | update_if_frozen(css: pos); |
309 | |
310 | rcu_read_lock(); |
311 | css_put(css: pos); |
312 | } |
313 | |
314 | rcu_read_unlock(); |
315 | mutex_unlock(lock: &freezer_mutex); |
316 | |
317 | seq_puts(m, s: freezer_state_strs(state: css_freezer(css)->state)); |
318 | seq_putc(m, c: '\n'); |
319 | return 0; |
320 | } |
321 | |
322 | static void freeze_cgroup(struct freezer *freezer) |
323 | { |
324 | struct css_task_iter it; |
325 | struct task_struct *task; |
326 | |
327 | css_task_iter_start(css: &freezer->css, flags: 0, it: &it); |
328 | while ((task = css_task_iter_next(it: &it))) |
329 | freeze_task(p: task); |
330 | css_task_iter_end(it: &it); |
331 | } |
332 | |
333 | static void unfreeze_cgroup(struct freezer *freezer) |
334 | { |
335 | struct css_task_iter it; |
336 | struct task_struct *task; |
337 | |
338 | css_task_iter_start(css: &freezer->css, flags: 0, it: &it); |
339 | while ((task = css_task_iter_next(it: &it))) |
340 | __thaw_task(t: task); |
341 | css_task_iter_end(it: &it); |
342 | } |
343 | |
344 | /** |
345 | * freezer_apply_state - apply state change to a single cgroup_freezer |
346 | * @freezer: freezer to apply state change to |
347 | * @freeze: whether to freeze or unfreeze |
348 | * @state: CGROUP_FREEZING_* flag to set or clear |
349 | * |
350 | * Set or clear @state on @cgroup according to @freeze, and perform |
351 | * freezing or thawing as necessary. |
352 | */ |
353 | static void freezer_apply_state(struct freezer *freezer, bool freeze, |
354 | unsigned int state) |
355 | { |
356 | /* also synchronizes against task migration, see freezer_attach() */ |
357 | lockdep_assert_held(&freezer_mutex); |
358 | |
359 | if (!(freezer->state & CGROUP_FREEZER_ONLINE)) |
360 | return; |
361 | |
362 | if (freeze) { |
363 | if (!(freezer->state & CGROUP_FREEZING)) |
364 | static_branch_inc_cpuslocked(&freezer_active); |
365 | freezer->state |= state; |
366 | freeze_cgroup(freezer); |
367 | } else { |
368 | bool was_freezing = freezer->state & CGROUP_FREEZING; |
369 | |
370 | freezer->state &= ~state; |
371 | |
372 | if (!(freezer->state & CGROUP_FREEZING)) { |
373 | freezer->state &= ~CGROUP_FROZEN; |
374 | if (was_freezing) |
375 | static_branch_dec_cpuslocked(&freezer_active); |
376 | unfreeze_cgroup(freezer); |
377 | } |
378 | } |
379 | } |
380 | |
381 | /** |
382 | * freezer_change_state - change the freezing state of a cgroup_freezer |
383 | * @freezer: freezer of interest |
384 | * @freeze: whether to freeze or thaw |
385 | * |
386 | * Freeze or thaw @freezer according to @freeze. The operations are |
387 | * recursive - all descendants of @freezer will be affected. |
388 | */ |
389 | static void freezer_change_state(struct freezer *freezer, bool freeze) |
390 | { |
391 | struct cgroup_subsys_state *pos; |
392 | |
393 | cpus_read_lock(); |
394 | /* |
395 | * Update all its descendants in pre-order traversal. Each |
396 | * descendant will try to inherit its parent's FREEZING state as |
397 | * CGROUP_FREEZING_PARENT. |
398 | */ |
399 | mutex_lock(&freezer_mutex); |
400 | rcu_read_lock(); |
401 | css_for_each_descendant_pre(pos, &freezer->css) { |
402 | struct freezer *pos_f = css_freezer(css: pos); |
403 | struct freezer *parent = parent_freezer(freezer: pos_f); |
404 | |
405 | if (!css_tryget_online(css: pos)) |
406 | continue; |
407 | rcu_read_unlock(); |
408 | |
409 | if (pos_f == freezer) |
410 | freezer_apply_state(freezer: pos_f, freeze, |
411 | state: CGROUP_FREEZING_SELF); |
412 | else |
413 | freezer_apply_state(freezer: pos_f, |
414 | freeze: parent->state & CGROUP_FREEZING, |
415 | state: CGROUP_FREEZING_PARENT); |
416 | |
417 | rcu_read_lock(); |
418 | css_put(css: pos); |
419 | } |
420 | rcu_read_unlock(); |
421 | mutex_unlock(lock: &freezer_mutex); |
422 | cpus_read_unlock(); |
423 | } |
424 | |
425 | static ssize_t freezer_write(struct kernfs_open_file *of, |
426 | char *buf, size_t nbytes, loff_t off) |
427 | { |
428 | bool freeze; |
429 | |
430 | buf = strstrip(str: buf); |
431 | |
432 | if (strcmp(buf, freezer_state_strs(state: 0)) == 0) |
433 | freeze = false; |
434 | else if (strcmp(buf, freezer_state_strs(state: CGROUP_FROZEN)) == 0) |
435 | freeze = true; |
436 | else |
437 | return -EINVAL; |
438 | |
439 | freezer_change_state(freezer: css_freezer(css: of_css(of)), freeze); |
440 | return nbytes; |
441 | } |
442 | |
443 | static u64 freezer_self_freezing_read(struct cgroup_subsys_state *css, |
444 | struct cftype *cft) |
445 | { |
446 | struct freezer *freezer = css_freezer(css); |
447 | |
448 | return (bool)(freezer->state & CGROUP_FREEZING_SELF); |
449 | } |
450 | |
451 | static u64 freezer_parent_freezing_read(struct cgroup_subsys_state *css, |
452 | struct cftype *cft) |
453 | { |
454 | struct freezer *freezer = css_freezer(css); |
455 | |
456 | return (bool)(freezer->state & CGROUP_FREEZING_PARENT); |
457 | } |
458 | |
459 | static struct cftype files[] = { |
460 | { |
461 | .name = "state" , |
462 | .flags = CFTYPE_NOT_ON_ROOT, |
463 | .seq_show = freezer_read, |
464 | .write = freezer_write, |
465 | }, |
466 | { |
467 | .name = "self_freezing" , |
468 | .flags = CFTYPE_NOT_ON_ROOT, |
469 | .read_u64 = freezer_self_freezing_read, |
470 | }, |
471 | { |
472 | .name = "parent_freezing" , |
473 | .flags = CFTYPE_NOT_ON_ROOT, |
474 | .read_u64 = freezer_parent_freezing_read, |
475 | }, |
476 | { } /* terminate */ |
477 | }; |
478 | |
479 | struct cgroup_subsys freezer_cgrp_subsys = { |
480 | .css_alloc = freezer_css_alloc, |
481 | .css_online = freezer_css_online, |
482 | .css_offline = freezer_css_offline, |
483 | .css_free = freezer_css_free, |
484 | .attach = freezer_attach, |
485 | .fork = freezer_fork, |
486 | .legacy_cftypes = files, |
487 | }; |
488 | |