// SPDX-License-Identifier: GPL-2.0-only

/*
 * A simple wrapper around refcount. An allocated sched_core_cookie's
 * address is used to compute the cookie of the task.
 */
struct sched_core_cookie {
	refcount_t refcnt;
};

static unsigned long sched_core_alloc_cookie(void)
{
	struct sched_core_cookie *ck = kmalloc(sizeof(*ck), GFP_KERNEL);
	if (!ck)
		return 0;

	refcount_set(&ck->refcnt, 1);
	sched_core_get();

	return (unsigned long)ck;
}

static void sched_core_put_cookie(unsigned long cookie)
{
	struct sched_core_cookie *ptr = (void *)cookie;

	if (ptr && refcount_dec_and_test(&ptr->refcnt)) {
		kfree(ptr);
		sched_core_put();
	}
}

static unsigned long sched_core_get_cookie(unsigned long cookie)
{
	struct sched_core_cookie *ptr = (void *)cookie;

	if (ptr)
		refcount_inc(&ptr->refcnt);

	return cookie;
}
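
/*
 * Cookie lifetime, as implemented by the helpers above:
 *
 *	cookie = sched_core_alloc_cookie();	refcount == 1, also holds a
 *						sched_core_get() reference
 *	sched_core_get_cookie(cookie);		+1 for each additional user
 *	sched_core_put_cookie(cookie);		-1; on reaching zero, frees the
 *						allocation and sched_core_put()
 *
 * A cookie of 0 means "no cookie"; get/put silently ignore it.
 */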

/*
 * sched_core_update_cookie - replace the cookie on a task
 * @p: the task to update
 * @cookie: the new cookie
 *
 * Effectively exchange the task cookie; caller is responsible for lifetimes on
 * both ends.
 *
 * Returns: the old cookie
 */
static unsigned long sched_core_update_cookie(struct task_struct *p,
					      unsigned long cookie)
{
	unsigned long old_cookie;
	struct rq_flags rf;
	struct rq *rq;

	rq = task_rq_lock(p, &rf);

	/*
	 * Creating a cookie implies sched_core_get(), and a cookie cannot be
	 * set until after it has been created; similarly, a cookie cannot be
	 * destroyed until after it has been removed. Either way, core
	 * scheduling must already be enabled here.
	 */
	SCHED_WARN_ON((p->core_cookie || cookie) && !sched_core_enabled(rq));

	if (sched_core_enqueued(p))
		sched_core_dequeue(rq, p, DEQUEUE_SAVE);

	old_cookie = p->core_cookie;
	p->core_cookie = cookie;

	/*
	 * Only tasks with a non-zero cookie are kept in the core tree, so
	 * enqueue only if the new cookie is set and the task is queued. This
	 * covers both the !old_cookie and !cookie cases.
	 */
	if (cookie && task_on_rq_queued(p))
		sched_core_enqueue(rq, p);

	/*
	 * If the task is currently running, it may not be compatible anymore
	 * after the cookie change, so enter the scheduler on its CPU to
	 * schedule it away.
	 *
	 * Note that it is possible that as a result of this cookie change, the
	 * core has now entered/left forced idle state. Defer accounting to the
	 * next scheduling edge, rather than always forcing a reschedule here.
	 */
	if (task_on_cpu(rq, p))
		resched_curr(rq);

	task_rq_unlock(rq, p, &rf);

	return old_cookie;
}

static unsigned long sched_core_clone_cookie(struct task_struct *p)
{
	unsigned long cookie, flags;

	raw_spin_lock_irqsave(&p->pi_lock, flags);
	cookie = sched_core_get_cookie(p->core_cookie);
	raw_spin_unlock_irqrestore(&p->pi_lock, flags);

	return cookie;
}

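/*
 * Fork inheritance: the child starts with its own reference to the forking
 * task's cookie (possibly 0), taken via sched_core_clone_cookie().
 */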
void sched_core_fork(struct task_struct *p)
{
	RB_CLEAR_NODE(&p->core_node);
	p->core_cookie = sched_core_clone_cookie(current);
}

void sched_core_free(struct task_struct *p)
{
	sched_core_put_cookie(p->core_cookie);
}

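/*
 * Install @cookie on @p: take a reference on the new cookie, exchange it with
 * the task's current cookie, and drop the reference the task previously held.
 */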
static void __sched_core_set(struct task_struct *p, unsigned long cookie)
{
	cookie = sched_core_get_cookie(cookie);
	cookie = sched_core_update_cookie(p, cookie);
	sched_core_put_cookie(cookie);
}

/* Called from prctl interface: PR_SCHED_CORE */
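/*
 * Illustrative userspace sketch (not part of this file), assuming a libc that
 * exposes prctl(2) and the PR_SCHED_CORE_* constants from <linux/prctl.h>:
 *
 *	// Put the calling thread group into its own core-scheduling group.
 *	prctl(PR_SCHED_CORE, PR_SCHED_CORE_CREATE, 0,
 *	      PR_SCHED_CORE_SCOPE_THREAD_GROUP, 0);
 *
 *	// Read back an opaque cookie id for the calling thread; the address
 *	// passed as the last argument must be 8-byte aligned.
 *	unsigned long long id = 0;
 *	prctl(PR_SCHED_CORE, PR_SCHED_CORE_GET, 0,
 *	      PR_SCHED_CORE_SCOPE_THREAD, (unsigned long)&id);
 */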
int sched_core_share_pid(unsigned int cmd, pid_t pid, enum pid_type type,
			 unsigned long uaddr)
{
	unsigned long cookie = 0, id = 0;
	struct task_struct *task, *p;
	struct pid *grp;
	int err = 0;

	if (!static_branch_likely(&sched_smt_present))
		return -ENODEV;

	BUILD_BUG_ON(PR_SCHED_CORE_SCOPE_THREAD != PIDTYPE_PID);
	BUILD_BUG_ON(PR_SCHED_CORE_SCOPE_THREAD_GROUP != PIDTYPE_TGID);
	BUILD_BUG_ON(PR_SCHED_CORE_SCOPE_PROCESS_GROUP != PIDTYPE_PGID);

	if (type > PIDTYPE_PGID || cmd >= PR_SCHED_CORE_MAX || pid < 0 ||
	    (cmd != PR_SCHED_CORE_GET && uaddr))
		return -EINVAL;

	rcu_read_lock();
	if (pid == 0) {
		task = current;
	} else {
		task = find_task_by_vpid(pid);
		if (!task) {
			rcu_read_unlock();
			return -ESRCH;
		}
	}
	get_task_struct(task);
	rcu_read_unlock();

	/*
	 * Check if this process has the right to modify the specified
	 * process. Use the regular "ptrace_may_access()" checks.
	 */
	if (!ptrace_may_access(task, PTRACE_MODE_READ_REALCREDS)) {
		err = -EPERM;
		goto out;
	}

	switch (cmd) {
	case PR_SCHED_CORE_GET:
		if (type != PIDTYPE_PID || uaddr & 7) {
			err = -EINVAL;
			goto out;
		}
		cookie = sched_core_clone_cookie(task);
		if (cookie) {
			/* XXX improve ? */
			ptr_to_hashval((void *)cookie, &id);
		}
		err = put_user(id, (u64 __user *)uaddr);
		goto out;

	case PR_SCHED_CORE_CREATE:
		cookie = sched_core_alloc_cookie();
		if (!cookie) {
			err = -ENOMEM;
			goto out;
		}
		break;

	case PR_SCHED_CORE_SHARE_TO:
		cookie = sched_core_clone_cookie(current);
		break;

	case PR_SCHED_CORE_SHARE_FROM:
		if (type != PIDTYPE_PID) {
			err = -EINVAL;
			goto out;
		}
		cookie = sched_core_clone_cookie(task);
		__sched_core_set(current, cookie);
		goto out;

	default:
		err = -EINVAL;
		goto out;
	}

	if (type == PIDTYPE_PID) {
		__sched_core_set(task, cookie);
		goto out;
	}

	read_lock(&tasklist_lock);
	grp = task_pid_type(task, type);

	do_each_pid_thread(grp, type, p) {
		if (!ptrace_may_access(p, PTRACE_MODE_READ_REALCREDS)) {
			err = -EPERM;
			goto out_tasklist;
		}
	} while_each_pid_thread(grp, type, p);

	do_each_pid_thread(grp, type, p) {
		__sched_core_set(p, cookie);
	} while_each_pid_thread(grp, type, p);
out_tasklist:
	read_unlock(&tasklist_lock);

out:
	sched_core_put_cookie(cookie);
	put_task_struct(task);
	return err;
}

#ifdef CONFIG_SCHEDSTATS

/* REQUIRES: rq->core's clock recently updated. */
void __sched_core_account_forceidle(struct rq *rq)
{
	const struct cpumask *smt_mask = cpu_smt_mask(cpu_of(rq));
	u64 delta, now = rq_clock(rq->core);
	struct rq *rq_i;
	struct task_struct *p;
	int i;

	lockdep_assert_rq_held(rq);

	WARN_ON_ONCE(!rq->core->core_forceidle_count);

	if (rq->core->core_forceidle_start == 0)
		return;

	delta = now - rq->core->core_forceidle_start;
	if (unlikely((s64)delta <= 0))
		return;

	rq->core->core_forceidle_start = now;

	if (WARN_ON_ONCE(!rq->core->core_forceidle_occupation)) {
		/* can't be forced idle without a running task */
	} else if (rq->core->core_forceidle_count > 1 ||
		   rq->core->core_forceidle_occupation > 1) {
		/*
		 * For larger SMT configurations, we need to scale the charged
		 * forced idle amount since there can be more than one forced
		 * idle sibling and more than one running cookied task.
		 */
		delta *= rq->core->core_forceidle_count;
		delta = div_u64(delta, rq->core->core_forceidle_occupation);
	}
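	/*
	 * Worked example: on an SMT-4 core with three siblings forced idle
	 * (count == 3) and a single running cookied task (occupation == 1),
	 * that one task below is charged delta * 3, i.e. all of the forced
	 * idle time generated on this core.
	 */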

	for_each_cpu(i, smt_mask) {
		rq_i = cpu_rq(i);
		p = rq_i->core_pick ?: rq_i->curr;

		if (p == rq_i->idle)
			continue;

		/*
		 * Note: this will account forceidle to the current cpu, even
		 * if it comes from our SMT sibling.
		 */
		__account_forceidle_time(p, delta);
	}
}

void __sched_core_tick(struct rq *rq)
{
	if (!rq->core->core_forceidle_count)
		return;

	if (rq != rq->core)
		update_rq_clock(rq->core);

	__sched_core_account_forceidle(rq);
}

#endif /* CONFIG_SCHEDSTATS */