1// SPDX-License-Identifier: GPL-2.0-only
2
3/*
4 * A simple wrapper around refcount. An allocated sched_core_cookie's
5 * address is used to compute the cookie of the task.
6 */
7struct sched_core_cookie {
8 refcount_t refcnt;
9};
10
11static unsigned long sched_core_alloc_cookie(void)
12{
13 struct sched_core_cookie *ck = kmalloc(size: sizeof(*ck), GFP_KERNEL);
14 if (!ck)
15 return 0;
16
17 refcount_set(r: &ck->refcnt, n: 1);
18 sched_core_get();
19
20 return (unsigned long)ck;
21}
22
23static void sched_core_put_cookie(unsigned long cookie)
24{
25 struct sched_core_cookie *ptr = (void *)cookie;
26
27 if (ptr && refcount_dec_and_test(r: &ptr->refcnt)) {
28 kfree(objp: ptr);
29 sched_core_put();
30 }
31}
32
33static unsigned long sched_core_get_cookie(unsigned long cookie)
34{
35 struct sched_core_cookie *ptr = (void *)cookie;
36
37 if (ptr)
38 refcount_inc(r: &ptr->refcnt);
39
40 return cookie;
41}
42
43/*
44 * sched_core_update_cookie - replace the cookie on a task
45 * @p: the task to update
46 * @cookie: the new cookie
47 *
48 * Effectively exchange the task cookie; caller is responsible for lifetimes on
49 * both ends.
50 *
51 * Returns: the old cookie
52 */
53static unsigned long sched_core_update_cookie(struct task_struct *p,
54 unsigned long cookie)
55{
56 unsigned long old_cookie;
57 struct rq_flags rf;
58 struct rq *rq;
59
60 rq = task_rq_lock(p, rf: &rf);
61
62 /*
63 * Since creating a cookie implies sched_core_get(), and we cannot set
64 * a cookie until after we've created it, similarly, we cannot destroy
65 * a cookie until after we've removed it, we must have core scheduling
66 * enabled here.
67 */
68 SCHED_WARN_ON((p->core_cookie || cookie) && !sched_core_enabled(rq));
69
70 if (sched_core_enqueued(p))
71 sched_core_dequeue(rq, p, DEQUEUE_SAVE);
72
73 old_cookie = p->core_cookie;
74 p->core_cookie = cookie;
75
76 /*
77 * Consider the cases: !prev_cookie and !cookie.
78 */
79 if (cookie && task_on_rq_queued(p))
80 sched_core_enqueue(rq, p);
81
82 /*
83 * If task is currently running, it may not be compatible anymore after
84 * the cookie change, so enter the scheduler on its CPU to schedule it
85 * away.
86 *
87 * Note that it is possible that as a result of this cookie change, the
88 * core has now entered/left forced idle state. Defer accounting to the
89 * next scheduling edge, rather than always forcing a reschedule here.
90 */
91 if (task_on_cpu(rq, p))
92 resched_curr(rq);
93
94 task_rq_unlock(rq, p, rf: &rf);
95
96 return old_cookie;
97}
98
99static unsigned long sched_core_clone_cookie(struct task_struct *p)
100{
101 unsigned long cookie, flags;
102
103 raw_spin_lock_irqsave(&p->pi_lock, flags);
104 cookie = sched_core_get_cookie(cookie: p->core_cookie);
105 raw_spin_unlock_irqrestore(&p->pi_lock, flags);
106
107 return cookie;
108}
109
110void sched_core_fork(struct task_struct *p)
111{
112 RB_CLEAR_NODE(&p->core_node);
113 p->core_cookie = sched_core_clone_cookie(current);
114}
115
116void sched_core_free(struct task_struct *p)
117{
118 sched_core_put_cookie(cookie: p->core_cookie);
119}
120
/*
 * Install @cookie on @p.
 *
 * A reference on @cookie is taken on behalf of @p before the exchange, and
 * the reference on whatever cookie @p carried before is dropped afterwards.
 * The caller's own reference on @cookie is not consumed.
 */
static void __sched_core_set(struct task_struct *p, unsigned long cookie)
{
	unsigned long incoming, previous;

	incoming = sched_core_get_cookie(cookie);
	previous = sched_core_update_cookie(p, incoming);
	sched_core_put_cookie(previous);
}
127
128/* Called from prctl interface: PR_SCHED_CORE */
129int sched_core_share_pid(unsigned int cmd, pid_t pid, enum pid_type type,
130 unsigned long uaddr)
131{
132 unsigned long cookie = 0, id = 0;
133 struct task_struct *task, *p;
134 struct pid *grp;
135 int err = 0;
136
137 if (!static_branch_likely(&sched_smt_present))
138 return -ENODEV;
139
140 BUILD_BUG_ON(PR_SCHED_CORE_SCOPE_THREAD != PIDTYPE_PID);
141 BUILD_BUG_ON(PR_SCHED_CORE_SCOPE_THREAD_GROUP != PIDTYPE_TGID);
142 BUILD_BUG_ON(PR_SCHED_CORE_SCOPE_PROCESS_GROUP != PIDTYPE_PGID);
143
144 if (type > PIDTYPE_PGID || cmd >= PR_SCHED_CORE_MAX || pid < 0 ||
145 (cmd != PR_SCHED_CORE_GET && uaddr))
146 return -EINVAL;
147
148 rcu_read_lock();
149 if (pid == 0) {
150 task = current;
151 } else {
152 task = find_task_by_vpid(nr: pid);
153 if (!task) {
154 rcu_read_unlock();
155 return -ESRCH;
156 }
157 }
158 get_task_struct(t: task);
159 rcu_read_unlock();
160
161 /*
162 * Check if this process has the right to modify the specified
163 * process. Use the regular "ptrace_may_access()" checks.
164 */
165 if (!ptrace_may_access(task, PTRACE_MODE_READ_REALCREDS)) {
166 err = -EPERM;
167 goto out;
168 }
169
170 switch (cmd) {
171 case PR_SCHED_CORE_GET:
172 if (type != PIDTYPE_PID || uaddr & 7) {
173 err = -EINVAL;
174 goto out;
175 }
176 cookie = sched_core_clone_cookie(p: task);
177 if (cookie) {
178 /* XXX improve ? */
179 ptr_to_hashval(ptr: (void *)cookie, hashval_out: &id);
180 }
181 err = put_user(id, (u64 __user *)uaddr);
182 goto out;
183
184 case PR_SCHED_CORE_CREATE:
185 cookie = sched_core_alloc_cookie();
186 if (!cookie) {
187 err = -ENOMEM;
188 goto out;
189 }
190 break;
191
192 case PR_SCHED_CORE_SHARE_TO:
193 cookie = sched_core_clone_cookie(current);
194 break;
195
196 case PR_SCHED_CORE_SHARE_FROM:
197 if (type != PIDTYPE_PID) {
198 err = -EINVAL;
199 goto out;
200 }
201 cookie = sched_core_clone_cookie(p: task);
202 __sched_core_set(current, cookie);
203 goto out;
204
205 default:
206 err = -EINVAL;
207 goto out;
208 }
209
210 if (type == PIDTYPE_PID) {
211 __sched_core_set(p: task, cookie);
212 goto out;
213 }
214
215 read_lock(&tasklist_lock);
216 grp = task_pid_type(task, type);
217
218 do_each_pid_thread(grp, type, p) {
219 if (!ptrace_may_access(task: p, PTRACE_MODE_READ_REALCREDS)) {
220 err = -EPERM;
221 goto out_tasklist;
222 }
223 } while_each_pid_thread(grp, type, p);
224
225 do_each_pid_thread(grp, type, p) {
226 __sched_core_set(p, cookie);
227 } while_each_pid_thread(grp, type, p);
228out_tasklist:
229 read_unlock(&tasklist_lock);
230
231out:
232 sched_core_put_cookie(cookie);
233 put_task_struct(t: task);
234 return err;
235}
236
237#ifdef CONFIG_SCHEDSTATS
238
239/* REQUIRES: rq->core's clock recently updated. */
240void __sched_core_account_forceidle(struct rq *rq)
241{
242 const struct cpumask *smt_mask = cpu_smt_mask(cpu: cpu_of(rq));
243 u64 delta, now = rq_clock(rq: rq->core);
244 struct rq *rq_i;
245 struct task_struct *p;
246 int i;
247
248 lockdep_assert_rq_held(rq);
249
250 WARN_ON_ONCE(!rq->core->core_forceidle_count);
251
252 if (rq->core->core_forceidle_start == 0)
253 return;
254
255 delta = now - rq->core->core_forceidle_start;
256 if (unlikely((s64)delta <= 0))
257 return;
258
259 rq->core->core_forceidle_start = now;
260
261 if (WARN_ON_ONCE(!rq->core->core_forceidle_occupation)) {
262 /* can't be forced idle without a running task */
263 } else if (rq->core->core_forceidle_count > 1 ||
264 rq->core->core_forceidle_occupation > 1) {
265 /*
266 * For larger SMT configurations, we need to scale the charged
267 * forced idle amount since there can be more than one forced
268 * idle sibling and more than one running cookied task.
269 */
270 delta *= rq->core->core_forceidle_count;
271 delta = div_u64(dividend: delta, divisor: rq->core->core_forceidle_occupation);
272 }
273
274 for_each_cpu(i, smt_mask) {
275 rq_i = cpu_rq(i);
276 p = rq_i->core_pick ?: rq_i->curr;
277
278 if (p == rq_i->idle)
279 continue;
280
281 /*
282 * Note: this will account forceidle to the current cpu, even
283 * if it comes from our SMT sibling.
284 */
285 __account_forceidle_time(tsk: p, delta);
286 }
287}
288
289void __sched_core_tick(struct rq *rq)
290{
291 if (!rq->core->core_forceidle_count)
292 return;
293
294 if (rq != rq->core)
295 update_rq_clock(rq: rq->core);
296
297 __sched_core_account_forceidle(rq);
298}
299
300#endif /* CONFIG_SCHEDSTATS */
301

/* Source: linux/kernel/sched/core_sched.c */