deadline.c source code [linux/kernel/sched/deadline.c]

1	// SPDX-License-Identifier: GPL-2.0
/*
 * Deadline Scheduling Class (SCHED_DEADLINE)
 *
 * Earliest Deadline First (EDF) + Constant Bandwidth Server (CBS).
 *
 * Tasks that periodically execute their instances for less than their
 * runtime won't miss any of their deadlines.
 * Tasks that are not periodic or sporadic or that try to execute more
 * than their reserved bandwidth will be slowed down (and may potentially
 * miss some of their deadlines), and won't affect any other task.
 *
 * Copyright (C) 2012 Dario Faggioli <raistlin@linux.it>,
 *                    Juri Lelli <juri.lelli@gmail.com>,
 *                    Michael Trimarchi <michael@amarulasolutions.com>,
 *                    Fabio Checconi <fchecconi@gmail.com>
 */
18
19	#include <linux/cpuset.h>
20
/*
 * Default limits for DL period; on the top end we guard against small util
 * tasks still getting ridiculously long effective runtimes, on the bottom end we
 * guard against timer DoS.
 *
 * Both values are expressed in microseconds (see the "_us" sysctl names).
 */
static unsigned int sysctl_sched_dl_period_max = 1 << 22; /* ~4 seconds */
static unsigned int sysctl_sched_dl_period_min = 100;     /* 100 us */
28	#ifdef CONFIG_SYSCTL
29	static const struct ctl_table sched_dl_sysctls[] = {
30	{
31	.procname = "sched_deadline_period_max_us",
32	.data = &sysctl_sched_dl_period_max,
33	.maxlen = sizeof(unsigned int),
34	.mode = `0644`,
35	.proc_handler = proc_douintvec_minmax,
36	.extra1 = (void *)&sysctl_sched_dl_period_min,
37	},
38	{
39	.procname = "sched_deadline_period_min_us",
40	.data = &sysctl_sched_dl_period_min,
41	.maxlen = sizeof(unsigned int),
42	.mode = `0644`,
43	.proc_handler = proc_douintvec_minmax,
44	.extra2 = (void *)&sysctl_sched_dl_period_max,
45	},
46	};
47
48	static int __init sched_dl_sysctl_init(void)
49	{
50	register_sysctl_init("kernel", sched_dl_sysctls);
51	return `0`;
52	}
53	late_initcall(sched_dl_sysctl_init);
54	#endif
55
56	static bool dl_server(struct sched_dl_entity *dl_se)
57	{
58	return dl_se->dl_server;
59	}
60
61	static inline struct task_struct dl_task_of(struct* sched_dl_entity *dl_se)
62	{
63	BUG_ON(dl_server(dl_se));
64	return container_of(dl_se, struct task_struct, dl);
65	}
66
67	static inline struct rq rq_of_dl_rq(struct* dl_rq *dl_rq)
68	{
69	return container_of(dl_rq, struct rq, dl);
70	}
71
72	static inline struct rq rq_of_dl_se(struct* sched_dl_entity *dl_se)
73	{
74	struct rq *rq = dl_se->rq;
75
76	if (!dl_server(dl_se))
77	rq = task_rq(dl_task_of(dl_se));
78
79	return rq;
80	}
81
82	static inline struct dl_rq dl_rq_of_se(struct* sched_dl_entity *dl_se)
83	{
84	return &rq_of_dl_se(dl_se)->dl;
85	}
86
87	static inline int on_dl_rq(struct sched_dl_entity *dl_se)
88	{
89	return !RB_EMPTY_NODE(&dl_se->rb_node);
90	}
91
92	#ifdef CONFIG_RT_MUTEXES
93	static inline struct sched_dl_entity pi_of(struct* sched_dl_entity *dl_se)
94	{
95	return dl_se->pi_se;
96	}
97
98	static inline bool is_dl_boosted(struct sched_dl_entity *dl_se)
99	{
100	return pi_of(dl_se) != dl_se;
101	}
102	#else
/* Without CONFIG_RT_MUTEXES every entity is its own PI entity. */
static inline struct sched_dl_entity *pi_of(struct sched_dl_entity *dl_se)
{
	return dl_se;
}
107
/* Without CONFIG_RT_MUTEXES this variant always reports "not boosted". */
static inline bool is_dl_boosted(struct sched_dl_entity *dl_se)
{
	return false;
}
112	#endif
113
114	#ifdef CONFIG_SMP
115	static inline struct dl_bw dl_bw_of(int* i)
116	{
117	RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held(),
118	"sched RCU must be held");
119	return &cpu_rq(i)->rd->dl_bw;
120	}
121
122	static inline int dl_bw_cpus(int i)
123	{
124	struct root_domain *rd = cpu_rq(i)->rd;
125	int cpus;
126
127	RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held(),
128	"sched RCU must be held");
129
130	if (cpumask_subset(src1p: rd->span, cpu_active_mask))
131	return cpumask_weight(srcp: rd->span);
132
133	cpus = `0`;
134
135	for_each_cpu_and(i, rd->span, cpu_active_mask)
136	cpus++;
137
138	return cpus;
139	}
140
141	static inline unsigned long __dl_bw_capacity(const struct cpumask *mask)
142	{
143	unsigned long cap = `0`;
144	int i;
145
146	for_each_cpu_and(i, mask, cpu_active_mask)
147	cap += arch_scale_cpu_capacity(cpu: i);
148
149	return cap;
150	}
151
152	/*
153	* XXX Fix: If 'rq->rd == def_root_domain' perform AC against capacity
154	* of the CPU the task is running on rather rd's \Sum CPU capacity.
155	*/
156	static inline unsigned long dl_bw_capacity(int i)
157	{
158	if (!sched_asym_cpucap_active() &&
159	arch_scale_cpu_capacity(cpu: i) == SCHED_CAPACITY_SCALE) {
160	return dl_bw_cpus(i) << SCHED_CAPACITY_SHIFT;
161	} else {
162	RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held(),
163	"sched RCU must be held");
164
165	return __dl_bw_capacity(cpu_rq(i)->rd->span);
166	}
167	}
168
169	bool dl_bw_visited(int cpu, u64 cookie)
170	{
171	struct root_domain *rd = cpu_rq(cpu)->rd;
172
173	if (rd->visit_cookie == cookie)
174	return true;
175
176	rd->visit_cookie = cookie;
177	return false;
178	}
179
180	static inline
181	void __dl_update(struct dl_bw *dl_b, s64 bw)
182	{
183	struct root_domain rd = container_of(dl_b, struct* root_domain, dl_bw);
184	int i;
185
186	RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held(),
187	"sched RCU must be held");
188	for_each_cpu_and(i, rd->span, cpu_active_mask) {
189	struct rq *rq = cpu_rq(i);
190
191	rq->dl.extra_bw += bw;
192	}
193	}
194	#else
195	static inline struct dl_bw dl_bw_of(int* i)
196	{
197	return &cpu_rq(i)->dl.dl_bw;
198	}
199
/* !CONFIG_SMP: there is exactly one CPU, whatever @i is. */
static inline int dl_bw_cpus(int i)
{
	return 1;
}
204
205	static inline unsigned long dl_bw_capacity(int i)
206	{
207	return SCHED_CAPACITY_SCALE;
208	}
209
210	bool dl_bw_visited(int cpu, u64 cookie)
211	{
212	return false;
213	}
214
215	static inline
216	void __dl_update(struct dl_bw *dl_b, s64 bw)
217	{
218	struct dl_rq dl = container_of(dl_b, struct* dl_rq, dl_bw);
219
220	dl->extra_bw += bw;
221	}
222	#endif
223
224	static inline
225	void __dl_sub(struct dl_bw dl_b, u64 tsk_bw, int* cpus)
226	{
227	dl_b->total_bw -= tsk_bw;
228	__dl_update(dl_b, bw: (s32)tsk_bw / cpus);
229	}
230
231	static inline
232	void __dl_add(struct dl_bw dl_b, u64 tsk_bw, int* cpus)
233	{
234	dl_b->total_bw += tsk_bw;
235	__dl_update(dl_b, bw: -((s32)tsk_bw / cpus));
236	}
237
238	static inline bool
239	__dl_overflow(struct dl_bw dl_b, unsigned* long cap, u64 old_bw, u64 new_bw)
240	{
241	return dl_b->bw != -`1` &&
242	cap_scale(dl_b->bw, cap) < dl_b->total_bw - old_bw + new_bw;
243	}
244
245	static inline
246	void __add_running_bw(u64 dl_bw, struct dl_rq *dl_rq)
247	{
248	u64 old = dl_rq->running_bw;
249
250	lockdep_assert_rq_held(rq: rq_of_dl_rq(dl_rq));
251	dl_rq->running_bw += dl_bw;
252	WARN_ON_ONCE(dl_rq->running_bw < old); / overflow /
253	WARN_ON_ONCE(dl_rq->running_bw > dl_rq->this_bw);
254	/ kick cpufreq (see the comment in kernel/sched/sched.h). /
255	cpufreq_update_util(rq: rq_of_dl_rq(dl_rq), flags: `0`);
256	}
257
258	static inline
259	void __sub_running_bw(u64 dl_bw, struct dl_rq *dl_rq)
260	{
261	u64 old = dl_rq->running_bw;
262
263	lockdep_assert_rq_held(rq: rq_of_dl_rq(dl_rq));
264	dl_rq->running_bw -= dl_bw;
265	WARN_ON_ONCE(dl_rq->running_bw > old); / underflow /
266	if (dl_rq->running_bw > old)
267	dl_rq->running_bw = `0`;
268	/ kick cpufreq (see the comment in kernel/sched/sched.h). /
269	cpufreq_update_util(rq: rq_of_dl_rq(dl_rq), flags: `0`);
270	}
271
272	static inline
273	void __add_rq_bw(u64 dl_bw, struct dl_rq *dl_rq)
274	{
275	u64 old = dl_rq->this_bw;
276
277	lockdep_assert_rq_held(rq: rq_of_dl_rq(dl_rq));
278	dl_rq->this_bw += dl_bw;
279	WARN_ON_ONCE(dl_rq->this_bw < old); / overflow /
280	}
281
282	static inline
283	void __sub_rq_bw(u64 dl_bw, struct dl_rq *dl_rq)
284	{
285	u64 old = dl_rq->this_bw;
286
287	lockdep_assert_rq_held(rq: rq_of_dl_rq(dl_rq));
288	dl_rq->this_bw -= dl_bw;
289	WARN_ON_ONCE(dl_rq->this_bw > old); / underflow /
290	if (dl_rq->this_bw > old)
291	dl_rq->this_bw = `0`;
292	WARN_ON_ONCE(dl_rq->running_bw > dl_rq->this_bw);
293	}
294
295	static inline
296	void add_rq_bw(struct sched_dl_entity dl_se, struct* dl_rq *dl_rq)
297	{
298	if (!dl_entity_is_special(dl_se))
299	__add_rq_bw(dl_bw: dl_se->dl_bw, dl_rq);
300	}
301
302	static inline
303	void sub_rq_bw(struct sched_dl_entity dl_se, struct* dl_rq *dl_rq)
304	{
305	if (!dl_entity_is_special(dl_se))
306	__sub_rq_bw(dl_bw: dl_se->dl_bw, dl_rq);
307	}
308
309	static inline
310	void add_running_bw(struct sched_dl_entity dl_se, struct* dl_rq *dl_rq)
311	{
312	if (!dl_entity_is_special(dl_se))
313	__add_running_bw(dl_bw: dl_se->dl_bw, dl_rq);
314	}
315
316	static inline
317	void sub_running_bw(struct sched_dl_entity dl_se, struct* dl_rq *dl_rq)
318	{
319	if (!dl_entity_is_special(dl_se))
320	__sub_running_bw(dl_bw: dl_se->dl_bw, dl_rq);
321	}
322
323	static void dl_rq_change_utilization(struct rq rq, struct* sched_dl_entity *dl_se, u64 new_bw)
324	{
325	if (dl_se->dl_non_contending) {
326	sub_running_bw(dl_se, dl_rq: &rq->dl);
327	dl_se->dl_non_contending = `0`;
328
329	/*
330	* If the timer handler is currently running and the
331	* timer cannot be canceled, inactive_task_timer()
332	* will see that dl_not_contending is not set, and
333	* will not touch the rq's active utilization,
334	* so we are still safe.
335	*/
336	if (hrtimer_try_to_cancel(timer: &dl_se->inactive_timer) == `1`) {
337	if (!dl_server(dl_se))
338	put_task_struct(t: dl_task_of(dl_se));
339	}
340	}
341	__sub_rq_bw(dl_bw: dl_se->dl_bw, dl_rq: &rq->dl);
342	__add_rq_bw(dl_bw: new_bw, dl_rq: &rq->dl);
343	}
344
345	static __always_inline
346	void cancel_dl_timer(struct sched_dl_entity dl_se, struct* hrtimer *timer)
347	{
348	/*
349	* If the timer callback was running (hrtimer_try_to_cancel == -1),
350	* it will eventually call put_task_struct().
351	*/
352	if (hrtimer_try_to_cancel(timer) == `1` && !dl_server(dl_se))
353	put_task_struct(t: dl_task_of(dl_se));
354	}
355
356	static __always_inline
357	void cancel_replenish_timer(struct sched_dl_entity *dl_se)
358	{
359	cancel_dl_timer(dl_se, timer: &dl_se->dl_timer);
360	}
361
362	static __always_inline
363	void cancel_inactive_timer(struct sched_dl_entity *dl_se)
364	{
365	cancel_dl_timer(dl_se, timer: &dl_se->inactive_timer);
366	}
367
368	static void dl_change_utilization(struct task_struct *p, u64 new_bw)
369	{
370	WARN_ON_ONCE(p->dl.flags & SCHED_FLAG_SUGOV);
371
372	if (task_on_rq_queued(p))
373	return;
374
375	dl_rq_change_utilization(task_rq(p), dl_se: &p->dl, new_bw);
376	}
377
378	static void __dl_clear_params(struct sched_dl_entity *dl_se);
379
380	/*
381	* The utilization of a task cannot be immediately removed from
382	* the rq active utilization (running_bw) when the task blocks.
383	* Instead, we have to wait for the so called "0-lag time".
384	*
385	* If a task blocks before the "0-lag time", a timer (the inactive
386	* timer) is armed, and running_bw is decreased when the timer
387	* fires.
388	*
389	* If the task wakes up again before the inactive timer fires,
390	* the timer is canceled, whereas if the task wakes up after the
391	* inactive timer fired (and running_bw has been decreased) the
392	* task's utilization has to be added to running_bw again.
393	* A flag in the deadline scheduling entity (dl_non_contending)
394	* is used to avoid race conditions between the inactive timer handler
395	* and task wakeups.
396	*
397	* The following diagram shows how running_bw is updated. A task is
398	* "ACTIVE" when its utilization contributes to running_bw; an
399	* "ACTIVE contending" task is in the TASK_RUNNING state, while an
400	* "ACTIVE non contending" task is a blocked task for which the "0-lag time"
401	* has not passed yet. An "INACTIVE" task is a task for which the "0-lag"
402	* time already passed, which does not contribute to running_bw anymore.
403	* +------------------+
404	* wakeup \| ACTIVE \|
405	* +------------------>+ contending \|
406	* \| add_running_bw \| \|
407	* \| +----+------+------+
408	* \| \| ^
409	* \| dequeue \| \|
410	* +--------+-------+ \| \|
411	* \| \| t >= 0-lag \| \| wakeup
412	* \| INACTIVE \|<---------------+ \|
413	* \| \| sub_running_bw \| \|
414	* +--------+-------+ \| \|
415	* ^ \| \|
416	* \| t < 0-lag \| \|
417	* \| \| \|
418	* \| V \|
419	* \| +----+------+------+
420	* \| sub_running_bw \| ACTIVE \|
421	* +-------------------+ \|
422	* inactive timer \| non contending \|
423	* fired +------------------+
424	*
425	* The task_non_contending() function is invoked when a task
426	* blocks, and checks if the 0-lag time already passed or
427	* not (in the first case, it directly updates running_bw;
428	* in the second case, it arms the inactive timer).
429	*
430	* The task_contending() function is invoked when a task wakes
431	* up, and checks if the task is still in the "ACTIVE non contending"
432	* state or not (in the second case, it updates running_bw).
433	*/
434	static void task_non_contending(struct sched_dl_entity *dl_se)
435	{
436	struct hrtimer *timer = &dl_se->inactive_timer;
437	struct rq *rq = rq_of_dl_se(dl_se);
438	struct dl_rq *dl_rq = &rq->dl;
439	s64 zerolag_time;
440
441	/*
442	* If this is a non-deadline task that has been boosted,
443	* do nothing
444	*/
445	if (dl_se->dl_runtime == `0`)
446	return;
447
448	if (dl_entity_is_special(dl_se))
449	return;
450
451	WARN_ON(dl_se->dl_non_contending);
452
453	zerolag_time = dl_se->deadline -
454	div64_long((dl_se->runtime * dl_se->dl_period),
455	dl_se->dl_runtime);
456
457	/*
458	* Using relative times instead of the absolute "0-lag time"
459	* allows to simplify the code
460	*/
461	zerolag_time -= rq_clock(rq);
462
463	/*
464	* If the "0-lag time" already passed, decrease the active
465	* utilization now, instead of starting a timer
466	*/
467	if ((zerolag_time < `0`) \|\| hrtimer_active(timer: &dl_se->inactive_timer)) {
468	if (dl_server(dl_se)) {
469	sub_running_bw(dl_se, dl_rq);
470	} else {
471	struct task_struct *p = dl_task_of(dl_se);
472
473	if (dl_task(p))
474	sub_running_bw(dl_se, dl_rq);
475
476	if (!dl_task(p) \|\| READ_ONCE(p->__state) == TASK_DEAD) {
477	struct dl_bw *dl_b = dl_bw_of(i: task_cpu(p));
478
479	if (READ_ONCE(p->__state) == TASK_DEAD)
480	sub_rq_bw(dl_se, dl_rq: &rq->dl);
481	raw_spin_lock(&dl_b->lock);
482	__dl_sub(dl_b, tsk_bw: dl_se->dl_bw, cpus: dl_bw_cpus(i: task_cpu(p)));
483	raw_spin_unlock(&dl_b->lock);
484	__dl_clear_params(dl_se);
485	}
486	}
487
488	return;
489	}
490
491	dl_se->dl_non_contending = `1`;
492	if (!dl_server(dl_se))
493	get_task_struct(t: dl_task_of(dl_se));
494
495	hrtimer_start(timer, tim: ns_to_ktime(ns: zerolag_time), mode: HRTIMER_MODE_REL_HARD);
496	}
497
498	static void task_contending(struct sched_dl_entity dl_se, int* flags)
499	{
500	struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
501
502	/*
503	* If this is a non-deadline task that has been boosted,
504	* do nothing
505	*/
506	if (dl_se->dl_runtime == `0`)
507	return;
508
509	if (flags & ENQUEUE_MIGRATED)
510	add_rq_bw(dl_se, dl_rq);
511
512	if (dl_se->dl_non_contending) {
513	dl_se->dl_non_contending = `0`;
514	/*
515	* If the timer handler is currently running and the
516	* timer cannot be canceled, inactive_task_timer()
517	* will see that dl_not_contending is not set, and
518	* will not touch the rq's active utilization,
519	* so we are still safe.
520	*/
521	cancel_inactive_timer(dl_se);
522	} else {
523	/*
524	* Since "dl_non_contending" is not set, the
525	* task's utilization has already been removed from
526	* active utilization (either when the task blocked,
527	* when the "inactive timer" fired).
528	* So, add it back.
529	*/
530	add_running_bw(dl_se, dl_rq);
531	}
532	}
533
534	static inline int is_leftmost(struct sched_dl_entity dl_se, struct* dl_rq *dl_rq)
535	{
536	return rb_first_cached(&dl_rq->root) == &dl_se->rb_node;
537	}
538
539	static void init_dl_rq_bw_ratio(struct dl_rq *dl_rq);
540
541	void init_dl_bw(struct dl_bw *dl_b)
542	{
543	raw_spin_lock_init(&dl_b->lock);
544	if (global_rt_runtime() == RUNTIME_INF)
545	dl_b->bw = -`1`;
546	else
547	dl_b->bw = to_ratio(period: global_rt_period(), runtime: global_rt_runtime());
548	dl_b->total_bw = `0`;
549	}
550
551	void init_dl_rq(struct dl_rq *dl_rq)
552	{
553	dl_rq->root = RB_ROOT_CACHED;
554
555	#ifdef CONFIG_SMP
556	/ zero means no -deadline tasks /
557	dl_rq->earliest_dl.curr = dl_rq->earliest_dl.next = `0`;
558
559	dl_rq->overloaded = `0`;
560	dl_rq->pushable_dl_tasks_root = RB_ROOT_CACHED;
561	#else
562	init_dl_bw(&dl_rq->dl_bw);
563	#endif
564
565	dl_rq->running_bw = `0`;
566	dl_rq->this_bw = `0`;
567	init_dl_rq_bw_ratio(dl_rq);
568	}
569
570	#ifdef CONFIG_SMP
571
572	static inline int dl_overloaded(struct rq *rq)
573	{
574	return atomic_read(v: &rq->rd->dlo_count);
575	}
576
577	static inline void dl_set_overload(struct rq *rq)
578	{
579	if (!rq->online)
580	return;
581
582	cpumask_set_cpu(cpu: rq->cpu, dstp: rq->rd->dlo_mask);
583	/*
584	* Must be visible before the overload count is
585	* set (as in sched_rt.c).
586	*
587	* Matched by the barrier in pull_dl_task().
588	*/
589	smp_wmb();
590	atomic_inc(v: &rq->rd->dlo_count);
591	}
592
593	static inline void dl_clear_overload(struct rq *rq)
594	{
595	if (!rq->online)
596	return;
597
598	atomic_dec(v: &rq->rd->dlo_count);
599	cpumask_clear_cpu(cpu: rq->cpu, dstp: rq->rd->dlo_mask);
600	}
601
/* Map a pushable_dl_tasks rb_node back to the task_struct embedding it. */
#define __node_2_pdl(node) \
	rb_entry((node), struct task_struct, pushable_dl_tasks)
604
605	static inline bool __pushable_less(struct rb_node a, const* struct rb_node *b)
606	{
607	return dl_entity_preempt(a: &__node_2_pdl(a)->dl, b: &__node_2_pdl(b)->dl);
608	}
609
610	static inline int has_pushable_dl_tasks(struct rq *rq)
611	{
612	return !RB_EMPTY_ROOT(&rq->dl.pushable_dl_tasks_root.rb_root);
613	}
614
615	/*
616	* The list of pushable -deadline task is not a plist, like in
617	* sched_rt.c, it is an rb-tree with tasks ordered by deadline.
618	*/
619	static void enqueue_pushable_dl_task(struct rq rq, struct* task_struct *p)
620	{
621	struct rb_node *leftmost;
622
623	WARN_ON_ONCE(!RB_EMPTY_NODE(&p->pushable_dl_tasks));
624
625	leftmost = rb_add_cached(node: &p->pushable_dl_tasks,
626	tree: &rq->dl.pushable_dl_tasks_root,
627	less: __pushable_less);
628	if (leftmost)
629	rq->dl.earliest_dl.next = p->dl.deadline;
630
631	if (!rq->dl.overloaded) {
632	dl_set_overload(rq);
633	rq->dl.overloaded = `1`;
634	}
635	}
636
637	static void dequeue_pushable_dl_task(struct rq rq, struct* task_struct *p)
638	{
639	struct dl_rq *dl_rq = &rq->dl;
640	struct rb_root_cached *root = &dl_rq->pushable_dl_tasks_root;
641	struct rb_node *leftmost;
642
643	if (RB_EMPTY_NODE(&p->pushable_dl_tasks))
644	return;
645
646	leftmost = rb_erase_cached(node: &p->pushable_dl_tasks, root);
647	if (leftmost)
648	dl_rq->earliest_dl.next = __node_2_pdl(leftmost)->dl.deadline;
649
650	RB_CLEAR_NODE(&p->pushable_dl_tasks);
651
652	if (!has_pushable_dl_tasks(rq) && rq->dl.overloaded) {
653	dl_clear_overload(rq);
654	rq->dl.overloaded = `0`;
655	}
656	}
657
658	static int push_dl_task(struct rq *rq);
659
660	static inline bool need_pull_dl_task(struct rq rq, struct* task_struct *prev)
661	{
662	return rq->online && dl_task(p: prev);
663	}
664
665	static DEFINE_PER_CPU(struct balance_callback, dl_push_head);
666	static DEFINE_PER_CPU(struct balance_callback, dl_pull_head);
667
668	static void push_dl_tasks(struct rq *);
669	static void pull_dl_task(struct rq *);
670
671	static inline void deadline_queue_push_tasks(struct rq *rq)
672	{
673	if (!has_pushable_dl_tasks(rq))
674	return;
675
676	queue_balance_callback(rq, head: &per_cpu(dl_push_head, rq->cpu), func: push_dl_tasks);
677	}
678
679	static inline void deadline_queue_pull_task(struct rq *rq)
680	{
681	queue_balance_callback(rq, head: &per_cpu(dl_pull_head, rq->cpu), func: pull_dl_task);
682	}
683
684	static struct rq find_lock_later_rq(struct* task_struct task, struct* rq *rq);
685
686	static struct rq dl_task_offline_migration(struct* rq rq, struct* task_struct *p)
687	{
688	struct rq *later_rq = NULL;
689	struct dl_bw *dl_b;
690
691	later_rq = find_lock_later_rq(task: p, rq);
692	if (!later_rq) {
693	int cpu;
694
695	/*
696	* If we cannot preempt any rq, fall back to pick any
697	* online CPU:
698	*/
699	cpu = cpumask_any_and(cpu_active_mask, p->cpus_ptr);
700	if (cpu >= nr_cpu_ids) {
701	/*
702	* Failed to find any suitable CPU.
703	* The task will never come back!
704	*/
705	WARN_ON_ONCE(dl_bandwidth_enabled());
706
707	/*
708	* If admission control is disabled we
709	* try a little harder to let the task
710	* run.
711	*/
712	cpu = cpumask_any(cpu_active_mask);
713	}
714	later_rq = cpu_rq(cpu);
715	double_lock_balance(this_rq: rq, busiest: later_rq);
716	}
717
718	if (p->dl.dl_non_contending \|\| p->dl.dl_throttled) {
719	/*
720	* Inactive timer is armed (or callback is running, but
721	* waiting for us to release rq locks). In any case, when it
722	* will fire (or continue), it will see running_bw of this
723	* task migrated to later_rq (and correctly handle it).
724	*/
725	sub_running_bw(dl_se: &p->dl, dl_rq: &rq->dl);
726	sub_rq_bw(dl_se: &p->dl, dl_rq: &rq->dl);
727
728	add_rq_bw(dl_se: &p->dl, dl_rq: &later_rq->dl);
729	add_running_bw(dl_se: &p->dl, dl_rq: &later_rq->dl);
730	} else {
731	sub_rq_bw(dl_se: &p->dl, dl_rq: &rq->dl);
732	add_rq_bw(dl_se: &p->dl, dl_rq: &later_rq->dl);
733	}
734
735	/*
736	* And we finally need to fix up root_domain(s) bandwidth accounting,
737	* since p is still hanging out in the old (now moved to default) root
738	* domain.
739	*/
740	dl_b = &rq->rd->dl_bw;
741	raw_spin_lock(&dl_b->lock);
742	__dl_sub(dl_b, tsk_bw: p->dl.dl_bw, cpus: cpumask_weight(srcp: rq->rd->span));
743	raw_spin_unlock(&dl_b->lock);
744
745	dl_b = &later_rq->rd->dl_bw;
746	raw_spin_lock(&dl_b->lock);
747	__dl_add(dl_b, tsk_bw: p->dl.dl_bw, cpus: cpumask_weight(srcp: later_rq->rd->span));
748	raw_spin_unlock(&dl_b->lock);
749
750	set_task_cpu(p, cpu: later_rq->cpu);
751	double_unlock_balance(this_rq: later_rq, busiest: rq);
752
753	return later_rq;
754	}
755
756	#else
757
/* !CONFIG_SMP: there is no pushable task tracking; nothing to do. */
static inline
void enqueue_pushable_dl_task(struct rq *rq, struct task_struct *p)
{
}
762
/* !CONFIG_SMP: there is no pushable task tracking; nothing to do. */
static inline
void dequeue_pushable_dl_task(struct rq *rq, struct task_struct *p)
{
}
767
/* !CONFIG_SMP: empty stub. */
static inline
void inc_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
{
}
772
/* !CONFIG_SMP: empty stub. */
static inline
void dec_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
{
}
777
/* !CONFIG_SMP: no push balancing; nothing to queue. */
static inline void deadline_queue_push_tasks(struct rq *rq)
{
}
781
/* !CONFIG_SMP: no pull balancing; nothing to queue. */
static inline void deadline_queue_pull_task(struct rq *rq)
{
}
785	#endif /* CONFIG_SMP */
786
/* Forward declarations for functions defined later in this file. */
static void
enqueue_dl_entity(struct sched_dl_entity *dl_se, int flags);
static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags);
static void dequeue_dl_entity(struct sched_dl_entity *dl_se, int flags);
static void wakeup_preempt_dl(struct rq *rq, struct task_struct *p, int flags);
792
793	static inline void replenish_dl_new_period(struct sched_dl_entity *dl_se,
794	struct rq *rq)
795	{
796	/ for non-boosted task, pi_of(dl_se) == dl_se /
797	dl_se->deadline = rq_clock(rq) + pi_of(dl_se)->dl_deadline;
798	dl_se->runtime = pi_of(dl_se)->dl_runtime;
799
800	/*
801	* If it is a deferred reservation, and the server
802	* is not handling an starvation case, defer it.
803	*/
804	if (dl_se->dl_defer && !dl_se->dl_defer_running) {
805	dl_se->dl_throttled = `1`;
806	dl_se->dl_defer_armed = `1`;
807	}
808	}
809
810	/*
811	* We are being explicitly informed that a new instance is starting,
812	* and this means that:
813	* - the absolute deadline of the entity has to be placed at
814	* current time + relative deadline;
815	* - the runtime of the entity has to be set to the maximum value.
816	*
817	* The capability of specifying such event is useful whenever a -deadline
818	* entity wants to (try to!) synchronize its behaviour with the scheduler's
819	* one, and to (try to!) reconcile itself with its own scheduling
820	* parameters.
821	*/
822	static inline void setup_new_dl_entity(struct sched_dl_entity *dl_se)
823	{
824	struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
825	struct rq *rq = rq_of_dl_rq(dl_rq);
826
827	WARN_ON(is_dl_boosted(dl_se));
828	WARN_ON(dl_time_before(rq_clock(rq), dl_se->deadline));
829
830	/*
831	* We are racing with the deadline timer. So, do nothing because
832	* the deadline timer handler will take care of properly recharging
833	* the runtime and postponing the deadline
834	*/
835	if (dl_se->dl_throttled)
836	return;
837
838	/*
839	* We use the regular wall clock time to set deadlines in the
840	* future; in fact, we must consider execution overheads (time
841	* spent on hardirq context, etc.).
842	*/
843	replenish_dl_new_period(dl_se, rq);
844	}
845
846	static int start_dl_timer(struct sched_dl_entity *dl_se);
847	static bool dl_entity_overflow(struct sched_dl_entity *dl_se, u64 t);
848
849	/*
850	* Pure Earliest Deadline First (EDF) scheduling does not deal with the
851	* possibility of a entity lasting more than what it declared, and thus
852	* exhausting its runtime.
853	*
854	* Here we are interested in making runtime overrun possible, but we do
855	* not want a entity which is misbehaving to affect the scheduling of all
856	* other entities.
857	* Therefore, a budgeting strategy called Constant Bandwidth Server (CBS)
858	* is used, in order to confine each entity within its own bandwidth.
859	*
860	* This function deals exactly with that, and ensures that when the runtime
861	* of a entity is replenished, its deadline is also postponed. That ensures
862	* the overrunning entity can't interfere with other entity in the system and
863	* can't make them miss their deadlines. Reasons why this kind of overruns
864	* could happen are, typically, a entity voluntarily trying to overcome its
865	* runtime, or it just underestimated it during sched_setattr().
866	*/
867	static void replenish_dl_entity(struct sched_dl_entity *dl_se)
868	{
869	struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
870	struct rq *rq = rq_of_dl_rq(dl_rq);
871
872	WARN_ON_ONCE(pi_of(dl_se)->dl_runtime <= `0`);
873
874	/*
875	* This could be the case for a !-dl task that is boosted.
876	* Just go with full inherited parameters.
877	*
878	* Or, it could be the case of a deferred reservation that
879	* was not able to consume its runtime in background and
880	* reached this point with current u > U.
881	*
882	* In both cases, set a new period.
883	*/
884	if (dl_se->dl_deadline == `0` \|\|
885	(dl_se->dl_defer_armed && dl_entity_overflow(dl_se, t: rq_clock(rq)))) {
886	dl_se->deadline = rq_clock(rq) + pi_of(dl_se)->dl_deadline;
887	dl_se->runtime = pi_of(dl_se)->dl_runtime;
888	}
889
890	if (dl_se->dl_yielded && dl_se->runtime > `0`)
891	dl_se->runtime = `0`;
892
893	/*
894	* We keep moving the deadline away until we get some
895	* available runtime for the entity. This ensures correct
896	* handling of situations where the runtime overrun is
897	* arbitrary large.
898	*/
899	while (dl_se->runtime <= `0`) {
900	dl_se->deadline += pi_of(dl_se)->dl_period;
901	dl_se->runtime += pi_of(dl_se)->dl_runtime;
902	}
903
904	/*
905	* At this point, the deadline really should be "in
906	* the future" with respect to rq->clock. If it's
907	* not, we are, for some reason, lagging too much!
908	* Anyway, after having warn userspace abut that,
909	* we still try to keep the things running by
910	* resetting the deadline and the budget of the
911	* entity.
912	*/
913	if (dl_time_before(a: dl_se->deadline, b: rq_clock(rq))) {
914	printk_deferred_once("sched: DL replenish lagged too much\n");
915	replenish_dl_new_period(dl_se, rq);
916	}
917
918	if (dl_se->dl_yielded)
919	dl_se->dl_yielded = `0`;
920	if (dl_se->dl_throttled)
921	dl_se->dl_throttled = `0`;
922
923	/*
924	* If this is the replenishment of a deferred reservation,
925	* clear the flag and return.
926	*/
927	if (dl_se->dl_defer_armed) {
928	dl_se->dl_defer_armed = `0`;
929	return;
930	}
931
932	/*
933	* A this point, if the deferred server is not armed, and the deadline
934	* is in the future, if it is not running already, throttle the server
935	* and arm the defer timer.
936	*/
937	if (dl_se->dl_defer && !dl_se->dl_defer_running &&
938	dl_time_before(a: rq_clock(rq: dl_se->rq), b: dl_se->deadline - dl_se->runtime)) {
939	if (!is_dl_boosted(dl_se) && dl_se->server_has_tasks(dl_se)) {
940
941	/*
942	* Set dl_se->dl_defer_armed and dl_throttled variables to
943	* inform the start_dl_timer() that this is a deferred
944	* activation.
945	*/
946	dl_se->dl_defer_armed = `1`;
947	dl_se->dl_throttled = `1`;
948	if (!start_dl_timer(dl_se)) {
949	/*
950	* If for whatever reason (delays), a previous timer was
951	* queued but not serviced, cancel it and clean the
952	* deferrable server variables intended for start_dl_timer().
953	*/
954	hrtimer_try_to_cancel(timer: &dl_se->dl_timer);
955	dl_se->dl_defer_armed = `0`;
956	dl_se->dl_throttled = `0`;
957	}
958	}
959	}
960	}
961
962	/*
963	* Here we check if --at time t-- an entity (which is probably being
964	* [re]activated or, in general, enqueued) can use its remaining runtime
965	* and its current deadline _without_ exceeding the bandwidth it is
966	* assigned (function returns true if it can't). We are in fact applying
967	* one of the CBS rules: when a task wakes up, if the residual runtime
968	* over residual deadline fits within the allocated bandwidth, then we
969	* can keep the current (absolute) deadline and residual budget without
970	* disrupting the schedulability of the system. Otherwise, we should
971	* refill the runtime and set the deadline a period in the future,
972	* because keeping the current (absolute) deadline of the task would
973	* result in breaking guarantees promised to other tasks (refer to
974	* Documentation/scheduler/sched-deadline.rst for more information).
975	*
976	* This function returns true if:
977	*
978	* runtime / (deadline - t) > dl_runtime / dl_deadline ,
979	*
980	* IOW we can't recycle current parameters.
981	*
982	* Notice that the bandwidth check is done against the deadline. For
983	* task with deadline equal to period this is the same of using
984	* dl_period instead of dl_deadline in the equation above.
985	*/
986	static bool dl_entity_overflow(struct sched_dl_entity *dl_se, u64 t)
987	{
988	u64 left, right;
989
990	/*
991	* left and right are the two sides of the equation above,
992	* after a bit of shuffling to use multiplications instead
993	* of divisions.
994	*
995	* Note that none of the time values involved in the two
996	* multiplications are absolute: dl_deadline and dl_runtime
997	* are the relative deadline and the maximum runtime of each
998	* instance, runtime is the runtime left for the last instance
999	* and (deadline - t), since t is rq->clock, is the time left
1000	* to the (absolute) deadline. Even if overflowing the u64 type
1001	* is very unlikely to occur in both cases, here we scale down
1002	* as we want to avoid that risk at all. Scaling down by 10
1003	* means that we reduce granularity to 1us. We are fine with it,
1004	* since this is only a true/false check and, anyway, thinking
1005	* of anything below microseconds resolution is actually fiction
1006	* (but still we want to give the user that illusion >;).
1007	*/
1008	left = (pi_of(dl_se)->dl_deadline >> DL_SCALE) * (dl_se->runtime >> DL_SCALE);
1009	right = ((dl_se->deadline - t) >> DL_SCALE) *
1010	(pi_of(dl_se)->dl_runtime >> DL_SCALE);
1011
1012	return dl_time_before(a: right, b: left);
1013	}
1014
1015	/*
1016	* Revised wakeup rule [1]: For self-suspending tasks, rather then
1017	* re-initializing task's runtime and deadline, the revised wakeup
1018	* rule adjusts the task's runtime to avoid the task to overrun its
1019	* density.
1020	*
1021	* Reasoning: a task may overrun the density if:
1022	* runtime / (deadline - t) > dl_runtime / dl_deadline
1023	*
1024	* Therefore, runtime can be adjusted to:
1025	* runtime = (dl_runtime / dl_deadline) * (deadline - t)
1026	*
1027	* In such way that runtime will be equal to the maximum density
1028	* the task can use without breaking any rule.
1029	*
1030	* [1] Luca Abeni, Giuseppe Lipari, and Juri Lelli. 2015. Constant
1031	* bandwidth server revisited. SIGBED Rev. 11, 4 (January 2015), 19-24.
1032	*/
1033	static void
1034	update_dl_revised_wakeup(struct sched_dl_entity dl_se, struct* rq *rq)
1035	{
1036	u64 laxity = dl_se->deadline - rq_clock(rq);
1037
1038	/*
1039	* If the task has deadline < period, and the deadline is in the past,
1040	* it should already be throttled before this check.
1041	*
1042	* See update_dl_entity() comments for further details.
1043	*/
1044	WARN_ON(dl_time_before(dl_se->deadline, rq_clock(rq)));
1045
1046	dl_se->runtime = (dl_se->dl_density * laxity) >> BW_SHIFT;
1047	}
1048
1049	/*
1050	* Regarding the deadline, a task with implicit deadline has a relative
1051	* deadline == relative period. A task with constrained deadline has a
1052	* relative deadline <= relative period.
1053	*
1054	* We support constrained deadline tasks. However, there are some restrictions
1055	* applied only for tasks which do not have an implicit deadline. See
1056	* update_dl_entity() to know more about such restrictions.
1057	*
1058	* The dl_is_implicit() returns true if the task has an implicit deadline.
1059	*/
1060	static inline bool dl_is_implicit(struct sched_dl_entity *dl_se)
1061	{
1062	return dl_se->dl_deadline == dl_se->dl_period;
1063	}
1064
1065	/*
1066	* When a deadline entity is placed in the runqueue, its runtime and deadline
1067	* might need to be updated. This is done by a CBS wake up rule. There are two
1068	* different rules: 1) the original CBS; and 2) the Revisited CBS.
1069	*
1070	* When the task is starting a new period, the Original CBS is used. In this
1071	* case, the runtime is replenished and a new absolute deadline is set.
1072	*
1073	* When a task is queued before the begin of the next period, using the
1074	* remaining runtime and deadline could make the entity to overflow, see
1075	* dl_entity_overflow() to find more about runtime overflow. When such case
1076	* is detected, the runtime and deadline need to be updated.
1077	*
1078	* If the task has an implicit deadline, i.e., deadline == period, the Original
1079	* CBS is applied. The runtime is replenished and a new absolute deadline is
1080	* set, as in the previous cases.
1081	*
1082	* However, the Original CBS does not work properly for tasks with
1083	* deadline < period, which are said to have a constrained deadline. By
1084	* applying the Original CBS, a constrained deadline task would be able to run
1085	* runtime/deadline in a period. With deadline < period, the task would
1086	* overrun the runtime/period allowed bandwidth, breaking the admission test.
1087	*
1088	* In order to prevent this misbehave, the Revisited CBS is used for
1089	* constrained deadline tasks when a runtime overflow is detected. In the
1090	* Revisited CBS, rather than replenishing & setting a new absolute deadline,
1091	* the remaining runtime of the task is reduced to avoid runtime overflow.
1092	* Please refer to the comments update_dl_revised_wakeup() function to find
1093	* more about the Revised CBS rule.
1094	*/
1095	static void update_dl_entity(struct sched_dl_entity *dl_se)
1096	{
1097	struct rq *rq = rq_of_dl_se(dl_se);
1098
1099	if (dl_time_before(a: dl_se->deadline, b: rq_clock(rq)) \|\|
1100	dl_entity_overflow(dl_se, t: rq_clock(rq))) {
1101
1102	if (unlikely(!dl_is_implicit(dl_se) &&
1103	!dl_time_before(dl_se->deadline, rq_clock(rq)) &&
1104	!is_dl_boosted(dl_se))) {
1105	update_dl_revised_wakeup(dl_se, rq);
1106	return;
1107	}
1108
1109	replenish_dl_new_period(dl_se, rq);
1110	} else if (dl_server(dl_se) && dl_se->dl_defer) {
1111	/*
1112	* The server can still use its previous deadline, so check if
1113	* it left the dl_defer_running state.
1114	*/
1115	if (!dl_se->dl_defer_running) {
1116	dl_se->dl_defer_armed = `1`;
1117	dl_se->dl_throttled = `1`;
1118	}
1119	}
1120	}
1121
1122	static inline u64 dl_next_period(struct sched_dl_entity *dl_se)
1123	{
1124	return dl_se->deadline - dl_se->dl_deadline + dl_se->dl_period;
1125	}
1126
1127	/*
1128	* If the entity depleted all its runtime, and if we want it to sleep
1129	* while waiting for some new execution time to become available, we
1130	* set the bandwidth replenishment timer to the replenishment instant
1131	* and try to activate it.
1132	*
1133	* Notice that it is important for the caller to know if the timer
1134	* actually started or not (i.e., the replenishment instant is in
1135	* the future or in the past).
1136	*/
1137	static int start_dl_timer(struct sched_dl_entity *dl_se)
1138	{
1139	struct hrtimer *timer = &dl_se->dl_timer;
1140	struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
1141	struct rq *rq = rq_of_dl_rq(dl_rq);
1142	ktime_t now, act;
1143	s64 delta;
1144
1145	lockdep_assert_rq_held(rq);
1146
1147	/*
1148	* We want the timer to fire at the deadline, but considering
1149	* that it is actually coming from rq->clock and not from
1150	* hrtimer's time base reading.
1151	*
1152	* The deferred reservation will have its timer set to
1153	* (deadline - runtime). At that point, the CBS rule will decide
1154	* if the current deadline can be used, or if a replenishment is
1155	* required to avoid add too much pressure on the system
1156	* (current u > U).
1157	*/
1158	if (dl_se->dl_defer_armed) {
1159	WARN_ON_ONCE(!dl_se->dl_throttled);
1160	act = ns_to_ktime(ns: dl_se->deadline - dl_se->runtime);
1161	} else {
1162	/ act = deadline - rel-deadline + period /
1163	act = ns_to_ktime(ns: dl_next_period(dl_se));
1164	}
1165
1166	now = hrtimer_cb_get_time(timer);
1167	delta = ktime_to_ns(kt: now) - rq_clock(rq);
1168	act = ktime_add_ns(act, delta);
1169
1170	/*
1171	* If the expiry time already passed, e.g., because the value
1172	* chosen as the deadline is too small, don't even try to
1173	* start the timer in the past!
1174	*/
1175	if (ktime_us_delta(later: act, earlier: now) < `0`)
1176	return `0`;
1177
1178	/*
1179	* !enqueued will guarantee another callback; even if one is already in
1180	* progress. This ensures a balanced {get,put}_task_struct().
1181	*
1182	* The race against __run_timer() clearing the enqueued state is
1183	* harmless because we're holding task_rq()->lock, therefore the timer
1184	* expiring after we've done the check will wait on its task_rq_lock()
1185	* and observe our state.
1186	*/
1187	if (!hrtimer_is_queued(timer)) {
1188	if (!dl_server(dl_se))
1189	get_task_struct(t: dl_task_of(dl_se));
1190	hrtimer_start(timer, tim: act, mode: HRTIMER_MODE_ABS_HARD);
1191	}
1192
1193	return `1`;
1194	}
1195
/*
 * On CONFIG_SMP, try to push a -deadline task away from @rq if it has
 * pushable tasks; a no-op otherwise.
 */
static void __push_dl_task(struct rq *rq, struct rq_flags *rf)
{
#ifdef CONFIG_SMP
	/*
	 * Queueing this task back might have overloaded rq, check if we need
	 * to kick someone away.
	 */
	if (has_pushable_dl_tasks(rq)) {
		/*
		 * Nothing relies on rq->lock after this, so it's safe to drop
		 * rq->lock.
		 */
		rq_unpin_lock(rq, rf);
		push_dl_task(rq);
		rq_repin_lock(rq, rf);
	}
#endif
}
1214
1215	/ a defer timer will not be reset if the runtime consumed was < dl_server_min_res /
1216	static const u64 dl_server_min_res = `1` * NSEC_PER_MSEC;
1217
1218	static enum hrtimer_restart dl_server_timer(struct hrtimer timer, struct* sched_dl_entity *dl_se)
1219	{
1220	struct rq *rq = rq_of_dl_se(dl_se);
1221	u64 fw;
1222
1223	scoped_guard (rq_lock, rq) {
1224	struct rq_flags *rf = &scope.rf;
1225
1226	if (!dl_se->dl_throttled \|\| !dl_se->dl_runtime)
1227	return HRTIMER_NORESTART;
1228
1229	sched_clock_tick();
1230	update_rq_clock(rq);
1231
1232	if (!dl_se->dl_runtime)
1233	return HRTIMER_NORESTART;
1234
1235	if (!dl_se->server_has_tasks(dl_se)) {
1236	replenish_dl_entity(dl_se);
1237	return HRTIMER_NORESTART;
1238	}
1239
1240	if (dl_se->dl_defer_armed) {
1241	/*
1242	* First check if the server could consume runtime in background.
1243	* If so, it is possible to push the defer timer for this amount
1244	* of time. The dl_server_min_res serves as a limit to avoid
1245	* forwarding the timer for a too small amount of time.
1246	*/
1247	if (dl_time_before(a: rq_clock(rq: dl_se->rq),
1248	b: (dl_se->deadline - dl_se->runtime - dl_server_min_res))) {
1249
1250	/ reset the defer timer /
1251	fw = dl_se->deadline - rq_clock(rq: dl_se->rq) - dl_se->runtime;
1252
1253	hrtimer_forward_now(timer, interval: ns_to_ktime(ns: fw));
1254	return HRTIMER_RESTART;
1255	}
1256
1257	dl_se->dl_defer_running = `1`;
1258	}
1259
1260	enqueue_dl_entity(dl_se, ENQUEUE_REPLENISH);
1261
1262	if (!dl_task(p: dl_se->rq->curr) \|\| dl_entity_preempt(a: dl_se, b: &dl_se->rq->curr->dl))
1263	resched_curr(rq);
1264
1265	__push_dl_task(rq, rf);
1266	}
1267
1268	return HRTIMER_NORESTART;
1269	}
1270
1271	/*
1272	* This is the bandwidth enforcement timer callback. If here, we know
1273	* a task is not on its dl_rq, since the fact that the timer was running
1274	* means the task is throttled and needs a runtime replenishment.
1275	*
1276	* However, what we actually do depends on the fact the task is active,
1277	* (it is on its rq) or has been removed from there by a call to
1278	* dequeue_task_dl(). In the former case we must issue the runtime
1279	* replenishment and add the task back to the dl_rq; in the latter, we just
1280	* do nothing but clearing dl_throttled, so that runtime and deadline
1281	* updating (and the queueing back to dl_rq) will be done by the
1282	* next call to enqueue_task_dl().
1283	*/
1284	static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
1285	{
1286	struct sched_dl_entity *dl_se = container_of(timer,
1287	struct sched_dl_entity,
1288	dl_timer);
1289	struct task_struct *p;
1290	struct rq_flags rf;
1291	struct rq *rq;
1292
1293	if (dl_server(dl_se))
1294	return dl_server_timer(timer, dl_se);
1295
1296	p = dl_task_of(dl_se);
1297	rq = task_rq_lock(p, rf: &rf);
1298
1299	/*
1300	* The task might have changed its scheduling policy to something
1301	* different than SCHED_DEADLINE (through switched_from_dl()).
1302	*/
1303	if (!dl_task(p))
1304	goto unlock;
1305
1306	/*
1307	* The task might have been boosted by someone else and might be in the
1308	* boosting/deboosting path, its not throttled.
1309	*/
1310	if (is_dl_boosted(dl_se))
1311	goto unlock;
1312
1313	/*
1314	* Spurious timer due to start_dl_timer() race; or we already received
1315	* a replenishment from rt_mutex_setprio().
1316	*/
1317	if (!dl_se->dl_throttled)
1318	goto unlock;
1319
1320	sched_clock_tick();
1321	update_rq_clock(rq);
1322
1323	/*
1324	* If the throttle happened during sched-out; like:
1325	*
1326	* schedule()
1327	* deactivate_task()
1328	* dequeue_task_dl()
1329	* update_curr_dl()
1330	* start_dl_timer()
1331	* __dequeue_task_dl()
1332	* prev->on_rq = 0;
1333	*
1334	* We can be both throttled and !queued. Replenish the counter
1335	* but do not enqueue -- wait for our wakeup to do that.
1336	*/
1337	if (!task_on_rq_queued(p)) {
1338	replenish_dl_entity(dl_se);
1339	goto unlock;
1340	}
1341
1342	#ifdef CONFIG_SMP
1343	if (unlikely(!rq->online)) {
1344	/*
1345	* If the runqueue is no longer available, migrate the
1346	* task elsewhere. This necessarily changes rq.
1347	*/
1348	lockdep_unpin_lock(__rq_lockp(rq), rf.cookie);
1349	rq = dl_task_offline_migration(rq, p);
1350	rf.cookie = lockdep_pin_lock(__rq_lockp(rq));
1351	update_rq_clock(rq);
1352
1353	/*
1354	* Now that the task has been migrated to the new RQ and we
1355	* have that locked, proceed as normal and enqueue the task
1356	* there.
1357	*/
1358	}
1359	#endif
1360
1361	enqueue_task_dl(rq, p, ENQUEUE_REPLENISH);
1362	if (dl_task(p: rq->donor))
1363	wakeup_preempt_dl(rq, p, flags: `0`);
1364	else
1365	resched_curr(rq);
1366
1367	__push_dl_task(rq, rf: &rf);
1368
1369	unlock:
1370	task_rq_unlock(rq, p, rf: &rf);
1371
1372	/*
1373	* This can free the task_struct, including this hrtimer, do not touch
1374	* anything related to that after this.
1375	*/
1376	put_task_struct(t: p);
1377
1378	return HRTIMER_NORESTART;
1379	}
1380
1381	static void init_dl_task_timer(struct sched_dl_entity *dl_se)
1382	{
1383	struct hrtimer *timer = &dl_se->dl_timer;
1384
1385	hrtimer_setup(timer, function: dl_task_timer, CLOCK_MONOTONIC, mode: HRTIMER_MODE_REL_HARD);
1386	}
1387
1388	/*
1389	* During the activation, CBS checks if it can reuse the current task's
1390	* runtime and period. If the deadline of the task is in the past, CBS
1391	* cannot use the runtime, and so it replenishes the task. This rule
1392	* works fine for implicit deadline tasks (deadline == period), and the
1393	* CBS was designed for implicit deadline tasks. However, a task with
1394	* constrained deadline (deadline < period) might be awakened after the
1395	* deadline, but before the next period. In this case, replenishing the
1396	* task would allow it to run for runtime / deadline. As in this case
1397	* deadline < period, CBS enables a task to run for more than the
1398	* runtime / period. In a very loaded system, this can cause a domino
1399	* effect, making other tasks miss their deadlines.
1400	*
1401	* To avoid this problem, in the activation of a constrained deadline
1402	* task after the deadline but before the next period, throttle the
1403	* task and set the replenishing timer to the begin of the next period,
1404	* unless it is boosted.
1405	*/
1406	static inline void dl_check_constrained_dl(struct sched_dl_entity *dl_se)
1407	{
1408	struct rq *rq = rq_of_dl_se(dl_se);
1409
1410	if (dl_time_before(a: dl_se->deadline, b: rq_clock(rq)) &&
1411	dl_time_before(a: rq_clock(rq), b: dl_next_period(dl_se))) {
1412	if (unlikely(is_dl_boosted(dl_se) \|\| !start_dl_timer(dl_se)))
1413	return;
1414	dl_se->dl_throttled = `1`;
1415	if (dl_se->runtime > `0`)
1416	dl_se->runtime = `0`;
1417	}
1418	}
1419
1420	static
1421	int dl_runtime_exceeded(struct sched_dl_entity *dl_se)
1422	{
1423	return (dl_se->runtime <= `0`);
1424	}
1425
1426	/*
1427	* This function implements the GRUB accounting rule. According to the
1428	* GRUB reclaiming algorithm, the runtime is not decreased as "dq = -dt",
1429	* but as "dq = -(max{u, (Umax - Uinact - Uextra)} / Umax) dt",
1430	* where u is the utilization of the task, Umax is the maximum reclaimable
1431	* utilization, Uinact is the (per-runqueue) inactive utilization, computed
1432	* as the difference between the "total runqueue utilization" and the
1433	* "runqueue active utilization", and Uextra is the (per runqueue) extra
1434	* reclaimable utilization.
1435	* Since rq->dl.running_bw and rq->dl.this_bw contain utilizations multiplied
1436	* by 2^BW_SHIFT, the result has to be shifted right by BW_SHIFT.
1437	* Since rq->dl.bw_ratio contains 1 / Umax multiplied by 2^RATIO_SHIFT, dl_bw
1438	* is multiplied by rq->dl.bw_ratio and shifted right by RATIO_SHIFT.
1439	* Since delta is a 64 bit variable, to have an overflow its value should be
1440	* larger than 2^(64 - 20 - 8), which is more than 64 seconds. So, overflow is
1441	* not an issue here.
1442	*/
1443	static u64 grub_reclaim(u64 delta, struct rq rq, struct* sched_dl_entity *dl_se)
1444	{
1445	u64 u_act;
1446	u64 u_inact = rq->dl.this_bw - rq->dl.running_bw; / Utot - Uact /
1447
1448	/*
1449	* Instead of computing max{u, (u_max - u_inact - u_extra)}, we
1450	* compare u_inact + u_extra with u_max - u, because u_inact + u_extra
1451	* can be larger than u_max. So, u_max - u_inact - u_extra would be
1452	* negative leading to wrong results.
1453	*/
1454	if (u_inact + rq->dl.extra_bw > rq->dl.max_bw - dl_se->dl_bw)
1455	u_act = dl_se->dl_bw;
1456	else
1457	u_act = rq->dl.max_bw - u_inact - rq->dl.extra_bw;
1458
1459	u_act = (u_act * rq->dl.bw_ratio) >> RATIO_SHIFT;
1460	return (delta * u_act) >> BW_SHIFT;
1461	}
1462
1463	s64 dl_scaled_delta_exec(struct rq rq, struct* sched_dl_entity *dl_se, s64 delta_exec)
1464	{
1465	s64 scaled_delta_exec;
1466
1467	/*
1468	* For tasks that participate in GRUB, we implement GRUB-PA: the
1469	* spare reclaimed bandwidth is used to clock down frequency.
1470	*
1471	* For the others, we still need to scale reservation parameters
1472	* according to current frequency and CPU maximum capacity.
1473	*/
1474	if (unlikely(dl_se->flags & SCHED_FLAG_RECLAIM)) {
1475	scaled_delta_exec = grub_reclaim(delta: delta_exec, rq, dl_se);
1476	} else {
1477	int cpu = cpu_of(rq);
1478	unsigned long scale_freq = arch_scale_freq_capacity(cpu);
1479	unsigned long scale_cpu = arch_scale_cpu_capacity(cpu);
1480
1481	scaled_delta_exec = cap_scale(delta_exec, scale_freq);
1482	scaled_delta_exec = cap_scale(scaled_delta_exec, scale_cpu);
1483	}
1484
1485	return scaled_delta_exec;
1486	}
1487
1488	static inline void
1489	update_stats_dequeue_dl(struct dl_rq dl_rq, struct* sched_dl_entity *dl_se,
1490	int flags);
1491	static void update_curr_dl_se(struct rq rq, struct* sched_dl_entity *dl_se, s64 delta_exec)
1492	{
1493	s64 scaled_delta_exec;
1494
1495	if (unlikely(delta_exec <= `0`)) {
1496	if (unlikely(dl_se->dl_yielded))
1497	goto throttle;
1498	return;
1499	}
1500
1501	if (dl_server(dl_se) && dl_se->dl_throttled && !dl_se->dl_defer)
1502	return;
1503
1504	if (dl_entity_is_special(dl_se))
1505	return;
1506
1507	scaled_delta_exec = dl_scaled_delta_exec(rq, dl_se, delta_exec);
1508
1509	dl_se->runtime -= scaled_delta_exec;
1510
1511	/*
1512	* The fair server can consume its runtime while throttled (not queued/
1513	* running as regular CFS).
1514	*
1515	* If the server consumes its entire runtime in this state. The server
1516	* is not required for the current period. Thus, reset the server by
1517	* starting a new period, pushing the activation.
1518	*/
1519	if (dl_se->dl_defer && dl_se->dl_throttled && dl_runtime_exceeded(dl_se)) {
1520	/*
1521	* If the server was previously activated - the starving condition
1522	* took place, it this point it went away because the fair scheduler
1523	* was able to get runtime in background. So return to the initial
1524	* state.
1525	*/
1526	dl_se->dl_defer_running = `0`;
1527
1528	hrtimer_try_to_cancel(timer: &dl_se->dl_timer);
1529
1530	replenish_dl_new_period(dl_se, rq: dl_se->rq);
1531
1532	/*
1533	* Not being able to start the timer seems problematic. If it could not
1534	* be started for whatever reason, we need to "unthrottle" the DL server
1535	* and queue right away. Otherwise nothing might queue it. That's similar
1536	* to what enqueue_dl_entity() does on start_dl_timer==0. For now, just warn.
1537	*/
1538	WARN_ON_ONCE(!start_dl_timer(dl_se));
1539
1540	return;
1541	}
1542
1543	throttle:
1544	if (dl_runtime_exceeded(dl_se) \|\| dl_se->dl_yielded) {
1545	dl_se->dl_throttled = `1`;
1546
1547	/ If requested, inform the user about runtime overruns. /
1548	if (dl_runtime_exceeded(dl_se) &&
1549	(dl_se->flags & SCHED_FLAG_DL_OVERRUN))
1550	dl_se->dl_overrun = `1`;
1551
1552	dequeue_dl_entity(dl_se, flags: `0`);
1553	if (!dl_server(dl_se)) {
1554	update_stats_dequeue_dl(dl_rq: &rq->dl, dl_se, flags: `0`);
1555	dequeue_pushable_dl_task(rq, p: dl_task_of(dl_se));
1556	}
1557
1558	if (unlikely(is_dl_boosted(dl_se) \|\| !start_dl_timer(dl_se))) {
1559	if (dl_server(dl_se))
1560	enqueue_dl_entity(dl_se, ENQUEUE_REPLENISH);
1561	else
1562	enqueue_task_dl(rq, p: dl_task_of(dl_se), ENQUEUE_REPLENISH);
1563	}
1564
1565	if (!is_leftmost(dl_se, dl_rq: &rq->dl))
1566	resched_curr(rq);
1567	}
1568
1569	/*
1570	* The fair server (sole dl_server) does not account for real-time
1571	* workload because it is running fair work.
1572	*/
1573	if (dl_se == &rq->fair_server)
1574	return;
1575
1576	#ifdef CONFIG_RT_GROUP_SCHED
1577	/*
1578	* Because -- for now -- we share the rt bandwidth, we need to
1579	* account our runtime there too, otherwise actual rt tasks
1580	* would be able to exceed the shared quota.
1581	*
1582	* Account to the root rt group for now.
1583	*
1584	* The solution we're working towards is having the RT groups scheduled
1585	* using deadline servers -- however there's a few nasties to figure
1586	* out before that can happen.
1587	*/
1588	if (rt_bandwidth_enabled()) {
1589	struct rt_rq *rt_rq = &rq->rt;
1590
1591	raw_spin_lock(&rt_rq->rt_runtime_lock);
1592	/*
1593	* We'll let actual RT tasks worry about the overflow here, we
1594	* have our own CBS to keep us inline; only account when RT
1595	* bandwidth is relevant.
1596	*/
1597	if (sched_rt_bandwidth_account(rt_rq))
1598	rt_rq->rt_time += delta_exec;
1599	raw_spin_unlock(&rt_rq->rt_runtime_lock);
1600	}
1601	#endif
1602	}
1603
1604	/*
1605	* In the non-defer mode, the idle time is not accounted, as the
1606	* server provides a guarantee.
1607	*
1608	* If the dl_server is in defer mode, the idle time is also considered
1609	* as time available for the fair server, avoiding a penalty for the
1610	* rt scheduler that did not consumed that time.
1611	*/
1612	void dl_server_update_idle_time(struct rq rq, struct* task_struct *p)
1613	{
1614	s64 delta_exec, scaled_delta_exec;
1615
1616	if (!rq->fair_server.dl_defer)
1617	return;
1618
1619	/ no need to discount more /
1620	if (rq->fair_server.runtime < `0`)
1621	return;
1622
1623	delta_exec = rq_clock_task(rq) - p->se.exec_start;
1624	if (delta_exec < `0`)
1625	return;
1626
1627	scaled_delta_exec = dl_scaled_delta_exec(rq, dl_se: &rq->fair_server, delta_exec);
1628
1629	rq->fair_server.runtime -= scaled_delta_exec;
1630
1631	if (rq->fair_server.runtime < `0`) {
1632	rq->fair_server.dl_defer_running = `0`;
1633	rq->fair_server.runtime = `0`;
1634	}
1635
1636	p->se.exec_start = rq_clock_task(rq);
1637	}
1638
1639	void dl_server_update(struct sched_dl_entity *dl_se, s64 delta_exec)
1640	{
1641	/ 0 runtime = fair server disabled /
1642	if (dl_se->dl_runtime)
1643	update_curr_dl_se(rq: dl_se->rq, dl_se, delta_exec);
1644	}
1645
1646	void dl_server_start(struct sched_dl_entity *dl_se)
1647	{
1648	struct rq *rq = dl_se->rq;
1649
1650	/*
1651	* XXX: the apply do not work fine at the init phase for the
1652	* fair server because things are not yet set. We need to improve
1653	* this before getting generic.
1654	*/
1655	if (!dl_server(dl_se)) {
1656	u64 runtime = `50` * NSEC_PER_MSEC;
1657	u64 period = `1000` * NSEC_PER_MSEC;
1658
1659	dl_server_apply_params(dl_se, runtime, period, init: `1`);
1660
1661	dl_se->dl_server = `1`;
1662	dl_se->dl_defer = `1`;
1663	setup_new_dl_entity(dl_se);
1664	}
1665
1666	if (!dl_se->dl_runtime)
1667	return;
1668
1669	dl_se->dl_server_active = `1`;
1670	enqueue_dl_entity(dl_se, ENQUEUE_WAKEUP);
1671	if (!dl_task(p: dl_se->rq->curr) \|\| dl_entity_preempt(a: dl_se, b: &rq->curr->dl))
1672	resched_curr(rq: dl_se->rq);
1673	}
1674
1675	void dl_server_stop(struct sched_dl_entity *dl_se)
1676	{
1677	if (!dl_se->dl_runtime)
1678	return;
1679
1680	dequeue_dl_entity(dl_se, DEQUEUE_SLEEP);
1681	hrtimer_try_to_cancel(timer: &dl_se->dl_timer);
1682	dl_se->dl_defer_armed = `0`;
1683	dl_se->dl_throttled = `0`;
1684	dl_se->dl_server_active = `0`;
1685	}
1686
1687	void dl_server_init(struct sched_dl_entity dl_se, struct* rq *rq,
1688	dl_server_has_tasks_f has_tasks,
1689	dl_server_pick_f pick_task)
1690	{
1691	dl_se->rq = rq;
1692	dl_se->server_has_tasks = has_tasks;
1693	dl_se->server_pick_task = pick_task;
1694	}
1695
1696	void __dl_server_attach_root(struct sched_dl_entity dl_se, struct* rq *rq)
1697	{
1698	u64 new_bw = dl_se->dl_bw;
1699	int cpu = cpu_of(rq);
1700	struct dl_bw *dl_b;
1701
1702	dl_b = dl_bw_of(i: cpu_of(rq));
1703	guard(raw_spinlock)(l: &dl_b->lock);
1704
1705	if (!dl_bw_cpus(i: cpu))
1706	return;
1707
1708	__dl_add(dl_b, tsk_bw: new_bw, cpus: dl_bw_cpus(i: cpu));
1709	}
1710
1711	int dl_server_apply_params(struct sched_dl_entity *dl_se, u64 runtime, u64 period, bool init)
1712	{
1713	u64 old_bw = init ? `0` : to_ratio(period: dl_se->dl_period, runtime: dl_se->dl_runtime);
1714	u64 new_bw = to_ratio(period, runtime);
1715	struct rq *rq = dl_se->rq;
1716	int cpu = cpu_of(rq);
1717	struct dl_bw *dl_b;
1718	unsigned long cap;
1719	int retval = `0`;
1720	int cpus;
1721
1722	dl_b = dl_bw_of(i: cpu);
1723	guard(raw_spinlock)(l: &dl_b->lock);
1724
1725	cpus = dl_bw_cpus(i: cpu);
1726	cap = dl_bw_capacity(i: cpu);
1727
1728	if (__dl_overflow(dl_b, cap, old_bw, new_bw))
1729	return -EBUSY;
1730
1731	if (init) {
1732	__add_rq_bw(dl_bw: new_bw, dl_rq: &rq->dl);
1733	__dl_add(dl_b, tsk_bw: new_bw, cpus);
1734	} else {
1735	__dl_sub(dl_b, tsk_bw: dl_se->dl_bw, cpus);
1736	__dl_add(dl_b, tsk_bw: new_bw, cpus);
1737
1738	dl_rq_change_utilization(rq, dl_se, new_bw);
1739	}
1740
1741	dl_se->dl_runtime = runtime;
1742	dl_se->dl_deadline = period;
1743	dl_se->dl_period = period;
1744
1745	dl_se->runtime = `0`;
1746	dl_se->deadline = `0`;
1747
1748	dl_se->dl_bw = to_ratio(period: dl_se->dl_period, runtime: dl_se->dl_runtime);
1749	dl_se->dl_density = to_ratio(period: dl_se->dl_deadline, runtime: dl_se->dl_runtime);
1750
1751	return retval;
1752	}
1753
1754	/*
1755	* Update the current task's runtime statistics (provided it is still
1756	* a -deadline task and has not been removed from the dl_rq).
1757	*/
1758	static void update_curr_dl(struct rq *rq)
1759	{
1760	struct task_struct *donor = rq->donor;
1761	struct sched_dl_entity *dl_se = &donor->dl;
1762	s64 delta_exec;
1763
1764	if (!dl_task(p: donor) \|\| !on_dl_rq(dl_se))
1765	return;
1766
1767	/*
1768	* Consumed budget is computed considering the time as
1769	* observed by schedulable tasks (excluding time spent
1770	* in hardirq context, etc.). Deadlines are instead
1771	* computed using hard walltime. This seems to be the more
1772	* natural solution, but the full ramifications of this
1773	* approach need further study.
1774	*/
1775	delta_exec = update_curr_common(rq);
1776	update_curr_dl_se(rq, dl_se, delta_exec);
1777	}
1778
1779	static enum hrtimer_restart inactive_task_timer(struct hrtimer *timer)
1780	{
1781	struct sched_dl_entity *dl_se = container_of(timer,
1782	struct sched_dl_entity,
1783	inactive_timer);
1784	struct task_struct *p = NULL;
1785	struct rq_flags rf;
1786	struct rq *rq;
1787
1788	if (!dl_server(dl_se)) {
1789	p = dl_task_of(dl_se);
1790	rq = task_rq_lock(p, rf: &rf);
1791	} else {
1792	rq = dl_se->rq;
1793	rq_lock(rq, rf: &rf);
1794	}
1795
1796	sched_clock_tick();
1797	update_rq_clock(rq);
1798
1799	if (dl_server(dl_se))
1800	goto no_task;
1801
1802	if (!dl_task(p) \|\| READ_ONCE(p->__state) == TASK_DEAD) {
1803	struct dl_bw *dl_b = dl_bw_of(i: task_cpu(p));
1804
1805	if (READ_ONCE(p->__state) == TASK_DEAD && dl_se->dl_non_contending) {
1806	sub_running_bw(dl_se: &p->dl, dl_rq: dl_rq_of_se(dl_se: &p->dl));
1807	sub_rq_bw(dl_se: &p->dl, dl_rq: dl_rq_of_se(dl_se: &p->dl));
1808	dl_se->dl_non_contending = `0`;
1809	}
1810
1811	raw_spin_lock(&dl_b->lock);
1812	__dl_sub(dl_b, tsk_bw: p->dl.dl_bw, cpus: dl_bw_cpus(i: task_cpu(p)));
1813	raw_spin_unlock(&dl_b->lock);
1814	__dl_clear_params(dl_se);
1815
1816	goto unlock;
1817	}
1818
1819	no_task:
1820	if (dl_se->dl_non_contending == `0`)
1821	goto unlock;
1822
1823	sub_running_bw(dl_se, dl_rq: &rq->dl);
1824	dl_se->dl_non_contending = `0`;
1825	unlock:
1826
1827	if (!dl_server(dl_se)) {
1828	task_rq_unlock(rq, p, rf: &rf);
1829	put_task_struct(t: p);
1830	} else {
1831	rq_unlock(rq, rf: &rf);
1832	}
1833
1834	return HRTIMER_NORESTART;
1835	}
1836
1837	static void init_dl_inactive_task_timer(struct sched_dl_entity *dl_se)
1838	{
1839	struct hrtimer *timer = &dl_se->inactive_timer;
1840
1841	hrtimer_setup(timer, function: inactive_task_timer, CLOCK_MONOTONIC, mode: HRTIMER_MODE_REL_HARD);
1842	}
1843
/* Map an rb_node embedded in a sched_dl_entity back to the entity. */
#define __node_2_dle(node) rb_entry((node), struct sched_dl_entity, rb_node)
1846
1847	#ifdef CONFIG_SMP
1848
1849	static void inc_dl_deadline(struct dl_rq *dl_rq, u64 deadline)
1850	{
1851	struct rq *rq = rq_of_dl_rq(dl_rq);
1852
1853	if (dl_rq->earliest_dl.curr == `0` \|\|
1854	dl_time_before(a: deadline, b: dl_rq->earliest_dl.curr)) {
1855	if (dl_rq->earliest_dl.curr == `0`)
1856	cpupri_set(cp: &rq->rd->cpupri, cpu: rq->cpu, CPUPRI_HIGHER);
1857	dl_rq->earliest_dl.curr = deadline;
1858	cpudl_set(cp: &rq->rd->cpudl, cpu: rq->cpu, dl: deadline);
1859	}
1860	}
1861
1862	static void dec_dl_deadline(struct dl_rq *dl_rq, u64 deadline)
1863	{
1864	struct rq *rq = rq_of_dl_rq(dl_rq);
1865
1866	/*
1867	* Since we may have removed our earliest (and/or next earliest)
1868	* task we must recompute them.
1869	*/
1870	if (!dl_rq->dl_nr_running) {
1871	dl_rq->earliest_dl.curr = `0`;
1872	dl_rq->earliest_dl.next = `0`;
1873	cpudl_clear(cp: &rq->rd->cpudl, cpu: rq->cpu);
1874	cpupri_set(cp: &rq->rd->cpupri, cpu: rq->cpu, pri: rq->rt.highest_prio.curr);
1875	} else {
1876	struct rb_node *leftmost = rb_first_cached(&dl_rq->root);
1877	struct sched_dl_entity *entry = __node_2_dle(leftmost);
1878
1879	dl_rq->earliest_dl.curr = entry->deadline;
1880	cpudl_set(cp: &rq->rd->cpudl, cpu: rq->cpu, dl: entry->deadline);
1881	}
1882	}
1883
1884	#else
1885
1886	static inline void inc_dl_deadline(struct dl_rq *dl_rq, u64 deadline) {}
1887	static inline void dec_dl_deadline(struct dl_rq *dl_rq, u64 deadline) {}
1888
1889	#endif /* CONFIG_SMP */
1890
1891	static inline
1892	void inc_dl_tasks(struct sched_dl_entity dl_se, struct* dl_rq *dl_rq)
1893	{
1894	u64 deadline = dl_se->deadline;
1895
1896	dl_rq->dl_nr_running++;
1897	add_nr_running(rq: rq_of_dl_rq(dl_rq), count: `1`);
1898
1899	inc_dl_deadline(dl_rq, deadline);
1900	}
1901
1902	static inline
1903	void dec_dl_tasks(struct sched_dl_entity dl_se, struct* dl_rq *dl_rq)
1904	{
1905	WARN_ON(!dl_rq->dl_nr_running);
1906	dl_rq->dl_nr_running--;
1907	sub_nr_running(rq: rq_of_dl_rq(dl_rq), count: `1`);
1908
1909	dec_dl_deadline(dl_rq, deadline: dl_se->deadline);
1910	}
1911
1912	static inline bool __dl_less(struct rb_node a, const* struct rb_node *b)
1913	{
1914	return dl_time_before(__node_2_dle(a)->deadline, __node_2_dle(b)->deadline);
1915	}
1916
1917	static __always_inline struct sched_statistics *
1918	__schedstats_from_dl_se(struct sched_dl_entity *dl_se)
1919	{
1920	if (!schedstat_enabled())
1921	return NULL;
1922
1923	if (dl_server(dl_se))
1924	return NULL;
1925
1926	return &dl_task_of(dl_se)->stats;
1927	}
1928
/* Record wait-start statistics for @dl_se, if it has schedstats. */
static inline void
update_stats_wait_start_dl(struct dl_rq *dl_rq, struct sched_dl_entity *dl_se)
{
	struct sched_statistics *stats = __schedstats_from_dl_se(dl_se);
	if (stats)
		__update_stats_wait_start(rq_of_dl_rq(dl_rq), dl_task_of(dl_se), stats);
}
1936
/* Record wait-end statistics for @dl_se, if it has schedstats. */
static inline void
update_stats_wait_end_dl(struct dl_rq *dl_rq, struct sched_dl_entity *dl_se)
{
	struct sched_statistics *stats = __schedstats_from_dl_se(dl_se);
	if (stats)
		__update_stats_wait_end(rq_of_dl_rq(dl_rq), dl_task_of(dl_se), stats);
}
1944
/* Record enqueue-after-sleep statistics for @dl_se, if it has schedstats. */
static inline void
update_stats_enqueue_sleeper_dl(struct dl_rq *dl_rq, struct sched_dl_entity *dl_se)
{
	struct sched_statistics *stats = __schedstats_from_dl_se(dl_se);
	if (stats)
		__update_stats_enqueue_sleeper(rq_of_dl_rq(dl_rq), dl_task_of(dl_se), stats);
}
1952
1953	static inline void
1954	update_stats_enqueue_dl(struct dl_rq dl_rq, struct* sched_dl_entity *dl_se,
1955	int flags)
1956	{
1957	if (!schedstat_enabled())
1958	return;
1959
1960	if (flags & ENQUEUE_WAKEUP)
1961	update_stats_enqueue_sleeper_dl(dl_rq, dl_se);
1962	}
1963
1964	static inline void
1965	update_stats_dequeue_dl(struct dl_rq dl_rq, struct* sched_dl_entity *dl_se,
1966	int flags)
1967	{
1968	struct task_struct *p = dl_task_of(dl_se);
1969
1970	if (!schedstat_enabled())
1971	return;
1972
1973	if ((flags & DEQUEUE_SLEEP)) {
1974	unsigned int state;
1975
1976	state = READ_ONCE(p->__state);
1977	if (state & TASK_INTERRUPTIBLE)
1978	__schedstat_set(p->stats.sleep_start,
1979	rq_clock(rq_of_dl_rq(dl_rq)));
1980
1981	if (state & TASK_UNINTERRUPTIBLE)
1982	__schedstat_set(p->stats.block_start,
1983	rq_clock(rq_of_dl_rq(dl_rq)));
1984	}
1985	}
1986
1987	static void __enqueue_dl_entity(struct sched_dl_entity *dl_se)
1988	{
1989	struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
1990
1991	WARN_ON_ONCE(!RB_EMPTY_NODE(&dl_se->rb_node));
1992
1993	rb_add_cached(node: &dl_se->rb_node, tree: &dl_rq->root, less: __dl_less);
1994
1995	inc_dl_tasks(dl_se, dl_rq);
1996	}
1997
1998	static void __dequeue_dl_entity(struct sched_dl_entity *dl_se)
1999	{
2000	struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
2001
2002	if (RB_EMPTY_NODE(&dl_se->rb_node))
2003	return;
2004
2005	rb_erase_cached(node: &dl_se->rb_node, root: &dl_rq->root);
2006
2007	RB_CLEAR_NODE(&dl_se->rb_node);
2008
2009	dec_dl_tasks(dl_se, dl_rq);
2010	}
2011
2012	static void
2013	enqueue_dl_entity(struct sched_dl_entity dl_se, int* flags)
2014	{
2015	WARN_ON_ONCE(on_dl_rq(dl_se));
2016
2017	update_stats_enqueue_dl(dl_rq: dl_rq_of_se(dl_se), dl_se, flags);
2018
2019	/*
2020	* Check if a constrained deadline task was activated
2021	* after the deadline but before the next period.
2022	* If that is the case, the task will be throttled and
2023	* the replenishment timer will be set to the next period.
2024	*/
2025	if (!dl_se->dl_throttled && !dl_is_implicit(dl_se))
2026	dl_check_constrained_dl(dl_se);
2027
2028	if (flags & (ENQUEUE_RESTORE\|ENQUEUE_MIGRATING)) {
2029	struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
2030
2031	add_rq_bw(dl_se, dl_rq);
2032	add_running_bw(dl_se, dl_rq);
2033	}
2034
2035	/*
2036	* If p is throttled, we do not enqueue it. In fact, if it exhausted
2037	* its budget it needs a replenishment and, since it now is on
2038	* its rq, the bandwidth timer callback (which clearly has not
2039	* run yet) will take care of this.
2040	* However, the active utilization does not depend on the fact
2041	* that the task is on the runqueue or not (but depends on the
2042	* task's state - in GRUB parlance, "inactive" vs "active contending").
2043	* In other words, even if a task is throttled its utilization must
2044	* be counted in the active utilization; hence, we need to call
2045	* add_running_bw().
2046	*/
2047	if (!dl_se->dl_defer && dl_se->dl_throttled && !(flags & ENQUEUE_REPLENISH)) {
2048	if (flags & ENQUEUE_WAKEUP)
2049	task_contending(dl_se, flags);
2050
2051	return;
2052	}
2053
2054	/*
2055	* If this is a wakeup or a new instance, the scheduling
2056	* parameters of the task might need updating. Otherwise,
2057	* we want a replenishment of its runtime.
2058	*/
2059	if (flags & ENQUEUE_WAKEUP) {
2060	task_contending(dl_se, flags);
2061	update_dl_entity(dl_se);
2062	} else if (flags & ENQUEUE_REPLENISH) {
2063	replenish_dl_entity(dl_se);
2064	} else if ((flags & ENQUEUE_RESTORE) &&
2065	!is_dl_boosted(dl_se) &&
2066	dl_time_before(a: dl_se->deadline, b: rq_clock(rq: rq_of_dl_se(dl_se)))) {
2067	setup_new_dl_entity(dl_se);
2068	}
2069
2070	/*
2071	* If the reservation is still throttled, e.g., it got replenished but is a
2072	* deferred task and still got to wait, don't enqueue.
2073	*/
2074	if (dl_se->dl_throttled && start_dl_timer(dl_se))
2075	return;
2076
2077	/*
2078	* We're about to enqueue, make sure we're not ->dl_throttled!
2079	* In case the timer was not started, say because the defer time
2080	* has passed, mark as not throttled and mark unarmed.
2081	* Also cancel earlier timers, since letting those run is pointless.
2082	*/
2083	if (dl_se->dl_throttled) {
2084	hrtimer_try_to_cancel(timer: &dl_se->dl_timer);
2085	dl_se->dl_defer_armed = `0`;
2086	dl_se->dl_throttled = `0`;
2087	}
2088
2089	__enqueue_dl_entity(dl_se);
2090	}
2091
2092	static void dequeue_dl_entity(struct sched_dl_entity dl_se, int* flags)
2093	{
2094	__dequeue_dl_entity(dl_se);
2095
2096	if (flags & (DEQUEUE_SAVE\|DEQUEUE_MIGRATING)) {
2097	struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
2098
2099	sub_running_bw(dl_se, dl_rq);
2100	sub_rq_bw(dl_se, dl_rq);
2101	}
2102
2103	/*
2104	* This check allows to start the inactive timer (or to immediately
2105	* decrease the active utilization, if needed) in two cases:
2106	* when the task blocks and when it is terminating
2107	* (p->state == TASK_DEAD). We can handle the two cases in the same
2108	* way, because from GRUB's point of view the same thing is happening
2109	* (the task moves from "active contending" to "active non contending"
2110	* or "inactive")
2111	*/
2112	if (flags & DEQUEUE_SLEEP)
2113	task_non_contending(dl_se);
2114	}
2115
2116	static void enqueue_task_dl(struct rq rq, struct* task_struct p, int* flags)
2117	{
2118	if (is_dl_boosted(dl_se: &p->dl)) {
2119	/*
2120	* Because of delays in the detection of the overrun of a
2121	* thread's runtime, it might be the case that a thread
2122	* goes to sleep in a rt mutex with negative runtime. As
2123	* a consequence, the thread will be throttled.
2124	*
2125	* While waiting for the mutex, this thread can also be
2126	* boosted via PI, resulting in a thread that is throttled
2127	* and boosted at the same time.
2128	*
2129	* In this case, the boost overrides the throttle.
2130	*/
2131	if (p->dl.dl_throttled) {
2132	/*
2133	* The replenish timer needs to be canceled. No
2134	* problem if it fires concurrently: boosted threads
2135	* are ignored in dl_task_timer().
2136	*/
2137	cancel_replenish_timer(dl_se: &p->dl);
2138	p->dl.dl_throttled = `0`;
2139	}
2140	} else if (!dl_prio(prio: p->normal_prio)) {
2141	/*
2142	* Special case in which we have a !SCHED_DEADLINE task that is going
2143	* to be deboosted, but exceeds its runtime while doing so. No point in
2144	* replenishing it, as it's going to return back to its original
2145	* scheduling class after this. If it has been throttled, we need to
2146	* clear the flag, otherwise the task may wake up as throttled after
2147	* being boosted again with no means to replenish the runtime and clear
2148	* the throttle.
2149	*/
2150	p->dl.dl_throttled = `0`;
2151	if (!(flags & ENQUEUE_REPLENISH))
2152	printk_deferred_once("sched: DL de-boosted task PID %d: REPLENISH flag missing\n",
2153	task_pid_nr(p));
2154
2155	return;
2156	}
2157
2158	check_schedstat_required();
2159	update_stats_wait_start_dl(dl_rq: dl_rq_of_se(dl_se: &p->dl), dl_se: &p->dl);
2160
2161	if (p->on_rq == TASK_ON_RQ_MIGRATING)
2162	flags \|= ENQUEUE_MIGRATING;
2163
2164	enqueue_dl_entity(dl_se: &p->dl, flags);
2165
2166	if (dl_server(dl_se: &p->dl))
2167	return;
2168
2169	if (!task_current(rq, p) && !p->dl.dl_throttled && p->nr_cpus_allowed > `1`)
2170	enqueue_pushable_dl_task(rq, p);
2171	}
2172
2173	static bool dequeue_task_dl(struct rq rq, struct* task_struct p, int* flags)
2174	{
2175	update_curr_dl(rq);
2176
2177	if (p->on_rq == TASK_ON_RQ_MIGRATING)
2178	flags \|= DEQUEUE_MIGRATING;
2179
2180	dequeue_dl_entity(dl_se: &p->dl, flags);
2181	if (!p->dl.dl_throttled && !dl_server(dl_se: &p->dl))
2182	dequeue_pushable_dl_task(rq, p);
2183
2184	return true;
2185	}
2186
2187	/*
2188	* Yield task semantic for -deadline tasks is:
2189	*
2190	* get off from the CPU until our next instance, with
2191	* a new runtime. This is of little use now, since we
2192	* don't have a bandwidth reclaiming mechanism. Anyway,
2193	* bandwidth reclaiming is planned for the future, and
2194	* yield_task_dl will indicate that some spare budget
2195	* is available for other task instances to use it.
2196	*/
2197	static void yield_task_dl(struct rq *rq)
2198	{
2199	/*
2200	* We make the task go to sleep until its current deadline by
2201	* forcing its runtime to zero. This way, update_curr_dl() stops
2202	* it and the bandwidth timer will wake it up and will give it
2203	* new scheduling parameters (thanks to dl_yielded=1).
2204	*/
2205	rq->curr->dl.dl_yielded = `1`;
2206
2207	update_rq_clock(rq);
2208	update_curr_dl(rq);
2209	/*
2210	* Tell update_rq_clock() that we've just updated,
2211	* so we don't do microscopic update in schedule()
2212	* and double the fastpath cost.
2213	*/
2214	rq_clock_skip_update(rq);
2215	}
2216
2217	#ifdef CONFIG_SMP
2218
2219	static inline bool dl_task_is_earliest_deadline(struct task_struct *p,
2220	struct rq *rq)
2221	{
2222	return (!rq->dl.dl_nr_running \|\|
2223	dl_time_before(a: p->dl.deadline,
2224	b: rq->dl.earliest_dl.curr));
2225	}
2226
2227	static int find_later_rq(struct task_struct *task);
2228
2229	static int
2230	select_task_rq_dl(struct task_struct p, int* cpu, int flags)
2231	{
2232	struct task_struct curr, donor;
2233	bool select_rq;
2234	struct rq *rq;
2235
2236	if (!(flags & WF_TTWU))
2237	goto out;
2238
2239	rq = cpu_rq(cpu);
2240
2241	rcu_read_lock();
2242	curr = READ_ONCE(rq->curr); / unlocked access /
2243	donor = READ_ONCE(rq->donor);
2244
2245	/*
2246	* If we are dealing with a -deadline task, we must
2247	* decide where to wake it up.
2248	* If it has a later deadline and the current task
2249	* on this rq can't move (provided the waking task
2250	* can!) we prefer to send it somewhere else. On the
2251	* other hand, if it has a shorter deadline, we
2252	* try to make it stay here, it might be important.
2253	*/
2254	select_rq = unlikely(dl_task(donor)) &&
2255	(curr->nr_cpus_allowed < `2` \|\|
2256	!dl_entity_preempt(a: &p->dl, b: &donor->dl)) &&
2257	p->nr_cpus_allowed > `1`;
2258
2259	/*
2260	* Take the capacity of the CPU into account to
2261	* ensure it fits the requirement of the task.
2262	*/
2263	if (sched_asym_cpucap_active())
2264	select_rq \|= !dl_task_fits_capacity(p, cpu);
2265
2266	if (select_rq) {
2267	int target = find_later_rq(task: p);
2268
2269	if (target != -`1` &&
2270	dl_task_is_earliest_deadline(p, cpu_rq(target)))
2271	cpu = target;
2272	}
2273	rcu_read_unlock();
2274
2275	out:
2276	return cpu;
2277	}
2278
2279	static void migrate_task_rq_dl(struct task_struct p, int* new_cpu __maybe_unused)
2280	{
2281	struct rq_flags rf;
2282	struct rq *rq;
2283
2284	if (READ_ONCE(p->__state) != TASK_WAKING)
2285	return;
2286
2287	rq = task_rq(p);
2288	/*
2289	* Since p->state == TASK_WAKING, set_task_cpu() has been called
2290	* from try_to_wake_up(). Hence, p->pi_lock is locked, but
2291	* rq->lock is not... So, lock it
2292	*/
2293	rq_lock(rq, rf: &rf);
2294	if (p->dl.dl_non_contending) {
2295	update_rq_clock(rq);
2296	sub_running_bw(dl_se: &p->dl, dl_rq: &rq->dl);
2297	p->dl.dl_non_contending = `0`;
2298	/*
2299	* If the timer handler is currently running and the
2300	* timer cannot be canceled, inactive_task_timer()
2301	* will see that dl_not_contending is not set, and
2302	* will not touch the rq's active utilization,
2303	* so we are still safe.
2304	*/
2305	cancel_inactive_timer(dl_se: &p->dl);
2306	}
2307	sub_rq_bw(dl_se: &p->dl, dl_rq: &rq->dl);
2308	rq_unlock(rq, rf: &rf);
2309	}
2310
2311	static void check_preempt_equal_dl(struct rq rq, struct* task_struct *p)
2312	{
2313	/*
2314	* Current can't be migrated, useless to reschedule,
2315	* let's hope p can move out.
2316	*/
2317	if (rq->curr->nr_cpus_allowed == `1` \|\|
2318	!cpudl_find(cp: &rq->rd->cpudl, p: rq->donor, NULL))
2319	return;
2320
2321	/*
2322	* p is migratable, so let's not schedule it and
2323	* see if it is pushed or pulled somewhere else.
2324	*/
2325	if (p->nr_cpus_allowed != `1` &&
2326	cpudl_find(cp: &rq->rd->cpudl, p, NULL))
2327	return;
2328
2329	resched_curr(rq);
2330	}
2331
2332	static int balance_dl(struct rq rq, struct* task_struct p, struct* rq_flags *rf)
2333	{
2334	if (!on_dl_rq(dl_se: &p->dl) && need_pull_dl_task(rq, prev: p)) {
2335	/*
2336	* This is OK, because current is on_cpu, which avoids it being
2337	* picked for load-balance and preemption/IRQs are still
2338	* disabled avoiding further scheduler activity on it and we've
2339	* not yet started the picking loop.
2340	*/
2341	rq_unpin_lock(rq, rf);
2342	pull_dl_task(rq);
2343	rq_repin_lock(rq, rf);
2344	}
2345
2346	return sched_stop_runnable(rq) \|\| sched_dl_runnable(rq);
2347	}
2348	#endif /* CONFIG_SMP */
2349
2350	/*
2351	* Only called when both the current and waking task are -deadline
2352	* tasks.
2353	*/
2354	static void wakeup_preempt_dl(struct rq rq, struct* task_struct *p,
2355	int flags)
2356	{
2357	if (dl_entity_preempt(a: &p->dl, b: &rq->donor->dl)) {
2358	resched_curr(rq);
2359	return;
2360	}
2361
2362	#ifdef CONFIG_SMP
2363	/*
2364	* In the unlikely case current and p have the same deadline
2365	* let us try to decide what's the best thing to do...
2366	*/
2367	if ((p->dl.deadline == rq->donor->dl.deadline) &&
2368	!test_tsk_need_resched(tsk: rq->curr))
2369	check_preempt_equal_dl(rq, p);
2370	#endif /* CONFIG_SMP */
2371	}
2372
#ifdef CONFIG_SCHED_HRTICK
/* Arm the hrtick on @rq with @dl_se's remaining runtime as the delay. */
static void start_hrtick_dl(struct rq *rq, struct sched_dl_entity *dl_se)
{
	hrtick_start(rq, dl_se->runtime);
}
#else /* !CONFIG_SCHED_HRTICK */
/* Without CONFIG_SCHED_HRTICK there is nothing to arm. */
static void start_hrtick_dl(struct rq *rq, struct sched_dl_entity *dl_se)
{
}
#endif
2383
2384	static void set_next_task_dl(struct rq rq, struct* task_struct *p, bool first)
2385	{
2386	struct sched_dl_entity *dl_se = &p->dl;
2387	struct dl_rq *dl_rq = &rq->dl;
2388
2389	p->se.exec_start = rq_clock_task(rq);
2390	if (on_dl_rq(dl_se: &p->dl))
2391	update_stats_wait_end_dl(dl_rq, dl_se);
2392
2393	/ You can't push away the running task /
2394	dequeue_pushable_dl_task(rq, p);
2395
2396	if (!first)
2397	return;
2398
2399	if (rq->donor->sched_class != &dl_sched_class)
2400	update_dl_rq_load_avg(now: rq_clock_pelt(rq), rq, running: `0`);
2401
2402	deadline_queue_push_tasks(rq);
2403
2404	if (hrtick_enabled_dl(rq))
2405	start_hrtick_dl(rq, dl_se: &p->dl);
2406	}
2407
2408	static struct sched_dl_entity pick_next_dl_entity(struct* dl_rq *dl_rq)
2409	{
2410	struct rb_node *left = rb_first_cached(&dl_rq->root);
2411
2412	if (!left)
2413	return NULL;
2414
2415	return __node_2_dle(left);
2416	}
2417
2418	/*
2419	* __pick_next_task_dl - Helper to pick the next -deadline task to run.
2420	* @rq: The runqueue to pick the next task from.
2421	*/
2422	static struct task_struct __pick_task_dl(struct* rq *rq)
2423	{
2424	struct sched_dl_entity *dl_se;
2425	struct dl_rq *dl_rq = &rq->dl;
2426	struct task_struct *p;
2427
2428	again:
2429	if (!sched_dl_runnable(rq))
2430	return NULL;
2431
2432	dl_se = pick_next_dl_entity(dl_rq);
2433	WARN_ON_ONCE(!dl_se);
2434
2435	if (dl_server(dl_se)) {
2436	p = dl_se->server_pick_task(dl_se);
2437	if (!p) {
2438	if (dl_server_active(dl_se)) {
2439	dl_se->dl_yielded = `1`;
2440	update_curr_dl_se(rq, dl_se, delta_exec: `0`);
2441	}
2442	goto again;
2443	}
2444	rq->dl_server = dl_se;
2445	} else {
2446	p = dl_task_of(dl_se);
2447	}
2448
2449	return p;
2450	}
2451
/* sched_class pick_task hook for -deadline; thin wrapper around __pick_task_dl(). */
static struct task_struct *pick_task_dl(struct rq *rq)
{
	return __pick_task_dl(rq);
}
2456
2457	static void put_prev_task_dl(struct rq rq, struct* task_struct p, struct* task_struct *next)
2458	{
2459	struct sched_dl_entity *dl_se = &p->dl;
2460	struct dl_rq *dl_rq = &rq->dl;
2461
2462	if (on_dl_rq(dl_se: &p->dl))
2463	update_stats_wait_start_dl(dl_rq, dl_se);
2464
2465	update_curr_dl(rq);
2466
2467	update_dl_rq_load_avg(now: rq_clock_pelt(rq), rq, running: `1`);
2468	if (on_dl_rq(dl_se: &p->dl) && p->nr_cpus_allowed > `1`)
2469	enqueue_pushable_dl_task(rq, p);
2470	}
2471
2472	/*
2473	* scheduler tick hitting a task of our scheduling class.
2474	*
2475	* NOTE: This function can be called remotely by the tick offload that
2476	* goes along full dynticks. Therefore no local assumption can be made
2477	* and everything must be accessed through the @rq and @curr passed in
2478	* parameters.
2479	*/
2480	static void task_tick_dl(struct rq rq, struct* task_struct p, int* queued)
2481	{
2482	update_curr_dl(rq);
2483
2484	update_dl_rq_load_avg(now: rq_clock_pelt(rq), rq, running: `1`);
2485	/*
2486	* Even when we have runtime, update_curr_dl() might have resulted in us
2487	* not being the leftmost task anymore. In that case NEED_RESCHED will
2488	* be set and schedule() will start a new hrtick for the next task.
2489	*/
2490	if (hrtick_enabled_dl(rq) && queued && p->dl.runtime > `0` &&
2491	is_leftmost(dl_se: &p->dl, dl_rq: &rq->dl))
2492	start_hrtick_dl(rq, dl_se: &p->dl);
2493	}
2494
/*
 * Fork hook for -deadline. Intentionally empty: SCHED_DEADLINE tasks
 * cannot fork, and that is enforced through sched_fork().
 */
static void task_fork_dl(struct task_struct *p)
{
}
2502
2503	#ifdef CONFIG_SMP
2504
/* Only try algorithms three times */
2506	#define DL_MAX_TRIES 3
2507
2508	/*
2509	* Return the earliest pushable rq's task, which is suitable to be executed
2510	* on the CPU, NULL otherwise:
2511	*/
2512	static struct task_struct pick_earliest_pushable_dl_task(struct* rq rq, int* cpu)
2513	{
2514	struct task_struct *p = NULL;
2515	struct rb_node *next_node;
2516
2517	if (!has_pushable_dl_tasks(rq))
2518	return NULL;
2519
2520	next_node = rb_first_cached(&rq->dl.pushable_dl_tasks_root);
2521	while (next_node) {
2522	p = __node_2_pdl(next_node);
2523
2524	if (task_is_pushable(rq, p, cpu))
2525	return p;
2526
2527	next_node = rb_next(next_node);
2528	}
2529
2530	return NULL;
2531	}
2532
2533	static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask_dl);
2534
2535	static int find_later_rq(struct task_struct *task)
2536	{
2537	struct sched_domain *sd;
2538	struct cpumask *later_mask = this_cpu_cpumask_var_ptr(local_cpu_mask_dl);
2539	int this_cpu = smp_processor_id();
2540	int cpu = task_cpu(p: task);
2541
2542	/ Make sure the mask is initialized first /
2543	if (unlikely(!later_mask))
2544	return -`1`;
2545
2546	if (task->nr_cpus_allowed == `1`)
2547	return -`1`;
2548
2549	/*
2550	* We have to consider system topology and task affinity
2551	* first, then we can look for a suitable CPU.
2552	*/
2553	if (!cpudl_find(cp: &task_rq(task)->rd->cpudl, p: task, later_mask))
2554	return -`1`;
2555
2556	/*
2557	* If we are here, some targets have been found, including
2558	* the most suitable which is, among the runqueues where the
2559	* current tasks have later deadlines than the task's one, the
2560	* rq with the latest possible one.
2561	*
2562	* Now we check how well this matches with task's
2563	* affinity and system topology.
2564	*
2565	* The last CPU where the task run is our first
2566	* guess, since it is most likely cache-hot there.
2567	*/
2568	if (cpumask_test_cpu(cpu, cpumask: later_mask))
2569	return cpu;
2570	/*
2571	* Check if this_cpu is to be skipped (i.e., it is
2572	* not in the mask) or not.
2573	*/
2574	if (!cpumask_test_cpu(cpu: this_cpu, cpumask: later_mask))
2575	this_cpu = -`1`;
2576
2577	rcu_read_lock();
2578	for_each_domain(cpu, sd) {
2579	if (sd->flags & SD_WAKE_AFFINE) {
2580	int best_cpu;
2581
2582	/*
2583	* If possible, preempting this_cpu is
2584	* cheaper than migrating.
2585	*/
2586	if (this_cpu != -`1` &&
2587	cpumask_test_cpu(cpu: this_cpu, cpumask: sched_domain_span(sd))) {
2588	rcu_read_unlock();
2589	return this_cpu;
2590	}
2591
2592	best_cpu = cpumask_any_and_distribute(src1p: later_mask,
2593	src2p: sched_domain_span(sd));
2594	/*
2595	* Last chance: if a CPU being in both later_mask
2596	* and current sd span is valid, that becomes our
2597	* choice. Of course, the latest possible CPU is
2598	* already under consideration through later_mask.
2599	*/
2600	if (best_cpu < nr_cpu_ids) {
2601	rcu_read_unlock();
2602	return best_cpu;
2603	}
2604	}
2605	}
2606	rcu_read_unlock();
2607
2608	/*
2609	* At this point, all our guesses failed, we just return
2610	* 'something', and let the caller sort the things out.
2611	*/
2612	if (this_cpu != -`1`)
2613	return this_cpu;
2614
2615	cpu = cpumask_any_distribute(srcp: later_mask);
2616	if (cpu < nr_cpu_ids)
2617	return cpu;
2618
2619	return -`1`;
2620	}
2621
2622	/ Locks the rq it finds /
2623	static struct rq find_lock_later_rq(struct* task_struct task, struct* rq *rq)
2624	{
2625	struct rq *later_rq = NULL;
2626	int tries;
2627	int cpu;
2628
2629	for (tries = `0`; tries < DL_MAX_TRIES; tries++) {
2630	cpu = find_later_rq(task);
2631
2632	if ((cpu == -`1`) \|\| (cpu == rq->cpu))
2633	break;
2634
2635	later_rq = cpu_rq(cpu);
2636
2637	if (!dl_task_is_earliest_deadline(p: task, rq: later_rq)) {
2638	/*
2639	* Target rq has tasks of equal or earlier deadline,
2640	* retrying does not release any lock and is unlikely
2641	* to yield a different result.
2642	*/
2643	later_rq = NULL;
2644	break;
2645	}
2646
2647	/ Retry if something changed. /
2648	if (double_lock_balance(this_rq: rq, busiest: later_rq)) {
2649	if (unlikely(task_rq(task) != rq \|\|
2650	!cpumask_test_cpu(later_rq->cpu, &task->cpus_mask) \|\|
2651	task_on_cpu(rq, task) \|\|
2652	!dl_task(task) \|\|
2653	is_migration_disabled(task) \|\|
2654	!task_on_rq_queued(task))) {
2655	double_unlock_balance(this_rq: rq, busiest: later_rq);
2656	later_rq = NULL;
2657	break;
2658	}
2659	}
2660
2661	/*
2662	* If the rq we found has no -deadline task, or
2663	* its earliest one has a later deadline than our
2664	* task, the rq is a good one.
2665	*/
2666	if (dl_task_is_earliest_deadline(p: task, rq: later_rq))
2667	break;
2668
2669	/ Otherwise we try again. /
2670	double_unlock_balance(this_rq: rq, busiest: later_rq);
2671	later_rq = NULL;
2672	}
2673
2674	return later_rq;
2675	}
2676
2677	static struct task_struct pick_next_pushable_dl_task(struct* rq *rq)
2678	{
2679	struct task_struct *p;
2680
2681	if (!has_pushable_dl_tasks(rq))
2682	return NULL;
2683
2684	p = __node_2_pdl(rb_first_cached(&rq->dl.pushable_dl_tasks_root));
2685
2686	WARN_ON_ONCE(rq->cpu != task_cpu(p));
2687	WARN_ON_ONCE(task_current(rq, p));
2688	WARN_ON_ONCE(p->nr_cpus_allowed <= `1`);
2689
2690	WARN_ON_ONCE(!task_on_rq_queued(p));
2691	WARN_ON_ONCE(!dl_task(p));
2692
2693	return p;
2694	}
2695
2696	/*
2697	* See if the non running -deadline tasks on this rq
2698	* can be sent to some other CPU where they can preempt
2699	* and start executing.
2700	*/
2701	static int push_dl_task(struct rq *rq)
2702	{
2703	struct task_struct *next_task;
2704	struct rq *later_rq;
2705	int ret = `0`;
2706
2707	next_task = pick_next_pushable_dl_task(rq);
2708	if (!next_task)
2709	return `0`;
2710
2711	retry:
2712	/*
2713	* If next_task preempts rq->curr, and rq->curr
2714	* can move away, it makes sense to just reschedule
2715	* without going further in pushing next_task.
2716	*/
2717	if (dl_task(p: rq->donor) &&
2718	dl_time_before(a: next_task->dl.deadline, b: rq->donor->dl.deadline) &&
2719	rq->curr->nr_cpus_allowed > `1`) {
2720	resched_curr(rq);
2721	return `0`;
2722	}
2723
2724	if (is_migration_disabled(p: next_task))
2725	return `0`;
2726
2727	if (WARN_ON(next_task == rq->curr))
2728	return `0`;
2729
2730	/ We might release rq lock /
2731	get_task_struct(t: next_task);
2732
2733	/ Will lock the rq it'll find /
2734	later_rq = find_lock_later_rq(task: next_task, rq);
2735	if (!later_rq) {
2736	struct task_struct *task;
2737
2738	/*
2739	* We must check all this again, since
2740	* find_lock_later_rq releases rq->lock and it is
2741	* then possible that next_task has migrated.
2742	*/
2743	task = pick_next_pushable_dl_task(rq);
2744	if (task == next_task) {
2745	/*
2746	* The task is still there. We don't try
2747	* again, some other CPU will pull it when ready.
2748	*/
2749	goto out;
2750	}
2751
2752	if (!task)
2753	/ No more tasks /
2754	goto out;
2755
2756	put_task_struct(t: next_task);
2757	next_task = task;
2758	goto retry;
2759	}
2760
2761	move_queued_task_locked(src_rq: rq, dst_rq: later_rq, task: next_task);
2762	ret = `1`;
2763
2764	resched_curr(rq: later_rq);
2765
2766	double_unlock_balance(this_rq: rq, busiest: later_rq);
2767
2768	out:
2769	put_task_struct(t: next_task);
2770
2771	return ret;
2772	}
2773
/* Keep pushing -deadline tasks away from @rq until no more can be moved. */
static void push_dl_tasks(struct rq *rq)
{
	/* push_dl_task() will return true if it moved a -deadline task */
	while (push_dl_task(rq))
		;
}
2780
2781	static void pull_dl_task(struct rq *this_rq)
2782	{
2783	int this_cpu = this_rq->cpu, cpu;
2784	struct task_struct p, push_task;
2785	bool resched = false;
2786	struct rq *src_rq;
2787	u64 dmin = LONG_MAX;
2788
2789	if (likely(!dl_overloaded(this_rq)))
2790	return;
2791
2792	/*
2793	* Match the barrier from dl_set_overloaded; this guarantees that if we
2794	* see overloaded we must also see the dlo_mask bit.
2795	*/
2796	smp_rmb();
2797
2798	for_each_cpu(cpu, this_rq->rd->dlo_mask) {
2799	if (this_cpu == cpu)
2800	continue;
2801
2802	src_rq = cpu_rq(cpu);
2803
2804	/*
2805	* It looks racy, and it is! However, as in sched_rt.c,
2806	* we are fine with this.
2807	*/
2808	if (this_rq->dl.dl_nr_running &&
2809	dl_time_before(a: this_rq->dl.earliest_dl.curr,
2810	b: src_rq->dl.earliest_dl.next))
2811	continue;
2812
2813	/ Might drop this_rq->lock /
2814	push_task = NULL;
2815	double_lock_balance(this_rq, busiest: src_rq);
2816
2817	/*
2818	* If there are no more pullable tasks on the
2819	* rq, we're done with it.
2820	*/
2821	if (src_rq->dl.dl_nr_running <= `1`)
2822	goto skip;
2823
2824	p = pick_earliest_pushable_dl_task(rq: src_rq, cpu: this_cpu);
2825
2826	/*
2827	* We found a task to be pulled if:
2828	* - it preempts our current (if there's one),
2829	* - it will preempt the last one we pulled (if any).
2830	*/
2831	if (p && dl_time_before(a: p->dl.deadline, b: dmin) &&
2832	dl_task_is_earliest_deadline(p, rq: this_rq)) {
2833	WARN_ON(p == src_rq->curr);
2834	WARN_ON(!task_on_rq_queued(p));
2835
2836	/*
2837	* Then we pull iff p has actually an earlier
2838	* deadline than the current task of its runqueue.
2839	*/
2840	if (dl_time_before(a: p->dl.deadline,
2841	b: src_rq->donor->dl.deadline))
2842	goto skip;
2843
2844	if (is_migration_disabled(p)) {
2845	push_task = get_push_task(rq: src_rq);
2846	} else {
2847	move_queued_task_locked(src_rq, dst_rq: this_rq, task: p);
2848	dmin = p->dl.deadline;
2849	resched = true;
2850	}
2851
2852	/ Is there any other task even earlier? /
2853	}
2854	skip:
2855	double_unlock_balance(this_rq, busiest: src_rq);
2856
2857	if (push_task) {
2858	preempt_disable();
2859	raw_spin_rq_unlock(rq: this_rq);
2860	stop_one_cpu_nowait(cpu: src_rq->cpu, fn: push_cpu_stop,
2861	arg: push_task, work_buf: &src_rq->push_work);
2862	preempt_enable();
2863	raw_spin_rq_lock(rq: this_rq);
2864	}
2865	}
2866
2867	if (resched)
2868	resched_curr(rq: this_rq);
2869	}
2870
2871	/*
2872	* Since the task is not running and a reschedule is not going to happen
2873	* anytime soon on its runqueue, we try pushing it away now.
2874	*/
2875	static void task_woken_dl(struct rq rq, struct* task_struct *p)
2876	{
2877	if (!task_on_cpu(rq, p) &&
2878	!test_tsk_need_resched(tsk: rq->curr) &&
2879	p->nr_cpus_allowed > `1` &&
2880	dl_task(p: rq->donor) &&
2881	(rq->curr->nr_cpus_allowed < `2` \|\|
2882	!dl_entity_preempt(a: &p->dl, b: &rq->donor->dl))) {
2883	push_dl_tasks(rq);
2884	}
2885	}
2886
2887	static void set_cpus_allowed_dl(struct task_struct *p,
2888	struct affinity_context *ctx)
2889	{
2890	struct root_domain *src_rd;
2891	struct rq *rq;
2892
2893	WARN_ON_ONCE(!dl_task(p));
2894
2895	rq = task_rq(p);
2896	src_rd = rq->rd;
2897	/*
2898	* Migrating a SCHED_DEADLINE task between exclusive
2899	* cpusets (different root_domains) entails a bandwidth
2900	* update. We already made space for us in the destination
2901	* domain (see cpuset_can_attach()).
2902	*/
2903	if (!cpumask_intersects(src1p: src_rd->span, src2p: ctx->new_mask)) {
2904	struct dl_bw *src_dl_b;
2905
2906	src_dl_b = dl_bw_of(i: cpu_of(rq));
2907	/*
2908	* We now free resources of the root_domain we are migrating
2909	* off. In the worst case, sched_setattr() may temporary fail
2910	* until we complete the update.
2911	*/
2912	raw_spin_lock(&src_dl_b->lock);
2913	__dl_sub(dl_b: src_dl_b, tsk_bw: p->dl.dl_bw, cpus: dl_bw_cpus(i: task_cpu(p)));
2914	raw_spin_unlock(&src_dl_b->lock);
2915	}
2916
2917	set_cpus_allowed_common(p, ctx);
2918	}
2919
2920	/ Assumes rq->lock is held /
2921	static void rq_online_dl(struct rq *rq)
2922	{
2923	if (rq->dl.overloaded)
2924	dl_set_overload(rq);
2925
2926	cpudl_set_freecpu(cp: &rq->rd->cpudl, cpu: rq->cpu);
2927	if (rq->dl.dl_nr_running > `0`)
2928	cpudl_set(cp: &rq->rd->cpudl, cpu: rq->cpu, dl: rq->dl.earliest_dl.curr);
2929	}
2930
2931	/ Assumes rq->lock is held /
2932	static void rq_offline_dl(struct rq *rq)
2933	{
2934	if (rq->dl.overloaded)
2935	dl_clear_overload(rq);
2936
2937	cpudl_clear(cp: &rq->rd->cpudl, cpu: rq->cpu);
2938	cpudl_clear_freecpu(cp: &rq->rd->cpudl, cpu: rq->cpu);
2939	}
2940
2941	void __init init_sched_dl_class(void)
2942	{
2943	unsigned int i;
2944
2945	for_each_possible_cpu(i)
2946	zalloc_cpumask_var_node(mask: &per_cpu(local_cpu_mask_dl, i),
2947	GFP_KERNEL, cpu_to_node(cpu: i));
2948	}
2949
2950	void dl_add_task_root_domain(struct task_struct *p)
2951	{
2952	struct rq_flags rf;
2953	struct rq *rq;
2954	struct dl_bw *dl_b;
2955
2956	raw_spin_lock_irqsave(&p->pi_lock, rf.flags);
2957	if (!dl_task(p) \|\| dl_entity_is_special(dl_se: &p->dl)) {
2958	raw_spin_unlock_irqrestore(&p->pi_lock, rf.flags);
2959	return;
2960	}
2961
2962	rq = __task_rq_lock(p, rf: &rf);
2963
2964	dl_b = &rq->rd->dl_bw;
2965	raw_spin_lock(&dl_b->lock);
2966
2967	__dl_add(dl_b, tsk_bw: p->dl.dl_bw, cpus: cpumask_weight(srcp: rq->rd->span));
2968
2969	raw_spin_unlock(&dl_b->lock);
2970
2971	task_rq_unlock(rq, p, rf: &rf);
2972	}
2973
2974	void dl_clear_root_domain(struct root_domain *rd)
2975	{
2976	int i;
2977
2978	guard(raw_spinlock_irqsave)(l: &rd->dl_bw.lock);
2979	rd->dl_bw.total_bw = `0`;
2980
2981	/*
2982	* dl_servers are not tasks. Since dl_add_task_root_domain ignores
2983	* them, we need to account for them here explicitly.
2984	*/
2985	for_each_cpu(i, rd->span) {
2986	struct sched_dl_entity *dl_se = &cpu_rq(i)->fair_server;
2987
2988	if (dl_server(dl_se) && cpu_active(cpu: i))
2989	__dl_add(dl_b: &rd->dl_bw, tsk_bw: dl_se->dl_bw, cpus: dl_bw_cpus(i));
2990	}
2991	}
2992
2993	void dl_clear_root_domain_cpu(int cpu)
2994	{
2995	dl_clear_root_domain(cpu_rq(cpu)->rd);
2996	}
2997
2998	#endif /* CONFIG_SMP */
2999
3000	static void switched_from_dl(struct rq rq, struct* task_struct *p)
3001	{
3002	/*
3003	* task_non_contending() can start the "inactive timer" (if the 0-lag
3004	* time is in the future). If the task switches back to dl before
3005	* the "inactive timer" fires, it can continue to consume its current
3006	* runtime using its current deadline. If it stays outside of
3007	* SCHED_DEADLINE until the 0-lag time passes, inactive_task_timer()
3008	* will reset the task parameters.
3009	*/
3010	if (task_on_rq_queued(p) && p->dl.dl_runtime)
3011	task_non_contending(dl_se: &p->dl);
3012
3013	/*
3014	* In case a task is setscheduled out from SCHED_DEADLINE we need to
3015	* keep track of that on its cpuset (for correct bandwidth tracking).
3016	*/
3017	dec_dl_tasks_cs(task: p);
3018
3019	if (!task_on_rq_queued(p)) {
3020	/*
3021	* Inactive timer is armed. However, p is leaving DEADLINE and
3022	* might migrate away from this rq while continuing to run on
3023	* some other class. We need to remove its contribution from
3024	* this rq running_bw now, or sub_rq_bw (below) will complain.
3025	*/
3026	if (p->dl.dl_non_contending)
3027	sub_running_bw(dl_se: &p->dl, dl_rq: &rq->dl);
3028	sub_rq_bw(dl_se: &p->dl, dl_rq: &rq->dl);
3029	}
3030
3031	/*
3032	* We cannot use inactive_task_timer() to invoke sub_running_bw()
3033	* at the 0-lag time, because the task could have been migrated
3034	* while SCHED_OTHER in the meanwhile.
3035	*/
3036	if (p->dl.dl_non_contending)
3037	p->dl.dl_non_contending = `0`;
3038
3039	/*
3040	* Since this might be the only -deadline task on the rq,
3041	* this is the right place to try to pull some other one
3042	* from an overloaded CPU, if any.
3043	*/
3044	if (!task_on_rq_queued(p) \|\| rq->dl.dl_nr_running)
3045	return;
3046
3047	deadline_queue_pull_task(rq);
3048	}
3049
3050	/*
3051	* When switching to -deadline, we may overload the rq, then
3052	* we try to push someone off, if possible.
3053	*/
3054	static void switched_to_dl(struct rq rq, struct* task_struct *p)
3055	{
3056	cancel_inactive_timer(dl_se: &p->dl);
3057
3058	/*
3059	* In case a task is setscheduled to SCHED_DEADLINE we need to keep
3060	* track of that on its cpuset (for correct bandwidth tracking).
3061	*/
3062	inc_dl_tasks_cs(task: p);
3063
3064	/ If p is not queued we will update its parameters at next wakeup. /
3065	if (!task_on_rq_queued(p)) {
3066	add_rq_bw(dl_se: &p->dl, dl_rq: &rq->dl);
3067
3068	return;
3069	}
3070
3071	if (rq->donor != p) {
3072	#ifdef CONFIG_SMP
3073	if (p->nr_cpus_allowed > `1` && rq->dl.overloaded)
3074	deadline_queue_push_tasks(rq);
3075	#endif
3076	if (dl_task(p: rq->donor))
3077	wakeup_preempt_dl(rq, p, flags: `0`);
3078	else
3079	resched_curr(rq);
3080	} else {
3081	update_dl_rq_load_avg(now: rq_clock_pelt(rq), rq, running: `0`);
3082	}
3083	}
3084
/*
 * If the scheduling parameters of a -deadline task changed,
 * a push or pull operation might be needed.
 *
 * Nothing is done for tasks that are not queued; @oldprio is not used.
 */
static void prio_changed_dl(struct rq *rq, struct task_struct *p,
			    int oldprio)
{
	if (!task_on_rq_queued(p))
		return;

#ifdef CONFIG_SMP
	/*
	 * This might be too much, but unfortunately
	 * we don't have the old deadline value, and
	 * we can't argue if the task is increasing
	 * or lowering its prio, so...
	 */
	if (!rq->dl.overloaded)
		deadline_queue_pull_task(rq);

	if (task_current_donor(rq, p)) {
		/*
		 * If we now have a earlier deadline task than p,
		 * then reschedule, provided p is still on this
		 * runqueue.
		 */
		if (dl_time_before(rq->dl.earliest_dl.curr, p->dl.deadline))
			resched_curr(rq);
	} else {
		/*
		 * Current may not be deadline in case p was throttled but we
		 * have just replenished it (e.g. rt_mutex_setprio()).
		 *
		 * Otherwise, if p was given an earlier deadline, reschedule.
		 */
		if (!dl_task(rq->curr) ||
		    dl_time_before(p->dl.deadline, rq->curr->dl.deadline))
			resched_curr(rq);
	}
#else
	/*
	 * We don't know if p has a earlier or later deadline, so let's blindly
	 * set a (maybe not needed) rescheduling point.
	 */
	resched_curr(rq);
#endif
}
3132
3133	#ifdef CONFIG_SCHED_CORE
3134	static int task_is_throttled_dl(struct task_struct p, int* cpu)
3135	{
3136	return p->dl.dl_throttled;
3137	}
3138	#endif
3139
3140	DEFINE_SCHED_CLASS(dl) = {
3141
3142	.enqueue_task = enqueue_task_dl,
3143	.dequeue_task = dequeue_task_dl,
3144	.yield_task = yield_task_dl,
3145
3146	.wakeup_preempt = wakeup_preempt_dl,
3147
3148	.pick_task = pick_task_dl,
3149	.put_prev_task = put_prev_task_dl,
3150	.set_next_task = set_next_task_dl,
3151
3152	#ifdef CONFIG_SMP
3153	.balance = balance_dl,
3154	.select_task_rq = select_task_rq_dl,
3155	.migrate_task_rq = migrate_task_rq_dl,
3156	.set_cpus_allowed = set_cpus_allowed_dl,
3157	.rq_online = rq_online_dl,
3158	.rq_offline = rq_offline_dl,
3159	.task_woken = task_woken_dl,
3160	.find_lock_rq = find_lock_later_rq,
3161	#endif
3162
3163	.task_tick = task_tick_dl,
3164	.task_fork = task_fork_dl,
3165
3166	.prio_changed = prio_changed_dl,
3167	.switched_from = switched_from_dl,
3168	.switched_to = switched_to_dl,
3169
3170	.update_curr = update_curr_dl,
3171	#ifdef CONFIG_SCHED_CORE
3172	.task_is_throttled = task_is_throttled_dl,
3173	#endif
3174	};
3175
3176	/*
3177	* Used for dl_bw check and update, used under sched_rt_handler()::mutex and
3178	* sched_domains_mutex.
3179	*/
3180	u64 dl_cookie;
3181
3182	int sched_dl_global_validate(void)
3183	{
3184	u64 runtime = global_rt_runtime();
3185	u64 period = global_rt_period();
3186	u64 new_bw = to_ratio(period, runtime);
3187	u64 cookie = ++dl_cookie;
3188	struct dl_bw *dl_b;
3189	int cpu, cpus, ret = `0`;
3190	unsigned long flags;
3191
3192	/*
3193	* Here we want to check the bandwidth not being set to some
3194	* value smaller than the currently allocated bandwidth in
3195	* any of the root_domains.
3196	*/
3197	for_each_online_cpu(cpu) {
3198	rcu_read_lock_sched();
3199
3200	if (dl_bw_visited(cpu, cookie))
3201	goto next;
3202
3203	dl_b = dl_bw_of(i: cpu);
3204	cpus = dl_bw_cpus(i: cpu);
3205
3206	raw_spin_lock_irqsave(&dl_b->lock, flags);
3207	if (new_bw * cpus < dl_b->total_bw)
3208	ret = -EBUSY;
3209	raw_spin_unlock_irqrestore(&dl_b->lock, flags);
3210
3211	next:
3212	rcu_read_unlock_sched();
3213
3214	if (ret)
3215	break;
3216	}
3217
3218	return ret;
3219	}
3220
3221	static void init_dl_rq_bw_ratio(struct dl_rq *dl_rq)
3222	{
3223	if (global_rt_runtime() == RUNTIME_INF) {
3224	dl_rq->bw_ratio = `1` << RATIO_SHIFT;
3225	dl_rq->max_bw = dl_rq->extra_bw = `1` << BW_SHIFT;
3226	} else {
3227	dl_rq->bw_ratio = to_ratio(period: global_rt_runtime(),
3228	runtime: global_rt_period()) >> (BW_SHIFT - RATIO_SHIFT);
3229	dl_rq->max_bw = dl_rq->extra_bw =
3230	to_ratio(period: global_rt_period(), runtime: global_rt_runtime());
3231	}
3232	}
3233
3234	void sched_dl_do_global(void)
3235	{
3236	u64 new_bw = -`1`;
3237	u64 cookie = ++dl_cookie;
3238	struct dl_bw *dl_b;
3239	int cpu;
3240	unsigned long flags;
3241
3242	if (global_rt_runtime() != RUNTIME_INF)
3243	new_bw = to_ratio(period: global_rt_period(), runtime: global_rt_runtime());
3244
3245	for_each_possible_cpu(cpu) {
3246	rcu_read_lock_sched();
3247
3248	if (dl_bw_visited(cpu, cookie)) {
3249	rcu_read_unlock_sched();
3250	continue;
3251	}
3252
3253	dl_b = dl_bw_of(i: cpu);
3254
3255	raw_spin_lock_irqsave(&dl_b->lock, flags);
3256	dl_b->bw = new_bw;
3257	raw_spin_unlock_irqrestore(&dl_b->lock, flags);
3258
3259	rcu_read_unlock_sched();
3260	init_dl_rq_bw_ratio(dl_rq: &cpu_rq(cpu)->dl);
3261	}
3262	}
3263
3264	/*
3265	* We must be sure that accepting a new task (or allowing changing the
3266	* parameters of an existing one) is consistent with the bandwidth
3267	* constraints. If yes, this function also accordingly updates the currently
3268	* allocated bandwidth to reflect the new situation.
3269	*
3270	* This function is called while holding p's rq->lock.
3271	*/
3272	int sched_dl_overflow(struct task_struct p, int* policy,
3273	const struct sched_attr *attr)
3274	{
3275	u64 period = attr->sched_period ?: attr->sched_deadline;
3276	u64 runtime = attr->sched_runtime;
3277	u64 new_bw = dl_policy(policy) ? to_ratio(period, runtime) : `0`;
3278	int cpus, err = -`1`, cpu = task_cpu(p);
3279	struct dl_bw *dl_b = dl_bw_of(i: cpu);
3280	unsigned long cap;
3281
3282	if (attr->sched_flags & SCHED_FLAG_SUGOV)
3283	return `0`;
3284
3285	/ !deadline task may carry old deadline bandwidth /
3286	if (new_bw == p->dl.dl_bw && task_has_dl_policy(p))
3287	return `0`;
3288
3289	/*
3290	* Either if a task, enters, leave, or stays -deadline but changes
3291	* its parameters, we may need to update accordingly the total
3292	* allocated bandwidth of the container.
3293	*/
3294	raw_spin_lock(&dl_b->lock);
3295	cpus = dl_bw_cpus(i: cpu);
3296	cap = dl_bw_capacity(i: cpu);
3297
3298	if (dl_policy(policy) && !task_has_dl_policy(p) &&
3299	!__dl_overflow(dl_b, cap, old_bw: `0`, new_bw)) {
3300	if (hrtimer_active(timer: &p->dl.inactive_timer))
3301	__dl_sub(dl_b, tsk_bw: p->dl.dl_bw, cpus);
3302	__dl_add(dl_b, tsk_bw: new_bw, cpus);
3303	err = `0`;
3304	} else if (dl_policy(policy) && task_has_dl_policy(p) &&
3305	!__dl_overflow(dl_b, cap, old_bw: p->dl.dl_bw, new_bw)) {
3306	/*
3307	* XXX this is slightly incorrect: when the task
3308	* utilization decreases, we should delay the total
3309	* utilization change until the task's 0-lag point.
3310	* But this would require to set the task's "inactive
3311	* timer" when the task is not inactive.
3312	*/
3313	__dl_sub(dl_b, tsk_bw: p->dl.dl_bw, cpus);
3314	__dl_add(dl_b, tsk_bw: new_bw, cpus);
3315	dl_change_utilization(p, new_bw);
3316	err = `0`;
3317	} else if (!dl_policy(policy) && task_has_dl_policy(p)) {
3318	/*
3319	* Do not decrease the total deadline utilization here,
3320	* switched_from_dl() will take care to do it at the correct
3321	* (0-lag) time.
3322	*/
3323	err = `0`;
3324	}
3325	raw_spin_unlock(&dl_b->lock);
3326
3327	return err;
3328	}
3329
3330	/*
3331	* This function initializes the sched_dl_entity of a newly becoming
3332	* SCHED_DEADLINE task.
3333	*
3334	* Only the static values are considered here, the actual runtime and the
3335	* absolute deadline will be properly calculated when the task is enqueued
3336	* for the first time with its new policy.
3337	*/
3338	void __setparam_dl(struct task_struct p, const* struct sched_attr *attr)
3339	{
3340	struct sched_dl_entity *dl_se = &p->dl;
3341
3342	dl_se->dl_runtime = attr->sched_runtime;
3343	dl_se->dl_deadline = attr->sched_deadline;
3344	dl_se->dl_period = attr->sched_period ?: dl_se->dl_deadline;
3345	dl_se->flags = attr->sched_flags & SCHED_DL_FLAGS;
3346	dl_se->dl_bw = to_ratio(period: dl_se->dl_period, runtime: dl_se->dl_runtime);
3347	dl_se->dl_density = to_ratio(period: dl_se->dl_deadline, runtime: dl_se->dl_runtime);
3348	}
3349
3350	void __getparam_dl(struct task_struct p, struct* sched_attr *attr)
3351	{
3352	struct sched_dl_entity *dl_se = &p->dl;
3353
3354	attr->sched_priority = p->rt_priority;
3355	attr->sched_runtime = dl_se->dl_runtime;
3356	attr->sched_deadline = dl_se->dl_deadline;
3357	attr->sched_period = dl_se->dl_period;
3358	attr->sched_flags &= ~SCHED_DL_FLAGS;
3359	attr->sched_flags \|= dl_se->flags;
3360	}
3361
3362	/*
3363	* This function validates the new parameters of a -deadline task.
3364	* We ask for the deadline not being zero, and greater or equal
3365	* than the runtime, as well as the period of being zero or
3366	* greater than deadline. Furthermore, we have to be sure that
3367	* user parameters are above the internal resolution of 1us (we
3368	* check sched_runtime only since it is always the smaller one) and
3369	* below 2^63 ns (we have to check both sched_deadline and
3370	* sched_period, as the latter can be zero).
3371	*/
3372	bool __checkparam_dl(const struct sched_attr *attr)
3373	{
3374	u64 period, max, min;
3375
3376	/ special dl tasks don't actually use any parameter /
3377	if (attr->sched_flags & SCHED_FLAG_SUGOV)
3378	return true;
3379
3380	/ deadline != 0 /
3381	if (attr->sched_deadline == `0`)
3382	return false;
3383
3384	/*
3385	* Since we truncate DL_SCALE bits, make sure we're at least
3386	* that big.
3387	*/
3388	if (attr->sched_runtime < (`1ULL` << DL_SCALE))
3389	return false;
3390
3391	/*
3392	* Since we use the MSB for wrap-around and sign issues, make
3393	* sure it's not set (mind that period can be equal to zero).
3394	*/
3395	if (attr->sched_deadline & (`1ULL` << `63`) \|\|
3396	attr->sched_period & (`1ULL` << `63`))
3397	return false;
3398
3399	period = attr->sched_period;
3400	if (!period)
3401	period = attr->sched_deadline;
3402
3403	/ runtime <= deadline <= period (if period != 0) /
3404	if (period < attr->sched_deadline \|\|
3405	attr->sched_deadline < attr->sched_runtime)
3406	return false;
3407
3408	max = (u64)READ_ONCE(sysctl_sched_dl_period_max) * NSEC_PER_USEC;
3409	min = (u64)READ_ONCE(sysctl_sched_dl_period_min) * NSEC_PER_USEC;
3410
3411	if (period < min \|\| period > max)
3412	return false;
3413
3414	return true;
3415	}
3416
3417	/*
3418	* This function clears the sched_dl_entity static params.
3419	*/
3420	static void __dl_clear_params(struct sched_dl_entity *dl_se)
3421	{
3422	dl_se->dl_runtime = `0`;
3423	dl_se->dl_deadline = `0`;
3424	dl_se->dl_period = `0`;
3425	dl_se->flags = `0`;
3426	dl_se->dl_bw = `0`;
3427	dl_se->dl_density = `0`;
3428
3429	dl_se->dl_throttled = `0`;
3430	dl_se->dl_yielded = `0`;
3431	dl_se->dl_non_contending = `0`;
3432	dl_se->dl_overrun = `0`;
3433	dl_se->dl_server = `0`;
3434
3435	#ifdef CONFIG_RT_MUTEXES
3436	dl_se->pi_se = dl_se;
3437	#endif
3438	}
3439
3440	void init_dl_entity(struct sched_dl_entity *dl_se)
3441	{
3442	RB_CLEAR_NODE(&dl_se->rb_node);
3443	init_dl_task_timer(dl_se);
3444	init_dl_inactive_task_timer(dl_se);
3445	__dl_clear_params(dl_se);
3446	}
3447
3448	bool dl_param_changed(struct task_struct p, const* struct sched_attr *attr)
3449	{
3450	struct sched_dl_entity *dl_se = &p->dl;
3451
3452	if (dl_se->dl_runtime != attr->sched_runtime \|\|
3453	dl_se->dl_deadline != attr->sched_deadline \|\|
3454	dl_se->dl_period != attr->sched_period \|\|
3455	dl_se->flags != (attr->sched_flags & SCHED_DL_FLAGS))
3456	return true;
3457
3458	return false;
3459	}
3460
3461	#ifdef CONFIG_SMP
3462	int dl_cpuset_cpumask_can_shrink(const struct cpumask *cur,
3463	const struct cpumask *trial)
3464	{
3465	unsigned long flags, cap;
3466	struct dl_bw *cur_dl_b;
3467	int ret = `1`;
3468
3469	rcu_read_lock_sched();
3470	cur_dl_b = dl_bw_of(cpumask_any(cur));
3471	cap = __dl_bw_capacity(mask: trial);
3472	raw_spin_lock_irqsave(&cur_dl_b->lock, flags);
3473	if (__dl_overflow(dl_b: cur_dl_b, cap, old_bw: `0`, new_bw: `0`))
3474	ret = `0`;
3475	raw_spin_unlock_irqrestore(&cur_dl_b->lock, flags);
3476	rcu_read_unlock_sched();
3477
3478	return ret;
3479	}
3480
/* Requests understood by dl_bw_manage(). */
enum dl_bw_request {
	dl_bw_req_deactivate = 0,	/* check whether a CPU may be taken away */
	dl_bw_req_alloc,		/* reserve bandwidth, may fail */
	dl_bw_req_free,			/* give bandwidth back */
};
3486
3487	static int dl_bw_manage(enum dl_bw_request req, int cpu, u64 dl_bw)
3488	{
3489	unsigned long flags, cap;
3490	struct dl_bw *dl_b;
3491	bool overflow = `0`;
3492	u64 fair_server_bw = `0`;
3493
3494	rcu_read_lock_sched();
3495	dl_b = dl_bw_of(i: cpu);
3496	raw_spin_lock_irqsave(&dl_b->lock, flags);
3497
3498	cap = dl_bw_capacity(i: cpu);
3499	switch (req) {
3500	case dl_bw_req_free:
3501	__dl_sub(dl_b, tsk_bw: dl_bw, cpus: dl_bw_cpus(i: cpu));
3502	break;
3503	case dl_bw_req_alloc:
3504	overflow = __dl_overflow(dl_b, cap, old_bw: `0`, new_bw: dl_bw);
3505
3506	if (!overflow) {
3507	/*
3508	* We reserve space in the destination
3509	* root_domain, as we can't fail after this point.
3510	* We will free resources in the source root_domain
3511	* later on (see set_cpus_allowed_dl()).
3512	*/
3513	__dl_add(dl_b, tsk_bw: dl_bw, cpus: dl_bw_cpus(i: cpu));
3514	}
3515	break;
3516	case dl_bw_req_deactivate:
3517	/*
3518	* cpu is not off yet, but we need to do the math by
3519	* considering it off already (i.e., what would happen if we
3520	* turn cpu off?).
3521	*/
3522	cap -= arch_scale_cpu_capacity(cpu);
3523
3524	/*
3525	* cpu is going offline and NORMAL tasks will be moved away
3526	* from it. We can thus discount dl_server bandwidth
3527	* contribution as it won't need to be servicing tasks after
3528	* the cpu is off.
3529	*/
3530	if (cpu_rq(cpu)->fair_server.dl_server)
3531	fair_server_bw = cpu_rq(cpu)->fair_server.dl_bw;
3532
3533	/*
3534	* Not much to check if no DEADLINE bandwidth is present.
3535	* dl_servers we can discount, as tasks will be moved out the
3536	* offlined CPUs anyway.
3537	*/
3538	if (dl_b->total_bw - fair_server_bw > `0`) {
3539	/*
3540	* Leaving at least one CPU for DEADLINE tasks seems a
3541	* wise thing to do. As said above, cpu is not offline
3542	* yet, so account for that.
3543	*/
3544	if (dl_bw_cpus(i: cpu) - `1`)
3545	overflow = __dl_overflow(dl_b, cap, old_bw: fair_server_bw, new_bw: `0`);
3546	else
3547	overflow = `1`;
3548	}
3549
3550	break;
3551	}
3552
3553	raw_spin_unlock_irqrestore(&dl_b->lock, flags);
3554	rcu_read_unlock_sched();
3555
3556	return overflow ? -EBUSY : `0`;
3557	}
3558
3559	int dl_bw_deactivate(int cpu)
3560	{
3561	return dl_bw_manage(req: dl_bw_req_deactivate, cpu, dl_bw: `0`);
3562	}
3563
3564	int dl_bw_alloc(int cpu, u64 dl_bw)
3565	{
3566	return dl_bw_manage(req: dl_bw_req_alloc, cpu, dl_bw);
3567	}
3568
3569	void dl_bw_free(int cpu, u64 dl_bw)
3570	{
3571	dl_bw_manage(req: dl_bw_req_free, cpu, dl_bw);
3572	}
3573	#endif
3574
3575	void print_dl_stats(struct seq_file m, int* cpu)
3576	{
3577	print_dl_rq(m, cpu, dl_rq: &cpu_rq(cpu)->dl);
3578	}
3579

Provided by KDAB

Definitions

sysctl_sched_dl_period_max
sysctl_sched_dl_period_min
sched_dl_sysctls
sched_dl_sysctl_init
dl_server
dl_task_of
rq_of_dl_rq
rq_of_dl_se
dl_rq_of_se
on_dl_rq
pi_of
is_dl_boosted
dl_bw_of
dl_bw_cpus
__dl_bw_capacity
dl_bw_capacity
dl_bw_visited
__dl_update
__dl_sub
__dl_add
__dl_overflow
__add_running_bw
__sub_running_bw
__add_rq_bw
__sub_rq_bw
add_rq_bw
sub_rq_bw
add_running_bw
sub_running_bw
dl_rq_change_utilization
cancel_dl_timer
cancel_replenish_timer
cancel_inactive_timer
dl_change_utilization
task_non_contending
task_contending
is_leftmost
init_dl_bw
init_dl_rq
dl_overloaded
dl_set_overload
dl_clear_overload
__pushable_less
has_pushable_dl_tasks
enqueue_pushable_dl_task
dequeue_pushable_dl_task
need_pull_dl_task
dl_push_head
dl_pull_head
deadline_queue_push_tasks
deadline_queue_pull_task
dl_task_offline_migration
replenish_dl_new_period
setup_new_dl_entity
replenish_dl_entity
dl_entity_overflow
update_dl_revised_wakeup
dl_is_implicit
update_dl_entity
dl_next_period
start_dl_timer
__push_dl_task
dl_server_min_res
dl_server_timer
dl_task_timer
init_dl_task_timer
dl_check_constrained_dl
dl_runtime_exceeded
grub_reclaim
dl_scaled_delta_exec
update_curr_dl_se
dl_server_update_idle_time
dl_server_update
dl_server_start
dl_server_stop
dl_server_init
__dl_server_attach_root
dl_server_apply_params
update_curr_dl
inactive_task_timer
init_dl_inactive_task_timer
inc_dl_deadline
dec_dl_deadline
inc_dl_tasks
dec_dl_tasks
__dl_less
__schedstats_from_dl_se
update_stats_wait_start_dl
update_stats_wait_end_dl
update_stats_enqueue_sleeper_dl
update_stats_enqueue_dl
update_stats_dequeue_dl
__enqueue_dl_entity
__dequeue_dl_entity
enqueue_dl_entity
dequeue_dl_entity
enqueue_task_dl
dequeue_task_dl
yield_task_dl
dl_task_is_earliest_deadline
select_task_rq_dl
migrate_task_rq_dl
check_preempt_equal_dl
balance_dl
wakeup_preempt_dl
start_hrtick_dl
set_next_task_dl
pick_next_dl_entity
__pick_task_dl
pick_task_dl
put_prev_task_dl
task_tick_dl
task_fork_dl
pick_earliest_pushable_dl_task
local_cpu_mask_dl
find_later_rq
find_lock_later_rq
pick_next_pushable_dl_task
push_dl_task
push_dl_tasks
pull_dl_task
task_woken_dl
set_cpus_allowed_dl
rq_online_dl
rq_offline_dl
init_sched_dl_class
dl_add_task_root_domain
dl_clear_root_domain
dl_clear_root_domain_cpu
switched_from_dl
switched_to_dl
prio_changed_dl
task_is_throttled_dl
dl_cookie
sched_dl_global_validate
init_dl_rq_bw_ratio
sched_dl_do_global
sched_dl_overflow
__setparam_dl
__getparam_dl
__checkparam_dl
__dl_clear_params
init_dl_entity
dl_param_changed
dl_cpuset_cpumask_can_shrink
dl_bw_request
dl_bw_manage
dl_bw_deactivate
dl_bw_alloc
dl_bw_free

Improve your Profiling and Debugging skills

Find out more

Definitions

source code of linux/kernel/sched/deadline.c