// SPDX-License-Identifier: GPL-2.0
/*
 * Virtual cpu timer based timer functions.
 *
 * Copyright IBM Corp. 2004, 2012
 * Author(s): Jan Glauber <jan.glauber@de.ibm.com>
 */

#include <linux/kernel_stat.h>
#include <linux/export.h>
#include <linux/kernel.h>
#include <linux/timex.h>
#include <linux/types.h>
#include <linux/time.h>
#include <asm/alternative.h>
#include <asm/cputime.h>
#include <asm/vtimer.h>
#include <asm/vtime.h>
#include <asm/cpu_mf.h>
#include <asm/smp.h>

#include "entry.h"

static void virt_timer_expire(void);

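/*
 * All pending virtual CPU timers are kept on virt_timer_list, which is
 * protected by virt_timer_lock. virt_timer_current caches the expiry
 * value of the earliest pending timer, virt_timer_elapsed accumulates
 * the cpu time consumed since the last expiry run.
 */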
static LIST_HEAD(virt_timer_list);
static DEFINE_SPINLOCK(virt_timer_lock);
static atomic64_t virt_timer_current;
static atomic64_t virt_timer_elapsed;

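/*
 * Per-cpu state for SMT utilization scaling: the last MT diagnostic
 * cycle counters, the mult/div factors derived from them, and the
 * jiffies value of the last recalculation. See update_mt_scaling().
 */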
DEFINE_PER_CPU(u64, mt_cycles[8]);
static DEFINE_PER_CPU(u64, mt_scaling_mult) = { 1 };
static DEFINE_PER_CPU(u64, mt_scaling_div) = { 1 };
static DEFINE_PER_CPU(u64, mt_scaling_jiffies);

static inline u64 get_vtimer(void)
{
	u64 timer;

	asm volatile("stpt %0" : "=Q" (timer));
	return timer;
}

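/*
 * Program a new CPU timer expiry value. The cpu time consumed since
 * the last update is credited to the system time of the current
 * context before the timer is reprogrammed.
 */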
static inline void set_vtimer(u64 expires)
{
	u64 timer;

	asm volatile(
		"	stpt	%0\n"	/* Store current cpu timer value */
		"	spt	%1"	/* Set new value imm. afterwards */
		: "=Q" (timer) : "Q" (expires));
	S390_lowcore.system_timer += S390_lowcore.last_update_timer - timer;
	S390_lowcore.last_update_timer = expires;
}

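/*
 * Add the elapsed cpu time to the global elapsed counter. Returns
 * true if the earliest pending virtual timer has expired and
 * virt_timer_expire() needs to run.
 */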
static inline int virt_timer_forward(u64 elapsed)
{
	BUG_ON(!irqs_disabled());

	if (list_empty(&virt_timer_list))
		return 0;
	elapsed = atomic64_add_return(elapsed, &virt_timer_elapsed);
	return elapsed >= atomic64_read(&virt_timer_current);
}

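/*
 * Recalculate the MT utilization scaling factor from the MT diagnostic
 * cycle counters. With delta[i] cycles counted while i + 1 threads of
 * the core were active, the loop below builds the fraction
 *	mult / div = sum(delta[i] / (i + 1)) / sum(delta[i]),
 * using factorials to keep the arithmetic integral: the weighted
 * average share of core capacity a single thread had available.
 */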
static void update_mt_scaling(void)
{
	u64 cycles_new[8], *cycles_old;
	u64 delta, fac, mult, div;
	int i;

	stcctm(MT_DIAG, smp_cpu_mtid + 1, cycles_new);
	cycles_old = this_cpu_ptr(mt_cycles);
	fac = 1;
	mult = div = 0;
	for (i = 0; i <= smp_cpu_mtid; i++) {
		delta = cycles_new[i] - cycles_old[i];
		div += delta;
		mult *= i + 1;
		mult += delta * fac;
		fac *= i + 1;
	}
	div *= fac;
	if (div > 0) {
		/* Update scaling factor */
		__this_cpu_write(mt_scaling_mult, mult);
		__this_cpu_write(mt_scaling_div, div);
		memcpy(cycles_old, cycles_new,
		       sizeof(u64) * (smp_cpu_mtid + 1));
	}
	__this_cpu_write(mt_scaling_jiffies, jiffies_64);
}

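/* Advance *tsk_vtime to the new value and return the difference */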
static inline u64 update_tsk_timer(unsigned long *tsk_vtime, u64 new)
{
	u64 delta;

	delta = new - *tsk_vtime;
	*tsk_vtime = new;
	return delta;
}

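/* Scale raw cpu time by the MT utilization factor of this cpu */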
static inline u64 scale_vtime(u64 vtime)
{
	u64 mult = __this_cpu_read(mt_scaling_mult);
	u64 div = __this_cpu_read(mt_scaling_div);

	if (smp_cpu_mtid)
		return vtime * mult / div;
	return vtime;
}

static void account_system_index_scaled(struct task_struct *p, u64 cputime,
					enum cpu_usage_stat index)
{
	p->stimescaled += cputime_to_nsecs(scale_vtime(cputime));
	account_system_index_time(p, cputime_to_nsecs(cputime), index);
}

/*
 * Update process times based on virtual cpu times stored by entry.S
 * to the lowcore fields user_timer, system_timer & steal_timer.
 */
static int do_account_vtime(struct task_struct *tsk)
{
	u64 timer, clock, user, guest, system, hardirq, softirq;

	timer = S390_lowcore.last_update_timer;
	clock = S390_lowcore.last_update_clock;
	asm volatile(
		"	stpt	%0\n"	/* Store current cpu timer value */
		"	stckf	%1"	/* Store current tod clock value */
		: "=Q" (S390_lowcore.last_update_timer),
		  "=Q" (S390_lowcore.last_update_clock)
		: : "cc");
	clock = S390_lowcore.last_update_clock - clock;
	timer -= S390_lowcore.last_update_timer;

	if (hardirq_count())
		S390_lowcore.hardirq_timer += timer;
	else
		S390_lowcore.system_timer += timer;

	/* Update MT utilization calculation */
	if (smp_cpu_mtid &&
	    time_after64(jiffies_64, this_cpu_read(mt_scaling_jiffies)))
		update_mt_scaling();

	/* Calculate cputime delta */
	user = update_tsk_timer(&tsk->thread.user_timer,
				READ_ONCE(S390_lowcore.user_timer));
	guest = update_tsk_timer(&tsk->thread.guest_timer,
				 READ_ONCE(S390_lowcore.guest_timer));
	system = update_tsk_timer(&tsk->thread.system_timer,
				  READ_ONCE(S390_lowcore.system_timer));
	hardirq = update_tsk_timer(&tsk->thread.hardirq_timer,
				   READ_ONCE(S390_lowcore.hardirq_timer));
	softirq = update_tsk_timer(&tsk->thread.softirq_timer,
				   READ_ONCE(S390_lowcore.softirq_timer));
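	/* The wall-clock delta not covered by any cpu time is steal time */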
	S390_lowcore.steal_timer +=
		clock - user - guest - system - hardirq - softirq;

	/* Push account value */
	if (user) {
		account_user_time(tsk, cputime_to_nsecs(user));
		tsk->utimescaled += cputime_to_nsecs(scale_vtime(user));
	}

	if (guest) {
		account_guest_time(tsk, cputime_to_nsecs(guest));
		tsk->utimescaled += cputime_to_nsecs(scale_vtime(guest));
	}

	if (system)
		account_system_index_scaled(tsk, system, CPUTIME_SYSTEM);
	if (hardirq)
		account_system_index_scaled(tsk, hardirq, CPUTIME_IRQ);
	if (softirq)
		account_system_index_scaled(tsk, softirq, CPUTIME_SOFTIRQ);

	return virt_timer_forward(user + guest + system + hardirq + softirq);
}

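/*
 * Account the outgoing task, move its accumulated timer values from
 * the lowcore to its thread struct, then load the values of the
 * incoming task into the lowcore.
 */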
void vtime_task_switch(struct task_struct *prev)
{
	do_account_vtime(prev);
	prev->thread.user_timer = S390_lowcore.user_timer;
	prev->thread.guest_timer = S390_lowcore.guest_timer;
	prev->thread.system_timer = S390_lowcore.system_timer;
	prev->thread.hardirq_timer = S390_lowcore.hardirq_timer;
	prev->thread.softirq_timer = S390_lowcore.softirq_timer;
	S390_lowcore.user_timer = current->thread.user_timer;
	S390_lowcore.guest_timer = current->thread.guest_timer;
	S390_lowcore.system_timer = current->thread.system_timer;
	S390_lowcore.hardirq_timer = current->thread.hardirq_timer;
	S390_lowcore.softirq_timer = current->thread.softirq_timer;
}

/*
 * In s390, accounting pending user time also implies
 * accounting system time in order to correctly compute
 * the stolen time accounting.
 */
void vtime_flush(struct task_struct *tsk)
{
	u64 steal, avg_steal;

	if (do_account_vtime(tsk))
		virt_timer_expire();

	steal = S390_lowcore.steal_timer;
	avg_steal = S390_lowcore.avg_steal_timer;
	if ((s64) steal > 0) {
		S390_lowcore.steal_timer = 0;
		account_steal_time(cputime_to_nsecs(steal));
		avg_steal += steal;
	}
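	/* Keep an exponentially decaying average of the observed steal time */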
	S390_lowcore.avg_steal_timer = avg_steal / 2;
}

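/* Return the cpu timer ticks consumed since the last update */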
static u64 vtime_delta(void)
{
	u64 timer = S390_lowcore.last_update_timer;

	S390_lowcore.last_update_timer = get_vtimer();

	return timer - S390_lowcore.last_update_timer;
}

/*
 * Account the elapsed cpu timer ticks to the guest time of a vcpu
 * task (PF_VCPU set) or to the system time of any other task.
 */
void vtime_account_kernel(struct task_struct *tsk)
{
	u64 delta = vtime_delta();

	if (tsk->flags & PF_VCPU)
		S390_lowcore.guest_timer += delta;
	else
		S390_lowcore.system_timer += delta;

	virt_timer_forward(delta);
}
EXPORT_SYMBOL_GPL(vtime_account_kernel);

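/* Account the elapsed cpu timer ticks as softirq time */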
void vtime_account_softirq(struct task_struct *tsk)
{
	u64 delta = vtime_delta();

	S390_lowcore.softirq_timer += delta;

	virt_timer_forward(delta);
}

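/* Account the elapsed cpu timer ticks as hardirq time */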
void vtime_account_hardirq(struct task_struct *tsk)
{
	u64 delta = vtime_delta();

	S390_lowcore.hardirq_timer += delta;

	virt_timer_forward(delta);
}

/*
 * Sorted add to a list. The list is searched linearly until the first
 * element with a larger expiry value is found.
 */
static void list_add_sorted(struct vtimer_list *timer, struct list_head *head)
{
	struct vtimer_list *tmp;

	list_for_each_entry(tmp, head, entry) {
		if (tmp->expires > timer->expires) {
			list_add_tail(&timer->entry, &tmp->entry);
			return;
		}
	}
	list_add_tail(&timer->entry, head);
}

/*
 * Handler for expired virtual CPU timer.
 */
static void virt_timer_expire(void)
{
	struct vtimer_list *timer, *tmp;
	unsigned long elapsed;
	LIST_HEAD(cb_list);

	/* walk timer list, fire all expired timers */
	spin_lock(&virt_timer_lock);
	elapsed = atomic64_read(&virt_timer_elapsed);
	list_for_each_entry_safe(timer, tmp, &virt_timer_list, entry) {
		if (timer->expires < elapsed)
			/* move expired timer to the callback queue */
			list_move_tail(&timer->entry, &cb_list);
		else
			timer->expires -= elapsed;
	}
	if (!list_empty(&virt_timer_list)) {
		timer = list_first_entry(&virt_timer_list,
					 struct vtimer_list, entry);
		atomic64_set(&virt_timer_current, timer->expires);
	}
	atomic64_sub(elapsed, &virt_timer_elapsed);
	spin_unlock(&virt_timer_lock);

	/* Do callbacks and recharge periodic timers */
	list_for_each_entry_safe(timer, tmp, &cb_list, entry) {
		list_del_init(&timer->entry);
		timer->function(timer->data);
		if (timer->interval) {
			/* Recharge interval timer */
			timer->expires = timer->interval +
				atomic64_read(&virt_timer_elapsed);
			spin_lock(&virt_timer_lock);
			list_add_sorted(timer, &virt_timer_list);
			spin_unlock(&virt_timer_lock);
		}
	}
}

void init_virt_timer(struct vtimer_list *timer)
{
	timer->function = NULL;
	INIT_LIST_HEAD(&timer->entry);
}
EXPORT_SYMBOL(init_virt_timer);

static inline int vtimer_pending(struct vtimer_list *timer)
{
	return !list_empty(&timer->entry);
}

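/* Add a timer to the list; the caller must hold virt_timer_lock. */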
static void internal_add_vtimer(struct vtimer_list *timer)
{
	if (list_empty(&virt_timer_list)) {
		/* First timer, just program it. */
		atomic64_set(&virt_timer_current, timer->expires);
		atomic64_set(&virt_timer_elapsed, 0);
		list_add(&timer->entry, &virt_timer_list);
	} else {
		/* Update timer against current base. */
		timer->expires += atomic64_read(&virt_timer_elapsed);
		if (likely((s64) timer->expires <
			   (s64) atomic64_read(&virt_timer_current)))
			/* The new timer expires before the current timer. */
			atomic64_set(&virt_timer_current, timer->expires);
		/* Insert new timer into the list. */
		list_add_sorted(timer, &virt_timer_list);
	}
}

static void __add_vtimer(struct vtimer_list *timer, int periodic)
{
	unsigned long flags;

	timer->interval = periodic ? timer->expires : 0;
	spin_lock_irqsave(&virt_timer_lock, flags);
	internal_add_vtimer(timer);
	spin_unlock_irqrestore(&virt_timer_lock, flags);
}

/*
 * add_virt_timer - add a one-shot virtual CPU timer
 */
void add_virt_timer(struct vtimer_list *timer)
{
	__add_vtimer(timer, 0);
}
EXPORT_SYMBOL(add_virt_timer);

/*
 * add_virt_timer_periodic - add an interval virtual CPU timer
 */
void add_virt_timer_periodic(struct vtimer_list *timer)
{
	__add_vtimer(timer, 1);
}
EXPORT_SYMBOL(add_virt_timer_periodic);

static int __mod_vtimer(struct vtimer_list *timer, u64 expires, int periodic)
{
	unsigned long flags;
	int rc;

	BUG_ON(!timer->function);

	if (timer->expires == expires && vtimer_pending(timer))
		return 1;
	spin_lock_irqsave(&virt_timer_lock, flags);
	rc = vtimer_pending(timer);
	if (rc)
		list_del_init(&timer->entry);
	timer->interval = periodic ? expires : 0;
	timer->expires = expires;
	internal_add_vtimer(timer);
	spin_unlock_irqrestore(&virt_timer_lock, flags);
	return rc;
}

/*
 * returns whether it has modified a pending timer (1) or not (0)
 */
int mod_virt_timer(struct vtimer_list *timer, u64 expires)
{
	return __mod_vtimer(timer, expires, 0);
}
EXPORT_SYMBOL(mod_virt_timer);

/*
 * returns whether it has modified a pending timer (1) or not (0)
 */
int mod_virt_timer_periodic(struct vtimer_list *timer, u64 expires)
{
	return __mod_vtimer(timer, expires, 1);
}
EXPORT_SYMBOL(mod_virt_timer_periodic);

/*
 * Delete a virtual timer.
 *
 * returns whether the deleted timer was pending (1) or not (0)
 */
int del_virt_timer(struct vtimer_list *timer)
{
	unsigned long flags;

	if (!vtimer_pending(timer))
		return 0;
	spin_lock_irqsave(&virt_timer_lock, flags);
	list_del_init(&timer->entry);
	spin_unlock_irqrestore(&virt_timer_lock, flags);
	return 1;
}
EXPORT_SYMBOL(del_virt_timer);

/*
 * Start the virtual CPU timer on the current CPU.
 */
void vtime_init(void)
{
	/* set initial cpu timer */
	set_vtimer(VTIMER_MAX_SLICE);
	/* Setup initial MT scaling values */
	if (smp_cpu_mtid) {
		__this_cpu_write(mt_scaling_jiffies, jiffies);
		__this_cpu_write(mt_scaling_mult, 1);
		__this_cpu_write(mt_scaling_div, 1);
		stcctm(MT_DIAG, smp_cpu_mtid + 1, this_cpu_ptr(mt_cycles));
	}
}