1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* |
3 | * Generic userspace implementations of gettimeofday() and similar. |
4 | */ |
5 | #include <vdso/datapage.h> |
6 | #include <vdso/helpers.h> |
7 | |
8 | #ifndef vdso_calc_delta |
9 | /* |
10 | * Default implementation which works for all sane clocksources. That |
11 | * obviously excludes x86/TSC. |
12 | */ |
13 | static __always_inline |
14 | u64 vdso_calc_delta(u64 cycles, u64 last, u64 mask, u32 mult) |
15 | { |
16 | return ((cycles - last) & mask) * mult; |
17 | } |
18 | #endif |
19 | |
20 | #ifndef vdso_shift_ns |
21 | static __always_inline u64 vdso_shift_ns(u64 ns, u32 shift) |
22 | { |
23 | return ns >> shift; |
24 | } |
25 | #endif |
26 | |
#ifndef __arch_vdso_hres_capable
/*
 * Default answer to "is the high resolution path usable?": always yes.
 * do_hres() bails out with -1 when this returns false, so an override that
 * returns a constant false compiles the high resolution parts out.
 */
static inline bool __arch_vdso_hres_capable(void)
{
	const bool hres_available = true;

	return hres_available;
}
#endif
33 | |
34 | #ifndef vdso_clocksource_ok |
35 | static inline bool vdso_clocksource_ok(const struct vdso_data *vd) |
36 | { |
37 | return vd->clock_mode != VDSO_CLOCKMODE_NONE; |
38 | } |
39 | #endif |
40 | |
41 | #ifndef vdso_cycles_ok |
42 | static inline bool vdso_cycles_ok(u64 cycles) |
43 | { |
44 | return true; |
45 | } |
46 | #endif |
47 | |
48 | #ifdef CONFIG_TIME_NS |
49 | static __always_inline int do_hres_timens(const struct vdso_data *vdns, clockid_t clk, |
50 | struct __kernel_timespec *ts) |
51 | { |
52 | const struct vdso_data *vd; |
53 | const struct timens_offset *offs = &vdns->offset[clk]; |
54 | const struct vdso_timestamp *vdso_ts; |
55 | u64 cycles, last, ns; |
56 | u32 seq; |
57 | s64 sec; |
58 | |
59 | vd = vdns - (clk == CLOCK_MONOTONIC_RAW ? CS_RAW : CS_HRES_COARSE); |
60 | vd = __arch_get_timens_vdso_data(vd); |
61 | if (clk != CLOCK_MONOTONIC_RAW) |
62 | vd = &vd[CS_HRES_COARSE]; |
63 | else |
64 | vd = &vd[CS_RAW]; |
65 | vdso_ts = &vd->basetime[clk]; |
66 | |
67 | do { |
68 | seq = vdso_read_begin(vd); |
69 | |
70 | if (unlikely(!vdso_clocksource_ok(vd))) |
71 | return -1; |
72 | |
73 | cycles = __arch_get_hw_counter(clock_mode: vd->clock_mode, vd); |
74 | if (unlikely(!vdso_cycles_ok(cycles))) |
75 | return -1; |
76 | ns = vdso_ts->nsec; |
77 | last = vd->cycle_last; |
78 | ns += vdso_calc_delta(cycles, last, mask: vd->mask, mult: vd->mult); |
79 | ns = vdso_shift_ns(ns, shift: vd->shift); |
80 | sec = vdso_ts->sec; |
81 | } while (unlikely(vdso_read_retry(vd, seq))); |
82 | |
83 | /* Add the namespace offset */ |
84 | sec += offs->sec; |
85 | ns += offs->nsec; |
86 | |
87 | /* |
88 | * Do this outside the loop: a race inside the loop could result |
89 | * in __iter_div_u64_rem() being extremely slow. |
90 | */ |
91 | ts->tv_sec = sec + __iter_div_u64_rem(dividend: ns, NSEC_PER_SEC, remainder: &ns); |
92 | ts->tv_nsec = ns; |
93 | |
94 | return 0; |
95 | } |
96 | #else |
97 | static __always_inline |
98 | const struct vdso_data *__arch_get_timens_vdso_data(const struct vdso_data *vd) |
99 | { |
100 | return NULL; |
101 | } |
102 | |
/*
 * Stand-in used when CONFIG_TIME_NS is disabled. The only call site, in
 * do_hres(), is guarded by IS_ENABLED(CONFIG_TIME_NS), so this exists to
 * keep the build working.
 *
 * Return: -1, the same failure value used by every other failure path in
 * this file (including the do_coarse_timens() stub). Callers only test for
 * non-zero before taking the fallback.
 */
static __always_inline int do_hres_timens(const struct vdso_data *vdns, clockid_t clk,
					  struct __kernel_timespec *ts)
{
	return -1;
}
108 | #endif |
109 | |
110 | static __always_inline int do_hres(const struct vdso_data *vd, clockid_t clk, |
111 | struct __kernel_timespec *ts) |
112 | { |
113 | const struct vdso_timestamp *vdso_ts = &vd->basetime[clk]; |
114 | u64 cycles, last, sec, ns; |
115 | u32 seq; |
116 | |
117 | /* Allows to compile the high resolution parts out */ |
118 | if (!__arch_vdso_hres_capable()) |
119 | return -1; |
120 | |
121 | do { |
122 | /* |
123 | * Open coded to handle VDSO_CLOCKMODE_TIMENS. Time namespace |
124 | * enabled tasks have a special VVAR page installed which |
125 | * has vd->seq set to 1 and vd->clock_mode set to |
126 | * VDSO_CLOCKMODE_TIMENS. For non time namespace affected tasks |
127 | * this does not affect performance because if vd->seq is |
128 | * odd, i.e. a concurrent update is in progress the extra |
129 | * check for vd->clock_mode is just a few extra |
130 | * instructions while spin waiting for vd->seq to become |
131 | * even again. |
132 | */ |
133 | while (unlikely((seq = READ_ONCE(vd->seq)) & 1)) { |
134 | if (IS_ENABLED(CONFIG_TIME_NS) && |
135 | vd->clock_mode == VDSO_CLOCKMODE_TIMENS) |
136 | return do_hres_timens(vdns: vd, clk, ts); |
137 | cpu_relax(); |
138 | } |
139 | smp_rmb(); |
140 | |
141 | if (unlikely(!vdso_clocksource_ok(vd))) |
142 | return -1; |
143 | |
144 | cycles = __arch_get_hw_counter(clock_mode: vd->clock_mode, vd); |
145 | if (unlikely(!vdso_cycles_ok(cycles))) |
146 | return -1; |
147 | ns = vdso_ts->nsec; |
148 | last = vd->cycle_last; |
149 | ns += vdso_calc_delta(cycles, last, mask: vd->mask, mult: vd->mult); |
150 | ns = vdso_shift_ns(ns, shift: vd->shift); |
151 | sec = vdso_ts->sec; |
152 | } while (unlikely(vdso_read_retry(vd, seq))); |
153 | |
154 | /* |
155 | * Do this outside the loop: a race inside the loop could result |
156 | * in __iter_div_u64_rem() being extremely slow. |
157 | */ |
158 | ts->tv_sec = sec + __iter_div_u64_rem(dividend: ns, NSEC_PER_SEC, remainder: &ns); |
159 | ts->tv_nsec = ns; |
160 | |
161 | return 0; |
162 | } |
163 | |
164 | #ifdef CONFIG_TIME_NS |
165 | static __always_inline int do_coarse_timens(const struct vdso_data *vdns, clockid_t clk, |
166 | struct __kernel_timespec *ts) |
167 | { |
168 | const struct vdso_data *vd = __arch_get_timens_vdso_data(vd: vdns); |
169 | const struct vdso_timestamp *vdso_ts = &vd->basetime[clk]; |
170 | const struct timens_offset *offs = &vdns->offset[clk]; |
171 | u64 nsec; |
172 | s64 sec; |
173 | s32 seq; |
174 | |
175 | do { |
176 | seq = vdso_read_begin(vd); |
177 | sec = vdso_ts->sec; |
178 | nsec = vdso_ts->nsec; |
179 | } while (unlikely(vdso_read_retry(vd, seq))); |
180 | |
181 | /* Add the namespace offset */ |
182 | sec += offs->sec; |
183 | nsec += offs->nsec; |
184 | |
185 | /* |
186 | * Do this outside the loop: a race inside the loop could result |
187 | * in __iter_div_u64_rem() being extremely slow. |
188 | */ |
189 | ts->tv_sec = sec + __iter_div_u64_rem(dividend: nsec, NSEC_PER_SEC, remainder: &nsec); |
190 | ts->tv_nsec = nsec; |
191 | return 0; |
192 | } |
193 | #else |
/*
 * Stand-in used when CONFIG_TIME_NS is disabled. The only call site, in
 * do_coarse(), is guarded by IS_ENABLED(CONFIG_TIME_NS), so this exists to
 * keep the build working.
 *
 * Return: always -1.
 */
static __always_inline int do_coarse_timens(const struct vdso_data *vdns, clockid_t clk,
					    struct __kernel_timespec *ts)
{
	(void)vdns;
	(void)clk;
	(void)ts;

	return -1;
}
199 | #endif |
200 | |
201 | static __always_inline int do_coarse(const struct vdso_data *vd, clockid_t clk, |
202 | struct __kernel_timespec *ts) |
203 | { |
204 | const struct vdso_timestamp *vdso_ts = &vd->basetime[clk]; |
205 | u32 seq; |
206 | |
207 | do { |
208 | /* |
209 | * Open coded to handle VDSO_CLOCK_TIMENS. See comment in |
210 | * do_hres(). |
211 | */ |
212 | while ((seq = READ_ONCE(vd->seq)) & 1) { |
213 | if (IS_ENABLED(CONFIG_TIME_NS) && |
214 | vd->clock_mode == VDSO_CLOCKMODE_TIMENS) |
215 | return do_coarse_timens(vdns: vd, clk, ts); |
216 | cpu_relax(); |
217 | } |
218 | smp_rmb(); |
219 | |
220 | ts->tv_sec = vdso_ts->sec; |
221 | ts->tv_nsec = vdso_ts->nsec; |
222 | } while (unlikely(vdso_read_retry(vd, seq))); |
223 | |
224 | return 0; |
225 | } |
226 | |
227 | static __always_inline int |
228 | __cvdso_clock_gettime_common(const struct vdso_data *vd, clockid_t clock, |
229 | struct __kernel_timespec *ts) |
230 | { |
231 | u32 msk; |
232 | |
233 | /* Check for negative values or invalid clocks */ |
234 | if (unlikely((u32) clock >= MAX_CLOCKS)) |
235 | return -1; |
236 | |
237 | /* |
238 | * Convert the clockid to a bitmask and use it to check which |
239 | * clocks are handled in the VDSO directly. |
240 | */ |
241 | msk = 1U << clock; |
242 | if (likely(msk & VDSO_HRES)) |
243 | vd = &vd[CS_HRES_COARSE]; |
244 | else if (msk & VDSO_COARSE) |
245 | return do_coarse(vd: &vd[CS_HRES_COARSE], clk: clock, ts); |
246 | else if (msk & VDSO_RAW) |
247 | vd = &vd[CS_RAW]; |
248 | else |
249 | return -1; |
250 | |
251 | return do_hres(vd, clk: clock, ts); |
252 | } |
253 | |
254 | static __maybe_unused int |
255 | __cvdso_clock_gettime_data(const struct vdso_data *vd, clockid_t clock, |
256 | struct __kernel_timespec *ts) |
257 | { |
258 | int ret = __cvdso_clock_gettime_common(vd, clock, ts); |
259 | |
260 | if (unlikely(ret)) |
261 | return clock_gettime_fallback(clkid: clock, ts: ts); |
262 | return 0; |
263 | } |
264 | |
265 | static __maybe_unused int |
266 | __cvdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts) |
267 | { |
268 | return __cvdso_clock_gettime_data(vd: __arch_get_vdso_data(), clock, ts); |
269 | } |
270 | |
271 | #ifdef BUILD_VDSO32 |
272 | static __maybe_unused int |
273 | __cvdso_clock_gettime32_data(const struct vdso_data *vd, clockid_t clock, |
274 | struct old_timespec32 *res) |
275 | { |
276 | struct __kernel_timespec ts; |
277 | int ret; |
278 | |
279 | ret = __cvdso_clock_gettime_common(vd, clock, &ts); |
280 | |
281 | if (unlikely(ret)) |
282 | return clock_gettime32_fallback(clock, res); |
283 | |
284 | /* For ret == 0 */ |
285 | res->tv_sec = ts.tv_sec; |
286 | res->tv_nsec = ts.tv_nsec; |
287 | |
288 | return ret; |
289 | } |
290 | |
291 | static __maybe_unused int |
292 | __cvdso_clock_gettime32(clockid_t clock, struct old_timespec32 *res) |
293 | { |
294 | return __cvdso_clock_gettime32_data(__arch_get_vdso_data(), clock, res); |
295 | } |
296 | #endif /* BUILD_VDSO32 */ |
297 | |
298 | static __maybe_unused int |
299 | __cvdso_gettimeofday_data(const struct vdso_data *vd, |
300 | struct __kernel_old_timeval *tv, struct timezone *tz) |
301 | { |
302 | |
303 | if (likely(tv != NULL)) { |
304 | struct __kernel_timespec ts; |
305 | |
306 | if (do_hres(vd: &vd[CS_HRES_COARSE], CLOCK_REALTIME, ts: &ts)) |
307 | return gettimeofday_fallback(tv: tv, tz: tz); |
308 | |
309 | tv->tv_sec = ts.tv_sec; |
310 | tv->tv_usec = (u32)ts.tv_nsec / NSEC_PER_USEC; |
311 | } |
312 | |
313 | if (unlikely(tz != NULL)) { |
314 | if (IS_ENABLED(CONFIG_TIME_NS) && |
315 | vd->clock_mode == VDSO_CLOCKMODE_TIMENS) |
316 | vd = __arch_get_timens_vdso_data(vd); |
317 | |
318 | tz->tz_minuteswest = vd[CS_HRES_COARSE].tz_minuteswest; |
319 | tz->tz_dsttime = vd[CS_HRES_COARSE].tz_dsttime; |
320 | } |
321 | |
322 | return 0; |
323 | } |
324 | |
325 | static __maybe_unused int |
326 | __cvdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz) |
327 | { |
328 | return __cvdso_gettimeofday_data(vd: __arch_get_vdso_data(), tv, tz); |
329 | } |
330 | |
331 | #ifdef VDSO_HAS_TIME |
332 | static __maybe_unused __kernel_old_time_t |
333 | __cvdso_time_data(const struct vdso_data *vd, __kernel_old_time_t *time) |
334 | { |
335 | __kernel_old_time_t t; |
336 | |
337 | if (IS_ENABLED(CONFIG_TIME_NS) && |
338 | vd->clock_mode == VDSO_CLOCKMODE_TIMENS) |
339 | vd = __arch_get_timens_vdso_data(vd); |
340 | |
341 | t = READ_ONCE(vd[CS_HRES_COARSE].basetime[CLOCK_REALTIME].sec); |
342 | |
343 | if (time) |
344 | *time = t; |
345 | |
346 | return t; |
347 | } |
348 | |
349 | static __maybe_unused __kernel_old_time_t __cvdso_time(__kernel_old_time_t *time) |
350 | { |
351 | return __cvdso_time_data(vd: __arch_get_vdso_data(), time); |
352 | } |
353 | #endif /* VDSO_HAS_TIME */ |
354 | |
355 | #ifdef VDSO_HAS_CLOCK_GETRES |
356 | static __maybe_unused |
357 | int __cvdso_clock_getres_common(const struct vdso_data *vd, clockid_t clock, |
358 | struct __kernel_timespec *res) |
359 | { |
360 | u32 msk; |
361 | u64 ns; |
362 | |
363 | /* Check for negative values or invalid clocks */ |
364 | if (unlikely((u32) clock >= MAX_CLOCKS)) |
365 | return -1; |
366 | |
367 | if (IS_ENABLED(CONFIG_TIME_NS) && |
368 | vd->clock_mode == VDSO_CLOCKMODE_TIMENS) |
369 | vd = __arch_get_timens_vdso_data(vd); |
370 | |
371 | /* |
372 | * Convert the clockid to a bitmask and use it to check which |
373 | * clocks are handled in the VDSO directly. |
374 | */ |
375 | msk = 1U << clock; |
376 | if (msk & (VDSO_HRES | VDSO_RAW)) { |
377 | /* |
378 | * Preserves the behaviour of posix_get_hrtimer_res(). |
379 | */ |
380 | ns = READ_ONCE(vd[CS_HRES_COARSE].hrtimer_res); |
381 | } else if (msk & VDSO_COARSE) { |
382 | /* |
383 | * Preserves the behaviour of posix_get_coarse_res(). |
384 | */ |
385 | ns = LOW_RES_NSEC; |
386 | } else { |
387 | return -1; |
388 | } |
389 | |
390 | if (likely(res)) { |
391 | res->tv_sec = 0; |
392 | res->tv_nsec = ns; |
393 | } |
394 | return 0; |
395 | } |
396 | |
397 | static __maybe_unused |
398 | int __cvdso_clock_getres_data(const struct vdso_data *vd, clockid_t clock, |
399 | struct __kernel_timespec *res) |
400 | { |
401 | int ret = __cvdso_clock_getres_common(vd, clock, res); |
402 | |
403 | if (unlikely(ret)) |
404 | return clock_getres_fallback(clkid: clock, ts: res); |
405 | return 0; |
406 | } |
407 | |
408 | static __maybe_unused |
409 | int __cvdso_clock_getres(clockid_t clock, struct __kernel_timespec *res) |
410 | { |
411 | return __cvdso_clock_getres_data(vd: __arch_get_vdso_data(), clock, res); |
412 | } |
413 | |
414 | #ifdef BUILD_VDSO32 |
415 | static __maybe_unused int |
416 | __cvdso_clock_getres_time32_data(const struct vdso_data *vd, clockid_t clock, |
417 | struct old_timespec32 *res) |
418 | { |
419 | struct __kernel_timespec ts; |
420 | int ret; |
421 | |
422 | ret = __cvdso_clock_getres_common(vd, clock, &ts); |
423 | |
424 | if (unlikely(ret)) |
425 | return clock_getres32_fallback(clock, res); |
426 | |
427 | if (likely(res)) { |
428 | res->tv_sec = ts.tv_sec; |
429 | res->tv_nsec = ts.tv_nsec; |
430 | } |
431 | return ret; |
432 | } |
433 | |
434 | static __maybe_unused int |
435 | __cvdso_clock_getres_time32(clockid_t clock, struct old_timespec32 *res) |
436 | { |
437 | return __cvdso_clock_getres_time32_data(__arch_get_vdso_data(), |
438 | clock, res); |
439 | } |
440 | #endif /* BUILD_VDSO32 */ |
441 | #endif /* VDSO_HAS_CLOCK_GETRES */ |
442 | |