/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Fast user context implementation of clock_gettime, gettimeofday, and time.
 *
 * Copyright (C) 2019 ARM Limited.
 * Copyright 2006 Andi Kleen, SUSE Labs.
 * 32 Bit compat layer by Stefani Seibold <stefani@seibold.net>
 *  sponsored by Rohde & Schwarz GmbH & Co. KG Munich/Germany
 */
#ifndef __ASM_VDSO_GETTIMEOFDAY_H
#define __ASM_VDSO_GETTIMEOFDAY_H

#ifndef __ASSEMBLY__

#include <uapi/linux/time.h>
#include <asm/vgtod.h>
#include <asm/vvar.h>
#include <asm/unistd.h>
#include <asm/msr.h>
#include <asm/pvclock.h>
#include <clocksource/hyperv_timer.h>

#define __vdso_data (VVAR(_vdso_data))
#define __timens_vdso_data (TIMENS(_vdso_data))

#define VDSO_HAS_TIME 1

#define VDSO_HAS_CLOCK_GETRES 1

/*
 * Declare the memory-mapped vclock data pages. These come from hypervisors.
 * If we ever reintroduce something like direct access to an MMIO clock like
 * the HPET again, it will go here as well.
 *
 * A load from any of these pages will segfault if the clock in question is
 * disabled, so appropriate compiler barriers and checks need to be used
 * to prevent stray loads.
 *
 * These declarations MUST NOT be const. The compiler will assume that
 * an extern const variable has genuinely constant contents, and the
 * resulting code won't work, since the whole point is that these pages
 * change over time, possibly while we're accessing them.
 */
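
/*
 * Illustrative sketch only (not part of the original header): with a
 * hypothetical 'extern const' declaration, the compiler could legally
 * fold repeated loads from the page into one, e.g.:
 *
 *	extern const struct ms_hyperv_tsc_page hvclock_page;
 *	u64 a = hvclock_page.tsc_scale;
 *	u64 b = hvclock_page.tsc_scale;	// may reuse 'a', never reloads
 *
 * which would defeat the seqcount-style retry loops below that depend on
 * observing the page change between reads.
 */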

#ifdef CONFIG_PARAVIRT_CLOCK
/*
 * This is the vCPU 0 pvclock page. We only use pvclock from the vDSO
 * if the hypervisor tells us that all vCPUs can get valid data from the
 * vCPU 0 page.
 */
extern struct pvclock_vsyscall_time_info pvclock_page
	__attribute__((visibility("hidden")));
#endif

#ifdef CONFIG_HYPERV_TIMER
extern struct ms_hyperv_tsc_page hvclock_page
	__attribute__((visibility("hidden")));
#endif

#ifdef CONFIG_TIME_NS
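/*
 * Note (descriptive, added for clarity): for a task inside a time
 * namespace the kernel swaps the VVAR mappings, so the page at the
 * normal _vdso_data offset carries the namespace offsets while the page
 * returned here holds the real clock data the generic vDSO code reads.
 */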
static __always_inline
const struct vdso_data *__arch_get_timens_vdso_data(const struct vdso_data *vd)
{
	return __timens_vdso_data;
}
#endif

#ifndef BUILD_VDSO32

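/*
 * Syscall fallbacks, used when the selected clocksource cannot be read
 * from userspace. Note on the constraints below (descriptive, added for
 * clarity): the 64-bit SYSCALL instruction clobbers %rcx (return RIP)
 * and %r11 (saved RFLAGS), hence the explicit clobber lists; "D" and "S"
 * place the arguments in %rdi and %rsi, and tying "0" to the "=a" output
 * passes the syscall number in %rax and returns the result there.
 */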
static __always_inline
long clock_gettime_fallback(clockid_t _clkid, struct __kernel_timespec *_ts)
{
	long ret;

	asm ("syscall" : "=a" (ret), "=m" (*_ts) :
	     "0" (__NR_clock_gettime), "D" (_clkid), "S" (_ts) :
	     "rcx", "r11");

	return ret;
}

static __always_inline
long gettimeofday_fallback(struct __kernel_old_timeval *_tv,
			   struct timezone *_tz)
{
	long ret;

	asm("syscall" : "=a" (ret) :
	    "0" (__NR_gettimeofday), "D" (_tv), "S" (_tz) : "memory");

	return ret;
}

static __always_inline
long clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts)
{
	long ret;

	asm ("syscall" : "=a" (ret), "=m" (*_ts) :
	     "0" (__NR_clock_getres), "D" (_clkid), "S" (_ts) :
	     "rcx", "r11");

	return ret;
}

#else

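/*
 * 32-bit fallbacks. The i386 syscall ABI passes the first argument in
 * %ebx, but %ebx is also the GOT pointer in PIC code and may not be
 * usable as an asm constraint, so the sequences below park the old %ebx
 * in %edx around the __kernel_vsyscall call.
 */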
static __always_inline
long clock_gettime_fallback(clockid_t _clkid, struct __kernel_timespec *_ts)
{
	long ret;

	asm (
		"mov %%ebx, %%edx \n"
		"mov %[clock], %%ebx \n"
		"call __kernel_vsyscall \n"
		"mov %%edx, %%ebx \n"
		: "=a" (ret), "=m" (*_ts)
		: "0" (__NR_clock_gettime64), [clock] "g" (_clkid), "c" (_ts)
		: "edx");

	return ret;
}

static __always_inline
long clock_gettime32_fallback(clockid_t _clkid, struct old_timespec32 *_ts)
{
	long ret;

	asm (
		"mov %%ebx, %%edx \n"
		"mov %[clock], %%ebx \n"
		"call __kernel_vsyscall \n"
		"mov %%edx, %%ebx \n"
		: "=a" (ret), "=m" (*_ts)
		: "0" (__NR_clock_gettime), [clock] "g" (_clkid), "c" (_ts)
		: "edx");

	return ret;
}

static __always_inline
long gettimeofday_fallback(struct __kernel_old_timeval *_tv,
			   struct timezone *_tz)
{
	long ret;

	asm(
		"mov %%ebx, %%edx \n"
		"mov %2, %%ebx \n"
		"call __kernel_vsyscall \n"
		"mov %%edx, %%ebx \n"
		: "=a" (ret)
		: "0" (__NR_gettimeofday), "g" (_tv), "c" (_tz)
		: "memory", "edx");

	return ret;
}

static __always_inline long
clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts)
{
	long ret;

	asm (
		"mov %%ebx, %%edx \n"
		"mov %[clock], %%ebx \n"
		"call __kernel_vsyscall \n"
		"mov %%edx, %%ebx \n"
		: "=a" (ret), "=m" (*_ts)
		: "0" (__NR_clock_getres_time64), [clock] "g" (_clkid), "c" (_ts)
		: "edx");

	return ret;
}

static __always_inline
long clock_getres32_fallback(clockid_t _clkid, struct old_timespec32 *_ts)
{
	long ret;

	asm (
		"mov %%ebx, %%edx \n"
		"mov %[clock], %%ebx \n"
		"call __kernel_vsyscall \n"
		"mov %%edx, %%ebx \n"
		: "=a" (ret), "=m" (*_ts)
		: "0" (__NR_clock_getres), [clock] "g" (_clkid), "c" (_ts)
		: "edx");

	return ret;
}

#endif

#ifdef CONFIG_PARAVIRT_CLOCK
static u64 vread_pvclock(void)
{
	const struct pvclock_vcpu_time_info *pvti = &pvclock_page.pvti;
	u32 version;
	u64 ret;

	/*
	 * Note: The kernel and hypervisor must guarantee that cpu ID
	 * number maps 1:1 to per-CPU pvclock time info.
	 *
	 * Because the hypervisor is entirely unaware of guest userspace
	 * preemption, it cannot guarantee that per-CPU pvclock time
	 * info is updated when the underlying CPU changes, or that its
	 * version is increased whenever the underlying CPU changes.
	 *
	 * On KVM, we are guaranteed that pvti updates for any vCPU are
	 * atomic as seen by *all* vCPUs. This is an even stronger
	 * guarantee than we get with a normal seqlock.
	 *
	 * On Xen, we don't appear to have that guarantee, but Xen still
	 * supplies a valid seqlock using the version field.
	 *
	 * We only do pvclock vdso timing at all if
	 * PVCLOCK_TSC_STABLE_BIT is set, and we interpret that bit to
	 * mean that all vCPUs have matching pvti and that the TSC is
	 * synced, so we can just look at vCPU 0's pvti.
	 */

	do {
		version = pvclock_read_begin(pvti);

		if (unlikely(!(pvti->flags & PVCLOCK_TSC_STABLE_BIT)))
			return U64_MAX;

		ret = __pvclock_read_cycles(pvti, rdtsc_ordered());
	} while (pvclock_read_retry(pvti, version));

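	/* Clear the MSB so a valid reading never aliases the U64_MAX error value. */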
	return ret & S64_MAX;
}
#endif

#ifdef CONFIG_HYPERV_TIMER
static u64 vread_hvclock(void)
{
	u64 tsc, time;

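	/* hv_read_tsc_page_tsc() only returns true while the TSC page is valid. */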
	if (hv_read_tsc_page_tsc(&hvclock_page, &tsc, &time))
		return time & S64_MAX;

	return U64_MAX;
}
#endif

static inline u64 __arch_get_hw_counter(s32 clock_mode,
					const struct vdso_data *vd)
{
	if (likely(clock_mode == VDSO_CLOCKMODE_TSC))
		return (u64)rdtsc_ordered() & S64_MAX;
	/*
	 * For any memory-mapped vclock type, we need to make sure that gcc
	 * doesn't cleverly hoist a load before the mode check. Otherwise we
	 * might end up touching the memory-mapped page even if the vclock in
	 * question isn't enabled, which will segfault. Hence the barriers.
	 */
#ifdef CONFIG_PARAVIRT_CLOCK
	if (clock_mode == VDSO_CLOCKMODE_PVCLOCK) {
		barrier();
		return vread_pvclock();
	}
#endif
#ifdef CONFIG_HYPERV_TIMER
	if (clock_mode == VDSO_CLOCKMODE_HVCLOCK) {
		barrier();
		return vread_hvclock();
	}
#endif
	return U64_MAX;
}
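
/*
 * Note (descriptive): disabled or unknown clock modes yield U64_MAX above,
 * which arch_vdso_cycles_ok() below rejects, so the generic code falls
 * back to the real syscall.
 */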

static __always_inline const struct vdso_data *__arch_get_vdso_data(void)
{
	return __vdso_data;
}

static inline bool arch_vdso_clocksource_ok(const struct vdso_data *vd)
{
	return true;
}
#define vdso_clocksource_ok arch_vdso_clocksource_ok

/*
 * Clocksource read value validation to handle PV and HyperV clocksources
 * which can be invalidated asynchronously and indicate invalidation by
 * returning U64_MAX, which can be effectively tested by checking for a
 * negative value after casting it to s64.
 *
 * This effectively forces a S64_MAX mask on the calculations, unlike the
 * U64_MAX mask normally used by x86 clocksources.
 */
static inline bool arch_vdso_cycles_ok(u64 cycles)
{
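	/* (s64)U64_MAX == -1, so the error marker fails this check. */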
	return (s64)cycles >= 0;
}
#define vdso_cycles_ok arch_vdso_cycles_ok

/*
 * x86 specific delta calculation.
 *
 * The regular implementation assumes that clocksource reads are globally
 * monotonic. The TSC can be slightly off across sockets which can cause
 * the regular delta calculation (@cycles - @last) to return a huge time
 * jump.
 *
 * Therefore it needs to be verified that @cycles are greater than
 * @last. If not then use @last, which is the base time of the current
 * conversion period.
 *
 * This variant also uses a custom mask because, while the clocksource mask
 * of all the VDSO capable clocksources on x86 is U64_MAX, the code above
 * uses U64_MAX as an exception value. Additionally, arch_vdso_cycles_ok()
 * above declares everything with the MSB/Sign-bit set as invalid.
 * Therefore the effective mask is S64_MAX.
 */
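/*
 * Worked example (illustrative): if this socket's TSC lags @last by one
 * cycle, (cycles - last) wraps to U64_MAX; masked with S64_MAX that is
 * 0x7fffffffffffffff, whose bit 62 is set, so the small negative motion
 * is clamped to a delta of 0 instead of producing a huge forward jump.
 */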
static __always_inline
u64 vdso_calc_delta(u64 cycles, u64 last, u64 mask, u32 mult)
{
	/*
	 * Due to the MSB/Sign-bit being used as invalid marker (see
	 * arch_vdso_cycles_ok() above), the effective mask is S64_MAX.
	 */
	u64 delta = (cycles - last) & S64_MAX;

	/*
	 * Due to the above mentioned TSC wobbles, filter out negative motion.
	 * Per the above masking, the effective sign bit is now bit 62.
	 */
	if (unlikely(delta & (1ULL << 62)))
		return 0;

	return delta * mult;
}
#define vdso_calc_delta vdso_calc_delta

#endif /* !__ASSEMBLY__ */

#endif /* __ASM_VDSO_GETTIMEOFDAY_H */