/*
 * VMware Detection code.
 *
 * Copyright (C) 2008, VMware, Inc.
 * Author : Alok N Kataria <akataria@vmware.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
 * NON INFRINGEMENT. See the GNU General Public License for more
 * details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 */

#include <linux/dmi.h>
#include <linux/init.h>
#include <linux/export.h>
#include <linux/clocksource.h>
#include <linux/cpu.h>
#include <linux/reboot.h>
#include <linux/static_call.h>
#include <asm/div64.h>
#include <asm/x86_init.h>
#include <asm/hypervisor.h>
#include <asm/timer.h>
#include <asm/apic.h>
#include <asm/vmware.h>
#include <asm/svm.h>

#undef pr_fmt
#define pr_fmt(fmt)	"vmware: " fmt

#define CPUID_VMWARE_INFO_LEAF			0x40000000
#define CPUID_VMWARE_FEATURES_LEAF		0x40000010
#define CPUID_VMWARE_FEATURES_ECX_VMMCALL	BIT(0)
#define CPUID_VMWARE_FEATURES_ECX_VMCALL	BIT(1)

#define VMWARE_HYPERVISOR_MAGIC	0x564D5868

#define VMWARE_CMD_GETVERSION		10
#define VMWARE_CMD_GETHZ		45
#define VMWARE_CMD_GETVCPU_INFO		68
#define VMWARE_CMD_LEGACY_X2APIC	3
#define VMWARE_CMD_VCPU_RESERVED	31
#define VMWARE_CMD_STEALCLOCK		91

#define STEALCLOCK_NOT_AVAILABLE	(-1)
#define STEALCLOCK_DISABLED		0
#define STEALCLOCK_ENABLED		1
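
/*
 * Commands reach the hypervisor in one of three ways: the classic I/O
 * port backdoor (an "in" from the VMware hypervisor port with the magic
 * value in %eax), or the hardware VMCALL (Intel) / VMMCALL (AMD)
 * instructions.  The mode is chosen once at boot from CPUID leaf
 * 0x40000010; see vmware_select_hypercall() and VMWARE_CMD() below.
 */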
#define VMWARE_PORT(cmd, eax, ebx, ecx, edx)				\
	__asm__("inl (%%dx), %%eax" :					\
		"=a"(eax), "=c"(ecx), "=d"(edx), "=b"(ebx) :		\
		"a"(VMWARE_HYPERVISOR_MAGIC),				\
		"c"(VMWARE_CMD_##cmd),					\
		"d"(VMWARE_HYPERVISOR_PORT), "b"(UINT_MAX) :		\
		"memory")

#define VMWARE_VMCALL(cmd, eax, ebx, ecx, edx)				\
	__asm__("vmcall" :						\
		"=a"(eax), "=c"(ecx), "=d"(edx), "=b"(ebx) :		\
		"a"(VMWARE_HYPERVISOR_MAGIC),				\
		"c"(VMWARE_CMD_##cmd),					\
		"d"(0), "b"(UINT_MAX) :					\
		"memory")

#define VMWARE_VMMCALL(cmd, eax, ebx, ecx, edx)				\
	__asm__("vmmcall" :						\
		"=a"(eax), "=c"(ecx), "=d"(edx), "=b"(ebx) :		\
		"a"(VMWARE_HYPERVISOR_MAGIC),				\
		"c"(VMWARE_CMD_##cmd),					\
		"d"(0), "b"(UINT_MAX) :					\
		"memory")

#define VMWARE_CMD(cmd, eax, ebx, ecx, edx) do {		\
	switch (vmware_hypercall_mode) {			\
	case CPUID_VMWARE_FEATURES_ECX_VMCALL:			\
		VMWARE_VMCALL(cmd, eax, ebx, ecx, edx);		\
		break;						\
	case CPUID_VMWARE_FEATURES_ECX_VMMCALL:		\
		VMWARE_VMMCALL(cmd, eax, ebx, ecx, edx);	\
		break;						\
	default:						\
		VMWARE_PORT(cmd, eax, ebx, ecx, edx);		\
		break;						\
	}							\
} while (0)
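
/*
 * Steal time buffer, shared with the hypervisor.  One buffer is
 * registered per vCPU via its physical address (see
 * vmware_register_steal_time()); the hypervisor then updates @clock with
 * the stolen time, in units of the virtual TSC.  The layout is a single
 * 64-byte cache line.
 */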
struct vmware_steal_time {
	union {
		uint64_t clock;	/* stolen time counter in units of vtsc */
		struct {
			/* only for little-endian */
			uint32_t clock_low;
			uint32_t clock_high;
		};
	};
	uint64_t reserved[7];
};

static unsigned long vmware_tsc_khz __ro_after_init;
static u8 vmware_hypercall_mode     __ro_after_init;

static inline int __vmware_platform(void)
{
	uint32_t eax, ebx, ecx, edx;
	VMWARE_CMD(GETVERSION, eax, ebx, ecx, edx);
	return eax != (uint32_t)-1 && ebx == VMWARE_HYPERVISOR_MAGIC;
}

static unsigned long vmware_get_tsc_khz(void)
{
	return vmware_tsc_khz;
}

#ifdef CONFIG_PARAVIRT
static struct cyc2ns_data vmware_cyc2ns __ro_after_init;
static bool vmw_sched_clock __initdata = true;
static DEFINE_PER_CPU_DECRYPTED(struct vmware_steal_time, vmw_steal_time) __aligned(64);
static bool has_steal_clock;
static bool steal_acc __initdata = true; /* steal time accounting */

static __init int setup_vmw_sched_clock(char *s)
{
	vmw_sched_clock = false;
	return 0;
}
early_param("no-vmw-sched-clock", setup_vmw_sched_clock);

static __init int parse_no_stealacc(char *arg)
{
	steal_acc = false;
	return 0;
}
early_param("no-steal-acc", parse_no_stealacc);

static noinstr u64 vmware_sched_clock(void)
{
	unsigned long long ns;

	ns = mul_u64_u32_shr(rdtsc(), vmware_cyc2ns.cyc2ns_mul,
			     vmware_cyc2ns.cyc2ns_shift);
	ns -= vmware_cyc2ns.cyc2ns_offset;
	return ns;
}
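
/*
 * Compute the mult/shift pair for converting TSC cycles to nanoseconds,
 * and record the TSC value at setup time as the offset so that
 * vmware_sched_clock() starts counting from roughly zero.
 */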
static void __init vmware_cyc2ns_setup(void)
{
	struct cyc2ns_data *d = &vmware_cyc2ns;
	unsigned long long tsc_now = rdtsc();

	clocks_calc_mult_shift(&d->cyc2ns_mul, &d->cyc2ns_shift,
			       vmware_tsc_khz, NSEC_PER_MSEC, 0);
	d->cyc2ns_offset = mul_u64_u32_shr(tsc_now, d->cyc2ns_mul,
					   d->cyc2ns_shift);

	pr_info("using clock offset of %llu ns\n", d->cyc2ns_offset);
}
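
/*
 * Issue a STEALCLOCK command.  The command-specific arguments travel in
 * the SI and DI registers; when enabling, they carry the high and low
 * 32 bits of the physical address of this vCPU's steal time buffer
 * (see stealclock_enable()).
 */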
static int vmware_cmd_stealclock(uint32_t arg1, uint32_t arg2)
{
	uint32_t result, info;

	asm volatile (VMWARE_HYPERCALL :
		"=a" (result),
		"=c" (info) :
		"a" (VMWARE_HYPERVISOR_MAGIC),
		"b" (0),
		"c" (VMWARE_CMD_STEALCLOCK),
		"d" (0),
		"S" (arg1),
		"D" (arg2) :
		"memory");
	return result;
}

static bool stealclock_enable(phys_addr_t pa)
{
	return vmware_cmd_stealclock(upper_32_bits(pa),
				     lower_32_bits(pa)) == STEALCLOCK_ENABLED;
}

static int __stealclock_disable(void)
{
	return vmware_cmd_stealclock(0, 1);
}

static void stealclock_disable(void)
{
	__stealclock_disable();
}
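
/*
 * Disabling an already-disabled steal clock is harmless, so the disable
 * command doubles as a feature probe: it returns
 * STEALCLOCK_NOT_AVAILABLE only if the hypervisor lacks steal clock
 * support.
 */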
static bool vmware_is_stealclock_available(void)
{
	return __stealclock_disable() != STEALCLOCK_NOT_AVAILABLE;
}

/**
 * vmware_steal_clock() - read the per-cpu steal clock
 * @cpu: the cpu number whose steal clock we want to read
 *
 * On 64-bit systems the steal clock is read in a single access; on
 * 32-bit it is read in two halves, retrying until the high part is
 * stable across the read of the low part.
 *
 * Return:
 * The steal clock reading in ns.
 */
static uint64_t vmware_steal_clock(int cpu)
{
	struct vmware_steal_time *steal = &per_cpu(vmw_steal_time, cpu);
	uint64_t clock;

	if (IS_ENABLED(CONFIG_64BIT))
		clock = READ_ONCE(steal->clock);
	else {
		uint32_t initial_high, low, high;

		do {
			initial_high = READ_ONCE(steal->clock_high);
			/* Do not reorder initial_high and high readings */
			virt_rmb();
			low = READ_ONCE(steal->clock_low);
			/* Keep low reading in between */
			virt_rmb();
			high = READ_ONCE(steal->clock_high);
		} while (initial_high != high);

		clock = ((uint64_t)high << 32) | low;
	}

	return mul_u64_u32_shr(clock, vmware_cyc2ns.cyc2ns_mul,
			       vmware_cyc2ns.cyc2ns_shift);
}

static void vmware_register_steal_time(void)
{
	int cpu = smp_processor_id();
	struct vmware_steal_time *st = &per_cpu(vmw_steal_time, cpu);

	if (!has_steal_clock)
		return;

	if (!stealclock_enable(slow_virt_to_phys(st))) {
		has_steal_clock = false;
		return;
	}

	pr_info("vmware-stealtime: cpu %d, pa %llx\n",
		cpu, (unsigned long long) slow_virt_to_phys(st));
}

static void vmware_disable_steal_time(void)
{
	if (!has_steal_clock)
		return;

	stealclock_disable();
}

static void vmware_guest_cpu_init(void)
{
	if (has_steal_clock)
		vmware_register_steal_time();
}

static void vmware_pv_guest_cpu_reboot(void *unused)
{
	vmware_disable_steal_time();
}

static int vmware_pv_reboot_notify(struct notifier_block *nb,
				   unsigned long code, void *unused)
{
	if (code == SYS_RESTART)
		on_each_cpu(vmware_pv_guest_cpu_reboot, NULL, 1);
	return NOTIFY_DONE;
}

static struct notifier_block vmware_pv_reboot_nb = {
	.notifier_call = vmware_pv_reboot_notify,
};

#ifdef CONFIG_SMP
static void __init vmware_smp_prepare_boot_cpu(void)
{
	vmware_guest_cpu_init();
	native_smp_prepare_boot_cpu();
}

static int vmware_cpu_online(unsigned int cpu)
{
	local_irq_disable();
	vmware_guest_cpu_init();
	local_irq_enable();
	return 0;
}

static int vmware_cpu_down_prepare(unsigned int cpu)
{
	local_irq_disable();
	vmware_disable_steal_time();
	local_irq_enable();
	return 0;
}
#endif

static __init int activate_jump_labels(void)
{
	if (has_steal_clock) {
		static_key_slow_inc(&paravirt_steal_enabled);
		if (steal_acc)
			static_key_slow_inc(&paravirt_steal_rq_enabled);
	}

	return 0;
}
arch_initcall(activate_jump_labels);
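
/*
 * Wire up the paravirt hooks: a TSC-based sched_clock (unless disabled
 * with "no-vmw-sched-clock"), the steal clock if the hypervisor offers
 * one, a reboot notifier that disables steal time on restart, and CPU
 * hotplug callbacks that (de)register each vCPU's steal time buffer.
 */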
static void __init vmware_paravirt_ops_setup(void)
{
	pv_info.name = "VMware hypervisor";
	pv_ops.cpu.io_delay = paravirt_nop;

	if (vmware_tsc_khz == 0)
		return;

	vmware_cyc2ns_setup();

	if (vmw_sched_clock)
		paravirt_set_sched_clock(vmware_sched_clock);

	if (vmware_is_stealclock_available()) {
		has_steal_clock = true;
		static_call_update(pv_steal_clock, vmware_steal_clock);

		/* We use reboot notifier only to disable steal clock */
		register_reboot_notifier(&vmware_pv_reboot_nb);

#ifdef CONFIG_SMP
		smp_ops.smp_prepare_boot_cpu =
			vmware_smp_prepare_boot_cpu;
		if (cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
					      "x86/vmware:online",
					      vmware_cpu_online,
					      vmware_cpu_down_prepare) < 0)
			pr_err("vmware_guest: Failed to install cpu hotplug callbacks\n");
#else
		vmware_guest_cpu_init();
#endif
	}
}
#else
#define vmware_paravirt_ops_setup() do {} while (0)
#endif

/*
 * The VMware hypervisor takes care of exporting a reliable TSC to the
 * guest.  Still, due to timing differences when running on virtual CPUs,
 * the TSC can be marked as unstable in some cases.  For example, the TSC
 * sync check at bootup can fail due to a marginal offset between vCPUs'
 * TSCs (though the TSCs do not drift from each other).  Also, the ACPI PM
 * timer clocksource is not suitable as a watchdog when running on a
 * hypervisor because the kernel may miss a wrap of the counter if the
 * vCPU is descheduled for a long time.  To skip these checks at runtime
 * we set these capability bits, so that the kernel can just trust the
 * hypervisor to provide a reliable virtual TSC that is suitable for
 * timekeeping.
 */
static void __init vmware_set_capabilities(void)
{
	setup_force_cpu_cap(X86_FEATURE_CONSTANT_TSC);
	setup_force_cpu_cap(X86_FEATURE_TSC_RELIABLE);
	if (vmware_tsc_khz)
		setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ);
	if (vmware_hypercall_mode == CPUID_VMWARE_FEATURES_ECX_VMCALL)
		setup_force_cpu_cap(X86_FEATURE_VMCALL);
	else if (vmware_hypercall_mode == CPUID_VMWARE_FEATURES_ECX_VMMCALL)
		setup_force_cpu_cap(X86_FEATURE_VMW_VMMCALL);
}

static void __init vmware_platform_setup(void)
{
	uint32_t eax, ebx, ecx, edx;
	uint64_t lpj, tsc_khz;
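
	/*
	 * GETHZ reports the TSC frequency in Hz split across EBX:EAX,
	 * with EBX == -1 signalling failure, and the APIC bus frequency
	 * in ECX.
	 */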
	VMWARE_CMD(GETHZ, eax, ebx, ecx, edx);

	if (ebx != UINT_MAX) {
		lpj = tsc_khz = eax | (((uint64_t)ebx) << 32);
		do_div(tsc_khz, 1000);
		WARN_ON(tsc_khz >> 32);
		pr_info("TSC freq read from hypervisor: %lu.%03lu MHz\n",
			(unsigned long) tsc_khz / 1000,
			(unsigned long) tsc_khz % 1000);

		if (!preset_lpj) {
			do_div(lpj, HZ);
			preset_lpj = lpj;
		}

		vmware_tsc_khz = tsc_khz;
		x86_platform.calibrate_tsc = vmware_get_tsc_khz;
		x86_platform.calibrate_cpu = vmware_get_tsc_khz;

#ifdef CONFIG_X86_LOCAL_APIC
		/* Skip lapic calibration since we know the bus frequency. */
		lapic_timer_period = ecx / HZ;
		pr_info("Host bus clock speed read from hypervisor: %u Hz\n",
			ecx);
#endif
	} else {
		pr_warn("Failed to get TSC freq from the hypervisor\n");
	}

	vmware_paravirt_ops_setup();

#ifdef CONFIG_X86_IO_APIC
	no_timer_check = 1;
#endif

	vmware_set_capabilities();
}
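
/*
 * CPUID leaf 0x40000010 (CPUID_VMWARE_FEATURES_LEAF) advertises in ECX
 * which hardware hypercall instructions the hypervisor accepts.  The
 * result is cached in vmware_hypercall_mode and consulted by
 * VMWARE_CMD().
 */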
static u8 __init vmware_select_hypercall(void)
{
	int eax, ebx, ecx, edx;

	cpuid(CPUID_VMWARE_FEATURES_LEAF, &eax, &ebx, &ecx, &edx);
	return (ecx & (CPUID_VMWARE_FEATURES_ECX_VMMCALL |
		       CPUID_VMWARE_FEATURES_ECX_VMCALL));
}

/*
 * When checking the DMI string information, just checking the product
 * serial key should be enough, as this will always have a VMware-specific
 * string when running under the VMware hypervisor.
 * If !boot_cpu_has(X86_FEATURE_HYPERVISOR), vmware_hypercall_mode
 * intentionally defaults to 0.
 */
static uint32_t __init vmware_platform(void)
{
	if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) {
		unsigned int eax;
		unsigned int hyper_vendor_id[3];

		cpuid(CPUID_VMWARE_INFO_LEAF, &eax, &hyper_vendor_id[0],
		      &hyper_vendor_id[1], &hyper_vendor_id[2]);
		if (!memcmp(hyper_vendor_id, "VMwareVMware", 12)) {
			if (eax >= CPUID_VMWARE_FEATURES_LEAF)
				vmware_hypercall_mode =
					vmware_select_hypercall();

			pr_info("hypercall mode: 0x%02x\n",
				(unsigned int) vmware_hypercall_mode);

			return CPUID_VMWARE_INFO_LEAF;
		}
	} else if (dmi_available && dmi_name_in_serial("VMware") &&
		   __vmware_platform())
		return 1;

	return 0;
}

/* Checks if hypervisor supports x2apic without VT-d interrupt remapping. */
static bool __init vmware_legacy_x2apic_available(void)
{
	uint32_t eax, ebx, ecx, edx;
	VMWARE_CMD(GETVCPU_INFO, eax, ebx, ecx, edx);
	return !(eax & BIT(VMWARE_CMD_VCPU_RESERVED)) &&
		(eax & BIT(VMWARE_CMD_LEGACY_X2APIC));
}

#ifdef CONFIG_AMD_MEM_ENCRYPT
static void vmware_sev_es_hcall_prepare(struct ghcb *ghcb,
					struct pt_regs *regs)
{
	/* Copy VMware-specific hypercall parameters to the GHCB */
	ghcb_set_rip(ghcb, regs->ip);
	ghcb_set_rbx(ghcb, regs->bx);
	ghcb_set_rcx(ghcb, regs->cx);
	ghcb_set_rdx(ghcb, regs->dx);
	ghcb_set_rsi(ghcb, regs->si);
	ghcb_set_rdi(ghcb, regs->di);
	ghcb_set_rbp(ghcb, regs->bp);
}
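
/*
 * Copy the hypercall results back from the GHCB, but only if the
 * hypervisor supplied every register the VMware protocol expects;
 * otherwise reject the hypercall.
 */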
static bool vmware_sev_es_hcall_finish(struct ghcb *ghcb, struct pt_regs *regs)
{
	if (!(ghcb_rbx_is_valid(ghcb) &&
	      ghcb_rcx_is_valid(ghcb) &&
	      ghcb_rdx_is_valid(ghcb) &&
	      ghcb_rsi_is_valid(ghcb) &&
	      ghcb_rdi_is_valid(ghcb) &&
	      ghcb_rbp_is_valid(ghcb)))
		return false;

	regs->bx = ghcb_get_rbx(ghcb);
	regs->cx = ghcb_get_rcx(ghcb);
	regs->dx = ghcb_get_rdx(ghcb);
	regs->si = ghcb_get_rsi(ghcb);
	regs->di = ghcb_get_rdi(ghcb);
	regs->bp = ghcb_get_rbp(ghcb);

	return true;
}
#endif

const __initconst struct hypervisor_x86 x86_hyper_vmware = {
	.name			= "VMware",
	.detect			= vmware_platform,
	.type			= X86_HYPER_VMWARE,
	.init.init_platform	= vmware_platform_setup,
	.init.x2apic_available	= vmware_legacy_x2apic_available,
#ifdef CONFIG_AMD_MEM_ENCRYPT
	.runtime.sev_es_hcall_prepare	= vmware_sev_es_hcall_prepare,
	.runtime.sev_es_hcall_finish	= vmware_sev_es_hcall_finish,
#endif
};