// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright © 2019 Oracle and/or its affiliates. All rights reserved.
 * Copyright © 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
 *
 * KVM Xen emulation
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include "x86.h"
#include "xen.h"
#include "hyperv.h"
#include "lapic.h"

#include <linux/eventfd.h>
#include <linux/kvm_host.h>
#include <linux/sched/stat.h>

#include <trace/events/kvm.h>
#include <xen/interface/xen.h>
#include <xen/interface/vcpu.h>
#include <xen/interface/version.h>
#include <xen/interface/event_channel.h>
#include <xen/interface/sched.h>

#include <asm/xen/cpuid.h>

#include "cpuid.h"
#include "trace.h"

static int kvm_xen_set_evtchn(struct kvm_xen_evtchn *xe, struct kvm *kvm);
static int kvm_xen_setattr_evtchn(struct kvm *kvm, struct kvm_xen_hvm_attr *data);
static bool kvm_xen_hcall_evtchn_send(struct kvm_vcpu *vcpu, u64 param, u64 *r);

DEFINE_STATIC_KEY_DEFERRED_FALSE(kvm_xen_enabled, HZ);

static int kvm_xen_shared_info_init(struct kvm *kvm, gfn_t gfn)
{
	struct gfn_to_pfn_cache *gpc = &kvm->arch.xen.shinfo_cache;
	struct pvclock_wall_clock *wc;
	gpa_t gpa = gfn_to_gpa(gfn);
	u32 *wc_sec_hi;
	u32 wc_version;
	u64 wall_nsec;
	int ret = 0;
	int idx = srcu_read_lock(&kvm->srcu);

	if (gfn == KVM_XEN_INVALID_GFN) {
		kvm_gpc_deactivate(gpc);
		goto out;
	}

	do {
		ret = kvm_gpc_activate(gpc, gpa, PAGE_SIZE);
		if (ret)
			goto out;

		/*
		 * This code mirrors kvm_write_wall_clock() except that it writes
		 * directly through the pfn cache and doesn't mark the page dirty.
		 */
		wall_nsec = kvm_get_wall_clock_epoch(kvm);

		/* It could be invalid again already, so we need to check */
		read_lock_irq(&gpc->lock);

		if (gpc->valid)
			break;

		read_unlock_irq(&gpc->lock);
	} while (1);

	/* Paranoia checks on the 32-bit struct layout */
	BUILD_BUG_ON(offsetof(struct compat_shared_info, wc) != 0x900);
	BUILD_BUG_ON(offsetof(struct compat_shared_info, arch.wc_sec_hi) != 0x924);
	BUILD_BUG_ON(offsetof(struct pvclock_vcpu_time_info, version) != 0);

#ifdef CONFIG_X86_64
	/* Paranoia checks on the 64-bit struct layout */
	BUILD_BUG_ON(offsetof(struct shared_info, wc) != 0xc00);
	BUILD_BUG_ON(offsetof(struct shared_info, wc_sec_hi) != 0xc0c);

	if (IS_ENABLED(CONFIG_64BIT) && kvm->arch.xen.long_mode) {
		struct shared_info *shinfo = gpc->khva;

		wc_sec_hi = &shinfo->wc_sec_hi;
		wc = &shinfo->wc;
	} else
#endif
	{
		struct compat_shared_info *shinfo = gpc->khva;

		wc_sec_hi = &shinfo->arch.wc_sec_hi;
		wc = &shinfo->wc;
	}

	/* Increment and ensure an odd value */
	wc_version = wc->version = (wc->version + 1) | 1;
	smp_wmb();

	wc->nsec = do_div(wall_nsec, NSEC_PER_SEC);
	wc->sec = (u32)wall_nsec;
	*wc_sec_hi = wall_nsec >> 32;
	smp_wmb();

	wc->version = wc_version + 1;
	read_unlock_irq(&gpc->lock);

	kvm_make_all_cpus_request(kvm, KVM_REQ_MASTERCLOCK_UPDATE);

out:
	srcu_read_unlock(&kvm->srcu, idx);
	return ret;
}
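
/*
 * Illustrative sketch (an assumption about the guest side, not part of
 * this file): a guest consumes the wallclock fields written above with
 * the usual Xen version-seqlock dance, retrying while the version is
 * odd or changes mid-read:
 *
 *	uint32_t ver, sec, nsec;
 *	do {
 *		ver = s->wc.version;
 *		rmb();
 *		sec = s->wc.sec;
 *		nsec = s->wc.nsec;
 *		rmb();
 *	} while ((ver & 1) || ver != s->wc.version);
 *
 * That is what the "(wc->version + 1) | 1" / "wc_version + 1" pair and
 * the two smp_wmb() calls above pair with.
 */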

void kvm_xen_inject_timer_irqs(struct kvm_vcpu *vcpu)
{
	if (atomic_read(&vcpu->arch.xen.timer_pending) > 0) {
		struct kvm_xen_evtchn e;

		e.vcpu_id = vcpu->vcpu_id;
		e.vcpu_idx = vcpu->vcpu_idx;
		e.port = vcpu->arch.xen.timer_virq;
		e.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;

		kvm_xen_set_evtchn(&e, vcpu->kvm);

		vcpu->arch.xen.timer_expires = 0;
		atomic_set(&vcpu->arch.xen.timer_pending, 0);
	}
}

static enum hrtimer_restart xen_timer_callback(struct hrtimer *timer)
{
	struct kvm_vcpu *vcpu = container_of(timer, struct kvm_vcpu,
					     arch.xen.timer);
	struct kvm_xen_evtchn e;
	int rc;

	if (atomic_read(&vcpu->arch.xen.timer_pending))
		return HRTIMER_NORESTART;

	e.vcpu_id = vcpu->vcpu_id;
	e.vcpu_idx = vcpu->vcpu_idx;
	e.port = vcpu->arch.xen.timer_virq;
	e.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;

	rc = kvm_xen_set_evtchn_fast(&e, vcpu->kvm);
	if (rc != -EWOULDBLOCK) {
		vcpu->arch.xen.timer_expires = 0;
		return HRTIMER_NORESTART;
	}

	atomic_inc(&vcpu->arch.xen.timer_pending);
	kvm_make_request(KVM_REQ_UNBLOCK, vcpu);
	kvm_vcpu_kick(vcpu);

	return HRTIMER_NORESTART;
}

static void kvm_xen_start_timer(struct kvm_vcpu *vcpu, u64 guest_abs, s64 delta_ns)
{
	/*
	 * Avoid races with the old timer firing. The timer_expires check
	 * that gates the hrtimer_cancel() call can only give false
	 * positives (i.e. a spurious cancel of a timer that has already
	 * fired), which is harmless.
	 */
	if (vcpu->arch.xen.timer_expires)
		hrtimer_cancel(&vcpu->arch.xen.timer);

	atomic_set(&vcpu->arch.xen.timer_pending, 0);
	vcpu->arch.xen.timer_expires = guest_abs;

	if (delta_ns <= 0) {
		xen_timer_callback(&vcpu->arch.xen.timer);
	} else {
		ktime_t ktime_now = ktime_get();
		hrtimer_start(&vcpu->arch.xen.timer,
			      ktime_add_ns(ktime_now, delta_ns),
			      HRTIMER_MODE_ABS_HARD);
	}
}

static void kvm_xen_stop_timer(struct kvm_vcpu *vcpu)
{
	hrtimer_cancel(&vcpu->arch.xen.timer);
	vcpu->arch.xen.timer_expires = 0;
	atomic_set(&vcpu->arch.xen.timer_pending, 0);
}

static void kvm_xen_init_timer(struct kvm_vcpu *vcpu)
{
	hrtimer_init(&vcpu->arch.xen.timer, CLOCK_MONOTONIC,
		     HRTIMER_MODE_ABS_HARD);
	vcpu->arch.xen.timer.function = xen_timer_callback;
}

static void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, bool atomic)
{
	struct kvm_vcpu_xen *vx = &v->arch.xen;
	struct gfn_to_pfn_cache *gpc1 = &vx->runstate_cache;
	struct gfn_to_pfn_cache *gpc2 = &vx->runstate2_cache;
	size_t user_len, user_len1, user_len2;
	struct vcpu_runstate_info rs;
	unsigned long flags;
	size_t times_ofs;
	uint8_t *update_bit = NULL;
	uint64_t entry_time;
	uint64_t *rs_times;
	int *rs_state;

	/*
	 * The only difference between the 32-bit and 64-bit versions of the
	 * runstate struct is the alignment of uint64_t in 32-bit, which
	 * means that the 64-bit version has an additional 4 bytes of
	 * padding after the first field 'state'. Be really paranoid about
	 * that, and about how it matches up with the internal data
	 * structures that get memcpy'd into it...
	 */
	BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state) != 0);
	BUILD_BUG_ON(offsetof(struct compat_vcpu_runstate_info, state) != 0);
	BUILD_BUG_ON(sizeof(struct compat_vcpu_runstate_info) != 0x2c);
#ifdef CONFIG_X86_64
	/*
	 * The 64-bit structure has 4 bytes of padding before 'state_entry_time'
	 * so each subsequent field is shifted by 4, and it's 4 bytes longer.
	 */
	BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state_entry_time) !=
		     offsetof(struct compat_vcpu_runstate_info, state_entry_time) + 4);
	BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, time) !=
		     offsetof(struct compat_vcpu_runstate_info, time) + 4);
	BUILD_BUG_ON(sizeof(struct vcpu_runstate_info) != 0x2c + 4);
#endif
	/*
	 * The state field is in the same place at the start of both structs,
	 * and is the same size (int) as vx->current_runstate.
	 */
	BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state) !=
		     offsetof(struct compat_vcpu_runstate_info, state));
	BUILD_BUG_ON(sizeof_field(struct vcpu_runstate_info, state) !=
		     sizeof(vx->current_runstate));
	BUILD_BUG_ON(sizeof_field(struct compat_vcpu_runstate_info, state) !=
		     sizeof(vx->current_runstate));

	/*
	 * The state_entry_time field is 64 bits in both versions, and the
	 * XEN_RUNSTATE_UPDATE flag is in the top bit, which given that x86
	 * is little-endian means that it's in the last *byte* of the word.
	 * That detail is important later.
	 */
	BUILD_BUG_ON(sizeof_field(struct vcpu_runstate_info, state_entry_time) !=
		     sizeof(uint64_t));
	BUILD_BUG_ON(sizeof_field(struct compat_vcpu_runstate_info, state_entry_time) !=
		     sizeof(uint64_t));
	BUILD_BUG_ON((XEN_RUNSTATE_UPDATE >> 56) != 0x80);

	/*
	 * The time array is four 64-bit quantities in both versions, matching
	 * the vx->runstate_times and immediately following state_entry_time.
	 */
	BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state_entry_time) !=
		     offsetof(struct vcpu_runstate_info, time) - sizeof(uint64_t));
	BUILD_BUG_ON(offsetof(struct compat_vcpu_runstate_info, state_entry_time) !=
		     offsetof(struct compat_vcpu_runstate_info, time) - sizeof(uint64_t));
	BUILD_BUG_ON(sizeof_field(struct vcpu_runstate_info, time) !=
		     sizeof_field(struct compat_vcpu_runstate_info, time));
	BUILD_BUG_ON(sizeof_field(struct vcpu_runstate_info, time) !=
		     sizeof(vx->runstate_times));

	if (IS_ENABLED(CONFIG_64BIT) && v->kvm->arch.xen.long_mode) {
		user_len = sizeof(struct vcpu_runstate_info);
		times_ofs = offsetof(struct vcpu_runstate_info,
				     state_entry_time);
	} else {
		user_len = sizeof(struct compat_vcpu_runstate_info);
		times_ofs = offsetof(struct compat_vcpu_runstate_info,
				     state_entry_time);
	}

	/*
	 * There are basically no alignment constraints. The guest can set it
	 * up so it crosses from one page to the next, and at arbitrary byte
	 * alignment (and the 32-bit ABI doesn't align the 64-bit integers
	 * anyway, even if the overall struct had been 64-bit aligned).
	 */
	if ((gpc1->gpa & ~PAGE_MASK) + user_len >= PAGE_SIZE) {
		user_len1 = PAGE_SIZE - (gpc1->gpa & ~PAGE_MASK);
		user_len2 = user_len - user_len1;
	} else {
		user_len1 = user_len;
		user_len2 = 0;
	}
	BUG_ON(user_len1 + user_len2 != user_len);

 retry:
	/*
	 * Attempt to obtain the GPC lock on *both* (if there are two)
	 * gfn_to_pfn caches that cover the region.
	 */
	if (atomic) {
		local_irq_save(flags);
		if (!read_trylock(&gpc1->lock)) {
			local_irq_restore(flags);
			return;
		}
	} else {
		read_lock_irqsave(&gpc1->lock, flags);
	}
	while (!kvm_gpc_check(gpc1, user_len1)) {
		read_unlock_irqrestore(&gpc1->lock, flags);

		/* When invoked from kvm_sched_out() we cannot sleep */
		if (atomic)
			return;

		if (kvm_gpc_refresh(gpc1, user_len1))
			return;

		read_lock_irqsave(&gpc1->lock, flags);
	}

	if (likely(!user_len2)) {
		/*
		 * Set up three pointers directly to the runstate_info
		 * struct in the guest (via the GPC).
		 *
		 *  • @rs_state → state field
		 *  • @rs_times → state_entry_time field.
		 *  • @update_bit → last byte of state_entry_time, which
		 *    contains the XEN_RUNSTATE_UPDATE bit.
		 */
		rs_state = gpc1->khva;
		rs_times = gpc1->khva + times_ofs;
		if (v->kvm->arch.xen.runstate_update_flag)
			update_bit = ((void *)(&rs_times[1])) - 1;
	} else {
		/*
		 * The guest's runstate_info is split across two pages and we
		 * need to hold and validate both GPCs simultaneously. We can
		 * declare a lock ordering GPC1 > GPC2 because nothing else
		 * takes them more than one at a time. Set a subclass on the
		 * gpc1 lock to make lockdep shut up about it.
		 */
		lock_set_subclass(&gpc1->lock.dep_map, 1, _THIS_IP_);
		if (atomic) {
			if (!read_trylock(&gpc2->lock)) {
				read_unlock_irqrestore(&gpc1->lock, flags);
				return;
			}
		} else {
			read_lock(&gpc2->lock);
		}

		if (!kvm_gpc_check(gpc2, user_len2)) {
			read_unlock(&gpc2->lock);
			read_unlock_irqrestore(&gpc1->lock, flags);

			/* When invoked from kvm_sched_out() we cannot sleep */
			if (atomic)
				return;

			/*
			 * Use kvm_gpc_activate() here because if the runstate
			 * area was configured in 32-bit mode and only extends
			 * to the second page now because the guest changed to
			 * 64-bit mode, the second GPC won't have been set up.
			 */
			if (kvm_gpc_activate(gpc2, gpc1->gpa + user_len1,
					     user_len2))
				return;

			/*
			 * We dropped the lock on GPC1 so we have to go all the
			 * way back and revalidate that too.
			 */
			goto retry;
		}

		/*
		 * In this case, the runstate_info struct will be assembled on
		 * the kernel stack (compat or not as appropriate) and will
		 * be copied to GPC1/GPC2 with a dual memcpy. Set up the three
		 * rs pointers accordingly.
		 */
		rs_times = &rs.state_entry_time;

		/*
		 * The rs_state pointer points to the start of what we'll
		 * copy to the guest, which in the case of a compat guest
		 * is the 32-bit field that the compiler thinks is padding.
		 */
		rs_state = ((void *)rs_times) - times_ofs;

		/*
		 * The update_bit is still directly in the guest memory,
		 * via one GPC or the other.
		 */
		if (v->kvm->arch.xen.runstate_update_flag) {
			if (user_len1 >= times_ofs + sizeof(uint64_t))
				update_bit = gpc1->khva + times_ofs +
					sizeof(uint64_t) - 1;
			else
				update_bit = gpc2->khva + times_ofs +
					sizeof(uint64_t) - 1 - user_len1;
		}

#ifdef CONFIG_X86_64
		/*
		 * Don't leak kernel memory through the padding in the 64-bit
		 * version of the struct.
		 */
		memset(&rs, 0, offsetof(struct vcpu_runstate_info, state_entry_time));
#endif
	}

	/*
	 * First, set the XEN_RUNSTATE_UPDATE bit in the top bit of the
	 * state_entry_time field, directly in the guest. We need to set
	 * that (and write-barrier) before writing to the rest of the
	 * structure, and clear it last. Just as Xen does, we address the
	 * single *byte* in which it resides because it might be in a
	 * different cache line to the rest of the 64-bit word, due to
	 * the (lack of) alignment constraints.
	 */
	entry_time = vx->runstate_entry_time;
	if (update_bit) {
		entry_time |= XEN_RUNSTATE_UPDATE;
		*update_bit = (vx->runstate_entry_time | XEN_RUNSTATE_UPDATE) >> 56;
		smp_wmb();
	}

	/*
	 * Now assemble the actual structure, either on our kernel stack
	 * or directly in the guest according to how the rs_state and
	 * rs_times pointers were set up above.
	 */
	*rs_state = vx->current_runstate;
	rs_times[0] = entry_time;
	memcpy(rs_times + 1, vx->runstate_times, sizeof(vx->runstate_times));

	/* For the split case, we have to then copy it to the guest. */
	if (user_len2) {
		memcpy(gpc1->khva, rs_state, user_len1);
		memcpy(gpc2->khva, ((void *)rs_state) + user_len1, user_len2);
	}
	smp_wmb();

	/* Finally, clear the XEN_RUNSTATE_UPDATE bit. */
	if (update_bit) {
		entry_time &= ~XEN_RUNSTATE_UPDATE;
		*update_bit = entry_time >> 56;
		smp_wmb();
	}

	if (user_len2)
		read_unlock(&gpc2->lock);

	read_unlock_irqrestore(&gpc1->lock, flags);

	mark_page_dirty_in_slot(v->kvm, gpc1->memslot, gpc1->gpa >> PAGE_SHIFT);
	if (user_len2)
		mark_page_dirty_in_slot(v->kvm, gpc2->memslot, gpc2->gpa >> PAGE_SHIFT);
}
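
/*
 * Illustrative sketch (an assumption about the guest side, not part of
 * this file): a guest reading its runstate area is expected to spin
 * while XEN_RUNSTATE_UPDATE is set and to re-read if state_entry_time
 * changed underneath it:
 *
 *	do {
 *		entry = READ_ONCE(rs->state_entry_time);
 *		rmb();
 *		memcpy(&snap, rs, sizeof(snap));
 *		rmb();
 *	} while ((entry & XEN_RUNSTATE_UPDATE) ||
 *		 entry != READ_ONCE(rs->state_entry_time));
 *
 * which is why kvm_xen_update_runstate_guest() raises the update bit
 * (with a write barrier) before touching any other field, and clears
 * it last.
 */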

void kvm_xen_update_runstate(struct kvm_vcpu *v, int state)
{
	struct kvm_vcpu_xen *vx = &v->arch.xen;
	u64 now = get_kvmclock_ns(v->kvm);
	u64 delta_ns = now - vx->runstate_entry_time;
	u64 run_delay = current->sched_info.run_delay;

	if (unlikely(!vx->runstate_entry_time))
		vx->current_runstate = RUNSTATE_offline;

	/*
	 * Time waiting for the scheduler isn't "stolen" if the
	 * vCPU wasn't running anyway.
	 */
	if (vx->current_runstate == RUNSTATE_running) {
		u64 steal_ns = run_delay - vx->last_steal;

		delta_ns -= steal_ns;

		vx->runstate_times[RUNSTATE_runnable] += steal_ns;
	}
	vx->last_steal = run_delay;

	vx->runstate_times[vx->current_runstate] += delta_ns;
	vx->current_runstate = state;
	vx->runstate_entry_time = now;

	if (vx->runstate_cache.active)
		kvm_xen_update_runstate_guest(v, state == RUNSTATE_runnable);
}

static void kvm_xen_inject_vcpu_vector(struct kvm_vcpu *v)
{
	struct kvm_lapic_irq irq = { };
	int r;

	irq.dest_id = v->vcpu_id;
	irq.vector = v->arch.xen.upcall_vector;
	irq.dest_mode = APIC_DEST_PHYSICAL;
	irq.shorthand = APIC_DEST_NOSHORT;
	irq.delivery_mode = APIC_DM_FIXED;
	irq.level = 1;

	/* The fast version will always work for physical unicast */
	WARN_ON_ONCE(!kvm_irq_delivery_to_apic_fast(v->kvm, NULL, &irq, &r, NULL));
}

/*
 * On event channel delivery, the vcpu_info may not have been accessible.
 * In that case, there are bits in vcpu->arch.xen.evtchn_pending_sel which
 * need to be marked into the vcpu_info (and evtchn_upcall_pending set).
 * Do so now that we can sleep in the context of the vCPU to bring the
 * page in, and refresh the pfn cache for it.
 */
void kvm_xen_inject_pending_events(struct kvm_vcpu *v)
{
	unsigned long evtchn_pending_sel = READ_ONCE(v->arch.xen.evtchn_pending_sel);
	struct gfn_to_pfn_cache *gpc = &v->arch.xen.vcpu_info_cache;
	unsigned long flags;

	if (!evtchn_pending_sel)
		return;

	/*
	 * Yes, this is an open-coded loop. But that's just what put_user()
	 * does anyway. Page it in and retry the instruction. We're just a
	 * little more honest about it.
	 */
	read_lock_irqsave(&gpc->lock, flags);
	while (!kvm_gpc_check(gpc, sizeof(struct vcpu_info))) {
		read_unlock_irqrestore(&gpc->lock, flags);

		if (kvm_gpc_refresh(gpc, sizeof(struct vcpu_info)))
			return;

		read_lock_irqsave(&gpc->lock, flags);
	}

	/* Now gpc->khva is a valid kernel address for the vcpu_info */
	if (IS_ENABLED(CONFIG_64BIT) && v->kvm->arch.xen.long_mode) {
		struct vcpu_info *vi = gpc->khva;

		asm volatile(LOCK_PREFIX "orq %0, %1\n"
			     "notq %0\n"
			     LOCK_PREFIX "andq %0, %2\n"
			     : "=r" (evtchn_pending_sel),
			       "+m" (vi->evtchn_pending_sel),
			       "+m" (v->arch.xen.evtchn_pending_sel)
			     : "0" (evtchn_pending_sel));
		WRITE_ONCE(vi->evtchn_upcall_pending, 1);
	} else {
		u32 evtchn_pending_sel32 = evtchn_pending_sel;
		struct compat_vcpu_info *vi = gpc->khva;

		asm volatile(LOCK_PREFIX "orl %0, %1\n"
			     "notl %0\n"
			     LOCK_PREFIX "andl %0, %2\n"
			     : "=r" (evtchn_pending_sel32),
			       "+m" (vi->evtchn_pending_sel),
			       "+m" (v->arch.xen.evtchn_pending_sel)
			     : "0" (evtchn_pending_sel32));
		WRITE_ONCE(vi->evtchn_upcall_pending, 1);
	}
	read_unlock_irqrestore(&gpc->lock, flags);

	/* For the per-vCPU lapic vector, deliver it as MSI. */
	if (v->arch.xen.upcall_vector)
		kvm_xen_inject_vcpu_vector(v);

	mark_page_dirty_in_slot(v->kvm, gpc->memslot, gpc->gpa >> PAGE_SHIFT);
}

int __kvm_xen_has_interrupt(struct kvm_vcpu *v)
{
	struct gfn_to_pfn_cache *gpc = &v->arch.xen.vcpu_info_cache;
	unsigned long flags;
	u8 rc = 0;

	/*
	 * If the global upcall vector (HVMIRQ_callback_vector) is set and
	 * the vCPU's evtchn_upcall_pending flag is set, the IRQ is pending.
	 */

	/* No need for compat handling here */
	BUILD_BUG_ON(offsetof(struct vcpu_info, evtchn_upcall_pending) !=
		     offsetof(struct compat_vcpu_info, evtchn_upcall_pending));
	BUILD_BUG_ON(sizeof(rc) !=
		     sizeof_field(struct vcpu_info, evtchn_upcall_pending));
	BUILD_BUG_ON(sizeof(rc) !=
		     sizeof_field(struct compat_vcpu_info, evtchn_upcall_pending));

	read_lock_irqsave(&gpc->lock, flags);
	while (!kvm_gpc_check(gpc, sizeof(struct vcpu_info))) {
		read_unlock_irqrestore(&gpc->lock, flags);

		/*
		 * This function gets called from kvm_vcpu_block() after setting the
		 * task to TASK_INTERRUPTIBLE, to see if it needs to wake immediately
		 * from a HLT. So we really mustn't sleep. If the page ended up absent
		 * at that point, just return 1 in order to trigger an immediate wake,
		 * and we'll end up getting called again from a context where we *can*
		 * fault in the page and wait for it.
		 */
		if (in_atomic() || !task_is_running(current))
			return 1;

		if (kvm_gpc_refresh(gpc, sizeof(struct vcpu_info))) {
			/*
			 * If this failed, userspace has screwed up the
			 * vcpu_info mapping. No interrupts for you.
			 */
			return 0;
		}
		read_lock_irqsave(&gpc->lock, flags);
	}

	rc = ((struct vcpu_info *)gpc->khva)->evtchn_upcall_pending;
	read_unlock_irqrestore(&gpc->lock, flags);
	return rc;
}

int kvm_xen_hvm_set_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data)
{
	int r = -ENOENT;

	switch (data->type) {
	case KVM_XEN_ATTR_TYPE_LONG_MODE:
		if (!IS_ENABLED(CONFIG_64BIT) && data->u.long_mode) {
			r = -EINVAL;
		} else {
			mutex_lock(&kvm->arch.xen.xen_lock);
			kvm->arch.xen.long_mode = !!data->u.long_mode;
			mutex_unlock(&kvm->arch.xen.xen_lock);
			r = 0;
		}
		break;

	case KVM_XEN_ATTR_TYPE_SHARED_INFO:
		mutex_lock(&kvm->arch.xen.xen_lock);
		r = kvm_xen_shared_info_init(kvm, data->u.shared_info.gfn);
		mutex_unlock(&kvm->arch.xen.xen_lock);
		break;

	case KVM_XEN_ATTR_TYPE_UPCALL_VECTOR:
		if (data->u.vector && data->u.vector < 0x10)
			r = -EINVAL;
		else {
			mutex_lock(&kvm->arch.xen.xen_lock);
			kvm->arch.xen.upcall_vector = data->u.vector;
			mutex_unlock(&kvm->arch.xen.xen_lock);
			r = 0;
		}
		break;

	case KVM_XEN_ATTR_TYPE_EVTCHN:
		r = kvm_xen_setattr_evtchn(kvm, data);
		break;

	case KVM_XEN_ATTR_TYPE_XEN_VERSION:
		mutex_lock(&kvm->arch.xen.xen_lock);
		kvm->arch.xen.xen_version = data->u.xen_version;
		mutex_unlock(&kvm->arch.xen.xen_lock);
		r = 0;
		break;

	case KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG:
		if (!sched_info_on()) {
			r = -EOPNOTSUPP;
			break;
		}
		mutex_lock(&kvm->arch.xen.xen_lock);
		kvm->arch.xen.runstate_update_flag = !!data->u.runstate_update_flag;
		mutex_unlock(&kvm->arch.xen.xen_lock);
		r = 0;
		break;

	default:
		break;
	}

	return r;
}
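
/*
 * Illustrative sketch (userspace side, not part of the kernel build):
 * a VMM drives the attributes above with the KVM_XEN_HVM_SET_ATTR
 * ioctl on the VM fd, e.g. to point KVM at the guest's shared_info
 * frame:
 *
 *	struct kvm_xen_hvm_attr ha = {
 *		.type = KVM_XEN_ATTR_TYPE_SHARED_INFO,
 *		.u.shared_info.gfn = shinfo_gfn,
 *	};
 *	ioctl(vm_fd, KVM_XEN_HVM_SET_ATTR, &ha);
 *
 * where shinfo_gfn is a placeholder for whatever frame the guest chose
 * in its XENMAPSPACE_shared_info mapping; KVM_XEN_INVALID_GFN
 * deactivates the cache, as handled in kvm_xen_shared_info_init().
 */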

int kvm_xen_hvm_get_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data)
{
	int r = -ENOENT;

	mutex_lock(&kvm->arch.xen.xen_lock);

	switch (data->type) {
	case KVM_XEN_ATTR_TYPE_LONG_MODE:
		data->u.long_mode = kvm->arch.xen.long_mode;
		r = 0;
		break;

	case KVM_XEN_ATTR_TYPE_SHARED_INFO:
		if (kvm->arch.xen.shinfo_cache.active)
			data->u.shared_info.gfn = gpa_to_gfn(kvm->arch.xen.shinfo_cache.gpa);
		else
			data->u.shared_info.gfn = KVM_XEN_INVALID_GFN;
		r = 0;
		break;

	case KVM_XEN_ATTR_TYPE_UPCALL_VECTOR:
		data->u.vector = kvm->arch.xen.upcall_vector;
		r = 0;
		break;

	case KVM_XEN_ATTR_TYPE_XEN_VERSION:
		data->u.xen_version = kvm->arch.xen.xen_version;
		r = 0;
		break;

	case KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG:
		if (!sched_info_on()) {
			r = -EOPNOTSUPP;
			break;
		}
		data->u.runstate_update_flag = kvm->arch.xen.runstate_update_flag;
		r = 0;
		break;

	default:
		break;
	}

	mutex_unlock(&kvm->arch.xen.xen_lock);
	return r;
}

int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
{
	int idx, r = -ENOENT;

	mutex_lock(&vcpu->kvm->arch.xen.xen_lock);
	idx = srcu_read_lock(&vcpu->kvm->srcu);

	switch (data->type) {
	case KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO:
		/* No compat necessary here. */
		BUILD_BUG_ON(sizeof(struct vcpu_info) !=
			     sizeof(struct compat_vcpu_info));
		BUILD_BUG_ON(offsetof(struct vcpu_info, time) !=
			     offsetof(struct compat_vcpu_info, time));

		if (data->u.gpa == KVM_XEN_INVALID_GPA) {
			kvm_gpc_deactivate(&vcpu->arch.xen.vcpu_info_cache);
			r = 0;
			break;
		}

		r = kvm_gpc_activate(&vcpu->arch.xen.vcpu_info_cache,
				     data->u.gpa, sizeof(struct vcpu_info));
		if (!r)
			kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);

		break;

	case KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO:
		if (data->u.gpa == KVM_XEN_INVALID_GPA) {
			kvm_gpc_deactivate(&vcpu->arch.xen.vcpu_time_info_cache);
			r = 0;
			break;
		}

		r = kvm_gpc_activate(&vcpu->arch.xen.vcpu_time_info_cache,
				     data->u.gpa,
				     sizeof(struct pvclock_vcpu_time_info));
		if (!r)
			kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
		break;

	case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR: {
		size_t sz, sz1, sz2;

		if (!sched_info_on()) {
			r = -EOPNOTSUPP;
			break;
		}
		if (data->u.gpa == KVM_XEN_INVALID_GPA) {
			r = 0;
		deactivate_out:
			kvm_gpc_deactivate(&vcpu->arch.xen.runstate_cache);
			kvm_gpc_deactivate(&vcpu->arch.xen.runstate2_cache);
			break;
		}

		/*
		 * If the guest switches to 64-bit mode after setting the runstate
		 * address, that's actually OK. kvm_xen_update_runstate_guest()
		 * will cope.
		 */
		if (IS_ENABLED(CONFIG_64BIT) && vcpu->kvm->arch.xen.long_mode)
			sz = sizeof(struct vcpu_runstate_info);
		else
			sz = sizeof(struct compat_vcpu_runstate_info);

		/* How much fits in the (first) page? */
		sz1 = PAGE_SIZE - (data->u.gpa & ~PAGE_MASK);
		r = kvm_gpc_activate(&vcpu->arch.xen.runstate_cache,
				     data->u.gpa, sz1);
		if (r)
			goto deactivate_out;

		/* Either map the second page, or deactivate the second GPC */
		if (sz1 >= sz) {
			kvm_gpc_deactivate(&vcpu->arch.xen.runstate2_cache);
		} else {
			sz2 = sz - sz1;
			BUG_ON((data->u.gpa + sz1) & ~PAGE_MASK);
			r = kvm_gpc_activate(&vcpu->arch.xen.runstate2_cache,
					     data->u.gpa + sz1, sz2);
			if (r)
				goto deactivate_out;
		}

		kvm_xen_update_runstate_guest(vcpu, false);
		break;
	}
	case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT:
		if (!sched_info_on()) {
			r = -EOPNOTSUPP;
			break;
		}
		if (data->u.runstate.state > RUNSTATE_offline) {
			r = -EINVAL;
			break;
		}

		kvm_xen_update_runstate(vcpu, data->u.runstate.state);
		r = 0;
		break;

	case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA:
		if (!sched_info_on()) {
			r = -EOPNOTSUPP;
			break;
		}
		if (data->u.runstate.state > RUNSTATE_offline) {
			r = -EINVAL;
			break;
		}
		if (data->u.runstate.state_entry_time !=
		    (data->u.runstate.time_running +
		     data->u.runstate.time_runnable +
		     data->u.runstate.time_blocked +
		     data->u.runstate.time_offline)) {
			r = -EINVAL;
			break;
		}
		if (get_kvmclock_ns(vcpu->kvm) <
		    data->u.runstate.state_entry_time) {
			r = -EINVAL;
			break;
		}

		vcpu->arch.xen.current_runstate = data->u.runstate.state;
		vcpu->arch.xen.runstate_entry_time =
			data->u.runstate.state_entry_time;
		vcpu->arch.xen.runstate_times[RUNSTATE_running] =
			data->u.runstate.time_running;
		vcpu->arch.xen.runstate_times[RUNSTATE_runnable] =
			data->u.runstate.time_runnable;
		vcpu->arch.xen.runstate_times[RUNSTATE_blocked] =
			data->u.runstate.time_blocked;
		vcpu->arch.xen.runstate_times[RUNSTATE_offline] =
			data->u.runstate.time_offline;
		vcpu->arch.xen.last_steal = current->sched_info.run_delay;
		r = 0;
		break;

	case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST:
		if (!sched_info_on()) {
			r = -EOPNOTSUPP;
			break;
		}
		if (data->u.runstate.state > RUNSTATE_offline &&
		    data->u.runstate.state != (u64)-1) {
			r = -EINVAL;
			break;
		}
		/* The adjustment must add up */
		if (data->u.runstate.state_entry_time !=
		    (data->u.runstate.time_running +
		     data->u.runstate.time_runnable +
		     data->u.runstate.time_blocked +
		     data->u.runstate.time_offline)) {
			r = -EINVAL;
			break;
		}

		if (get_kvmclock_ns(vcpu->kvm) <
		    (vcpu->arch.xen.runstate_entry_time +
		     data->u.runstate.state_entry_time)) {
			r = -EINVAL;
			break;
		}

		vcpu->arch.xen.runstate_entry_time +=
			data->u.runstate.state_entry_time;
		vcpu->arch.xen.runstate_times[RUNSTATE_running] +=
			data->u.runstate.time_running;
		vcpu->arch.xen.runstate_times[RUNSTATE_runnable] +=
			data->u.runstate.time_runnable;
		vcpu->arch.xen.runstate_times[RUNSTATE_blocked] +=
			data->u.runstate.time_blocked;
		vcpu->arch.xen.runstate_times[RUNSTATE_offline] +=
			data->u.runstate.time_offline;

		if (data->u.runstate.state <= RUNSTATE_offline)
			kvm_xen_update_runstate(vcpu, data->u.runstate.state);
		else if (vcpu->arch.xen.runstate_cache.active)
			kvm_xen_update_runstate_guest(vcpu, false);
		r = 0;
		break;

	case KVM_XEN_VCPU_ATTR_TYPE_VCPU_ID:
		if (data->u.vcpu_id >= KVM_MAX_VCPUS)
			r = -EINVAL;
		else {
			vcpu->arch.xen.vcpu_id = data->u.vcpu_id;
			r = 0;
		}
		break;

	case KVM_XEN_VCPU_ATTR_TYPE_TIMER:
		if (data->u.timer.port &&
		    data->u.timer.priority != KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL) {
			r = -EINVAL;
			break;
		}

		if (!vcpu->arch.xen.timer.function)
			kvm_xen_init_timer(vcpu);

		/* Stop the timer (if it's running) before changing the vector */
		kvm_xen_stop_timer(vcpu);
		vcpu->arch.xen.timer_virq = data->u.timer.port;

		/* Start the timer if the new value has a valid vector+expiry. */
		if (data->u.timer.port && data->u.timer.expires_ns)
			kvm_xen_start_timer(vcpu, data->u.timer.expires_ns,
					    data->u.timer.expires_ns -
					    get_kvmclock_ns(vcpu->kvm));

		r = 0;
		break;

	case KVM_XEN_VCPU_ATTR_TYPE_UPCALL_VECTOR:
		if (data->u.vector && data->u.vector < 0x10)
			r = -EINVAL;
		else {
			vcpu->arch.xen.upcall_vector = data->u.vector;
			r = 0;
		}
		break;

	default:
		break;
	}

	srcu_read_unlock(&vcpu->kvm->srcu, idx);
	mutex_unlock(&vcpu->kvm->arch.xen.xen_lock);
	return r;
}

int kvm_xen_vcpu_get_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
{
	int r = -ENOENT;

	mutex_lock(&vcpu->kvm->arch.xen.xen_lock);

	switch (data->type) {
	case KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO:
		if (vcpu->arch.xen.vcpu_info_cache.active)
			data->u.gpa = vcpu->arch.xen.vcpu_info_cache.gpa;
		else
			data->u.gpa = KVM_XEN_INVALID_GPA;
		r = 0;
		break;

	case KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO:
		if (vcpu->arch.xen.vcpu_time_info_cache.active)
			data->u.gpa = vcpu->arch.xen.vcpu_time_info_cache.gpa;
		else
			data->u.gpa = KVM_XEN_INVALID_GPA;
		r = 0;
		break;

	case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR:
		if (!sched_info_on()) {
			r = -EOPNOTSUPP;
			break;
		}
		if (vcpu->arch.xen.runstate_cache.active) {
			data->u.gpa = vcpu->arch.xen.runstate_cache.gpa;
			r = 0;
		}
		break;

	case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT:
		if (!sched_info_on()) {
			r = -EOPNOTSUPP;
			break;
		}
		data->u.runstate.state = vcpu->arch.xen.current_runstate;
		r = 0;
		break;

	case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA:
		if (!sched_info_on()) {
			r = -EOPNOTSUPP;
			break;
		}
		data->u.runstate.state = vcpu->arch.xen.current_runstate;
		data->u.runstate.state_entry_time =
			vcpu->arch.xen.runstate_entry_time;
		data->u.runstate.time_running =
			vcpu->arch.xen.runstate_times[RUNSTATE_running];
		data->u.runstate.time_runnable =
			vcpu->arch.xen.runstate_times[RUNSTATE_runnable];
		data->u.runstate.time_blocked =
			vcpu->arch.xen.runstate_times[RUNSTATE_blocked];
		data->u.runstate.time_offline =
			vcpu->arch.xen.runstate_times[RUNSTATE_offline];
		r = 0;
		break;

	case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST:
		r = -EINVAL;
		break;

	case KVM_XEN_VCPU_ATTR_TYPE_VCPU_ID:
		data->u.vcpu_id = vcpu->arch.xen.vcpu_id;
		r = 0;
		break;

	case KVM_XEN_VCPU_ATTR_TYPE_TIMER:
		/*
		 * Ensure a consistent snapshot of state is captured, with a
		 * timer either being pending, or the event channel delivered
		 * to the corresponding bit in the shared_info, rather than
		 * still lurking in the timer_pending flag for deferred
		 * delivery. Purely as an optimisation, if the timer_expires
		 * field is zero, the timer isn't active (nor even pending in
		 * the timer_pending flag) and there is no need to cancel it.
		 */
		if (vcpu->arch.xen.timer_expires) {
			hrtimer_cancel(&vcpu->arch.xen.timer);
			kvm_xen_inject_timer_irqs(vcpu);
		}

		data->u.timer.port = vcpu->arch.xen.timer_virq;
		data->u.timer.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;
		data->u.timer.expires_ns = vcpu->arch.xen.timer_expires;

		/*
		 * The hrtimer may trigger and raise the IRQ immediately,
		 * while the returned state causes it to be set up and
		 * raised again on the destination system after migration.
		 * That's fine, as the guest won't even have had a chance
		 * to run and handle the interrupt. Asserting an already
		 * pending event channel is idempotent.
		 */
		if (vcpu->arch.xen.timer_expires)
			hrtimer_start_expires(&vcpu->arch.xen.timer,
					      HRTIMER_MODE_ABS_HARD);

		r = 0;
		break;

	case KVM_XEN_VCPU_ATTR_TYPE_UPCALL_VECTOR:
		data->u.vector = vcpu->arch.xen.upcall_vector;
		r = 0;
		break;

	default:
		break;
	}

	mutex_unlock(&vcpu->kvm->arch.xen.xen_lock);
	return r;
}

int kvm_xen_write_hypercall_page(struct kvm_vcpu *vcpu, u64 data)
{
	struct kvm *kvm = vcpu->kvm;
	u32 page_num = data & ~PAGE_MASK;
	u64 page_addr = data & PAGE_MASK;
	bool lm = is_long_mode(vcpu);

	/* Latch long_mode for shared_info pages etc. */
	vcpu->kvm->arch.xen.long_mode = lm;

	/*
	 * If Xen hypercall intercept is enabled, fill the hypercall
	 * page with VMCALL/VMMCALL instructions since that's what
	 * we catch. Else the VMM has provided the hypercall pages
	 * with instructions of its own choosing, so use those.
	 */
	if (kvm_xen_hypercall_enabled(kvm)) {
		u8 instructions[32];
		int i;

		if (page_num)
			return 1;

		/* mov imm32, %eax */
		instructions[0] = 0xb8;

		/* vmcall / vmmcall */
		static_call(kvm_x86_patch_hypercall)(vcpu, instructions + 5);

		/* ret */
		instructions[8] = 0xc3;

		/* int3 to pad */
		memset(instructions + 9, 0xcc, sizeof(instructions) - 9);

		for (i = 0; i < PAGE_SIZE / sizeof(instructions); i++) {
			*(u32 *)&instructions[1] = i;
			if (kvm_vcpu_write_guest(vcpu,
						 page_addr + (i * sizeof(instructions)),
						 instructions, sizeof(instructions)))
				return 1;
		}
	} else {
		/*
		 * Note, truncation is a non-issue as 'lm' is guaranteed to be
		 * false for a 32-bit kernel, i.e. when hva_t is only 4 bytes.
		 */
		hva_t blob_addr = lm ? kvm->arch.xen_hvm_config.blob_addr_64
				     : kvm->arch.xen_hvm_config.blob_addr_32;
		u8 blob_size = lm ? kvm->arch.xen_hvm_config.blob_size_64
				  : kvm->arch.xen_hvm_config.blob_size_32;
		u8 *page;
		int ret;

		if (page_num >= blob_size)
			return 1;

		blob_addr += page_num * PAGE_SIZE;

		page = memdup_user((u8 __user *)blob_addr, PAGE_SIZE);
		if (IS_ERR(page))
			return PTR_ERR(page);

		ret = kvm_vcpu_write_guest(vcpu, page_addr, page, PAGE_SIZE);
		kfree(page);
		if (ret)
			return 1;
	}
	return 0;
}
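
/*
 * Illustrative sketch (not normative): with hypercall interception
 * enabled, each 32-byte slot written above disassembles as, e.g. for
 * __HYPERVISOR_xen_version (17) on an Intel CPU:
 *
 *	b8 11 00 00 00		mov	$0x11,%eax
 *	0f 01 c1		vmcall		(vmmcall on AMD)
 *	c3			ret
 *	cc cc cc ...		int3 padding to 32 bytes
 *
 * so a guest invokes hypercall N by calling hypercall_page + N * 32.
 */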

int kvm_xen_hvm_config(struct kvm *kvm, struct kvm_xen_hvm_config *xhc)
{
	/* Only some feature flags need to be *enabled* by userspace */
	u32 permitted_flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL |
			      KVM_XEN_HVM_CONFIG_EVTCHN_SEND;

	if (xhc->flags & ~permitted_flags)
		return -EINVAL;

	/*
	 * With hypercall interception the kernel generates its own
	 * hypercall page so it must not be provided.
	 */
	if ((xhc->flags & KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL) &&
	    (xhc->blob_addr_32 || xhc->blob_addr_64 ||
	     xhc->blob_size_32 || xhc->blob_size_64))
		return -EINVAL;

	mutex_lock(&kvm->arch.xen.xen_lock);

	if (xhc->msr && !kvm->arch.xen_hvm_config.msr)
		static_branch_inc(&kvm_xen_enabled.key);
	else if (!xhc->msr && kvm->arch.xen_hvm_config.msr)
		static_branch_slow_dec_deferred(&kvm_xen_enabled);

	memcpy(&kvm->arch.xen_hvm_config, xhc, sizeof(*xhc));

	mutex_unlock(&kvm->arch.xen.xen_lock);
	return 0;
}
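
/*
 * Illustrative sketch (userspace side, not part of the kernel build):
 * enabling the above from a VMM, with an MSR index of the VMM's own
 * choosing (the value below is only an example):
 *
 *	struct kvm_xen_hvm_config cfg = {
 *		.flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL |
 *			 KVM_XEN_HVM_CONFIG_EVTCHN_SEND,
 *		.msr = 0x40000200,
 *	};
 *	ioctl(vm_fd, KVM_XEN_HVM_CONFIG, &cfg);
 *
 * A nonzero .msr takes a reference on the kvm_xen_enabled static key,
 * as seen in kvm_xen_hvm_config() above.
 */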

static int kvm_xen_hypercall_set_result(struct kvm_vcpu *vcpu, u64 result)
{
	kvm_rax_write(vcpu, result);
	return kvm_skip_emulated_instruction(vcpu);
}

static int kvm_xen_hypercall_complete_userspace(struct kvm_vcpu *vcpu)
{
	struct kvm_run *run = vcpu->run;

	if (unlikely(!kvm_is_linear_rip(vcpu, vcpu->arch.xen.hypercall_rip)))
		return 1;

	return kvm_xen_hypercall_set_result(vcpu, run->xen.u.hcall.result);
}

static inline int max_evtchn_port(struct kvm *kvm)
{
	if (IS_ENABLED(CONFIG_64BIT) && kvm->arch.xen.long_mode)
		return EVTCHN_2L_NR_CHANNELS;
	else
		return COMPAT_EVTCHN_2L_NR_CHANNELS;
}
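
/*
 * Those limits fall out of the shared_info layout: the 64-bit
 * evtchn_pending[] array is 64 words of 64 bits = 4096 ports
 * (EVTCHN_2L_NR_CHANNELS), while the compat layout has 32 words of
 * 32 bits = 1024 ports (COMPAT_EVTCHN_2L_NR_CHANNELS).
 */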

static bool wait_pending_event(struct kvm_vcpu *vcpu, int nr_ports,
			       evtchn_port_t *ports)
{
	struct kvm *kvm = vcpu->kvm;
	struct gfn_to_pfn_cache *gpc = &kvm->arch.xen.shinfo_cache;
	unsigned long *pending_bits;
	unsigned long flags;
	bool ret = true;
	int idx, i;

	idx = srcu_read_lock(&kvm->srcu);
	read_lock_irqsave(&gpc->lock, flags);
	if (!kvm_gpc_check(gpc, PAGE_SIZE))
		goto out_rcu;

	ret = false;
	if (IS_ENABLED(CONFIG_64BIT) && kvm->arch.xen.long_mode) {
		struct shared_info *shinfo = gpc->khva;
		pending_bits = (unsigned long *)&shinfo->evtchn_pending;
	} else {
		struct compat_shared_info *shinfo = gpc->khva;
		pending_bits = (unsigned long *)&shinfo->evtchn_pending;
	}

	for (i = 0; i < nr_ports; i++) {
		if (test_bit(ports[i], pending_bits)) {
			ret = true;
			break;
		}
	}

 out_rcu:
	read_unlock_irqrestore(&gpc->lock, flags);
	srcu_read_unlock(&kvm->srcu, idx);

	return ret;
}

static bool kvm_xen_schedop_poll(struct kvm_vcpu *vcpu, bool longmode,
				 u64 param, u64 *r)
{
	struct sched_poll sched_poll;
	evtchn_port_t port, *ports;
	struct x86_exception e;
	int i;

	if (!lapic_in_kernel(vcpu) ||
	    !(vcpu->kvm->arch.xen_hvm_config.flags & KVM_XEN_HVM_CONFIG_EVTCHN_SEND))
		return false;

	if (IS_ENABLED(CONFIG_64BIT) && !longmode) {
		struct compat_sched_poll sp32;

		/* Sanity check that the compat struct definition is correct */
		BUILD_BUG_ON(sizeof(sp32) != 16);

		if (kvm_read_guest_virt(vcpu, param, &sp32, sizeof(sp32), &e)) {
			*r = -EFAULT;
			return true;
		}

		/*
		 * This is a 32-bit pointer to an array of evtchn_port_t which
		 * are uint32_t, so once it's converted no further compat
		 * handling is needed.
		 */
		sched_poll.ports = (void *)(unsigned long)(sp32.ports);
		sched_poll.nr_ports = sp32.nr_ports;
		sched_poll.timeout = sp32.timeout;
	} else {
		if (kvm_read_guest_virt(vcpu, param, &sched_poll,
					sizeof(sched_poll), &e)) {
			*r = -EFAULT;
			return true;
		}
	}

	if (unlikely(sched_poll.nr_ports > 1)) {
		/* Xen (unofficially) limits number of pollers to 128 */
		if (sched_poll.nr_ports > 128) {
			*r = -EINVAL;
			return true;
		}

		ports = kmalloc_array(sched_poll.nr_ports,
				      sizeof(*ports), GFP_KERNEL);
		if (!ports) {
			*r = -ENOMEM;
			return true;
		}
	} else
		ports = &port;

	if (kvm_read_guest_virt(vcpu, (gva_t)sched_poll.ports, ports,
				sched_poll.nr_ports * sizeof(*ports), &e)) {
		*r = -EFAULT;
		return true;
	}

	for (i = 0; i < sched_poll.nr_ports; i++) {
		if (ports[i] >= max_evtchn_port(vcpu->kvm)) {
			*r = -EINVAL;
			goto out;
		}
	}

	if (sched_poll.nr_ports == 1)
		vcpu->arch.xen.poll_evtchn = port;
	else
		vcpu->arch.xen.poll_evtchn = -1;

	set_bit(vcpu->vcpu_idx, vcpu->kvm->arch.xen.poll_mask);

	if (!wait_pending_event(vcpu, sched_poll.nr_ports, ports)) {
		vcpu->arch.mp_state = KVM_MP_STATE_HALTED;

		if (sched_poll.timeout)
			mod_timer(&vcpu->arch.xen.poll_timer,
				  jiffies + nsecs_to_jiffies(sched_poll.timeout));

		kvm_vcpu_halt(vcpu);

		if (sched_poll.timeout)
			del_timer(&vcpu->arch.xen.poll_timer);

		vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
	}

	vcpu->arch.xen.poll_evtchn = 0;
	*r = 0;
out:
	/* Really, this is only needed in case of timeout */
	clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.xen.poll_mask);

	if (unlikely(sched_poll.nr_ports > 1))
		kfree(ports);
	return true;
}

static void cancel_evtchn_poll(struct timer_list *t)
{
	struct kvm_vcpu *vcpu = from_timer(vcpu, t, arch.xen.poll_timer);

	kvm_make_request(KVM_REQ_UNBLOCK, vcpu);
	kvm_vcpu_kick(vcpu);
}

static bool kvm_xen_hcall_sched_op(struct kvm_vcpu *vcpu, bool longmode,
				   int cmd, u64 param, u64 *r)
{
	switch (cmd) {
	case SCHEDOP_poll:
		if (kvm_xen_schedop_poll(vcpu, longmode, param, r))
			return true;
		fallthrough;
	case SCHEDOP_yield:
		kvm_vcpu_on_spin(vcpu, true);
		*r = 0;
		return true;
	default:
		break;
	}

	return false;
}

struct compat_vcpu_set_singleshot_timer {
	uint64_t timeout_abs_ns;
	uint32_t flags;
} __attribute__((packed));

static bool kvm_xen_hcall_vcpu_op(struct kvm_vcpu *vcpu, bool longmode, int cmd,
				  int vcpu_id, u64 param, u64 *r)
{
	struct vcpu_set_singleshot_timer oneshot;
	struct x86_exception e;
	s64 delta;

	if (!kvm_xen_timer_enabled(vcpu))
		return false;

	switch (cmd) {
	case VCPUOP_set_singleshot_timer:
		if (vcpu->arch.xen.vcpu_id != vcpu_id) {
			*r = -EINVAL;
			return true;
		}

		/*
		 * The only difference for 32-bit compat is the 4 bytes of
		 * padding after the interesting part of the structure. So
		 * for a faithful emulation of Xen we have to *try* to copy
		 * the padding and return -EFAULT if we can't. Otherwise we
		 * might as well just have copied the 12-byte 32-bit struct.
		 */
		BUILD_BUG_ON(offsetof(struct compat_vcpu_set_singleshot_timer, timeout_abs_ns) !=
			     offsetof(struct vcpu_set_singleshot_timer, timeout_abs_ns));
		BUILD_BUG_ON(sizeof_field(struct compat_vcpu_set_singleshot_timer, timeout_abs_ns) !=
			     sizeof_field(struct vcpu_set_singleshot_timer, timeout_abs_ns));
		BUILD_BUG_ON(offsetof(struct compat_vcpu_set_singleshot_timer, flags) !=
			     offsetof(struct vcpu_set_singleshot_timer, flags));
		BUILD_BUG_ON(sizeof_field(struct compat_vcpu_set_singleshot_timer, flags) !=
			     sizeof_field(struct vcpu_set_singleshot_timer, flags));

		if (kvm_read_guest_virt(vcpu, param, &oneshot, longmode ? sizeof(oneshot) :
					sizeof(struct compat_vcpu_set_singleshot_timer), &e)) {
			*r = -EFAULT;
			return true;
		}

		/* A delta <= 0 results in an immediate callback, which is what we want */
		delta = oneshot.timeout_abs_ns - get_kvmclock_ns(vcpu->kvm);
		kvm_xen_start_timer(vcpu, oneshot.timeout_abs_ns, delta);
		*r = 0;
		return true;

	case VCPUOP_stop_singleshot_timer:
		if (vcpu->arch.xen.vcpu_id != vcpu_id) {
			*r = -EINVAL;
			return true;
		}
		kvm_xen_stop_timer(vcpu);
		*r = 0;
		return true;
	}

	return false;
}

static bool kvm_xen_hcall_set_timer_op(struct kvm_vcpu *vcpu, uint64_t timeout,
				       u64 *r)
{
	if (!kvm_xen_timer_enabled(vcpu))
		return false;

	if (timeout) {
		uint64_t guest_now = get_kvmclock_ns(vcpu->kvm);
		int64_t delta = timeout - guest_now;

		/*
		 * Xen has a 'Linux workaround' in do_set_timer_op() which
		 * checks for negative absolute timeout values (caused by
		 * integer overflow), and for values about 13 days in the
		 * future (2^50ns) which would be caused by jiffies
		 * overflow. For those cases, it sets the timeout 100ms in
		 * the future (not *too* soon, since if a guest really did
		 * set a long timeout on purpose we don't want to keep
		 * churning CPU time by waking it up).
		 */
		if (unlikely((int64_t)timeout < 0 ||
			     (delta > 0 && (uint32_t) (delta >> 50) != 0))) {
			delta = 100 * NSEC_PER_MSEC;
			timeout = guest_now + delta;
		}

		kvm_xen_start_timer(vcpu, timeout, delta);
	} else {
		kvm_xen_stop_timer(vcpu);
	}

	*r = 0;
	return true;
}

int kvm_xen_hypercall(struct kvm_vcpu *vcpu)
{
	bool longmode;
	u64 input, params[6], r = -ENOSYS;
	bool handled = false;
	u8 cpl;

	input = (u64)kvm_register_read(vcpu, VCPU_REGS_RAX);

	/* Hyper-V hypercalls get bit 31 set in EAX */
	if ((input & 0x80000000) &&
	    kvm_hv_hypercall_enabled(vcpu))
		return kvm_hv_hypercall(vcpu);

	longmode = is_64_bit_hypercall(vcpu);
	if (!longmode) {
		params[0] = (u32)kvm_rbx_read(vcpu);
		params[1] = (u32)kvm_rcx_read(vcpu);
		params[2] = (u32)kvm_rdx_read(vcpu);
		params[3] = (u32)kvm_rsi_read(vcpu);
		params[4] = (u32)kvm_rdi_read(vcpu);
		params[5] = (u32)kvm_rbp_read(vcpu);
	}
#ifdef CONFIG_X86_64
	else {
		params[0] = (u64)kvm_rdi_read(vcpu);
		params[1] = (u64)kvm_rsi_read(vcpu);
		params[2] = (u64)kvm_rdx_read(vcpu);
		params[3] = (u64)kvm_r10_read(vcpu);
		params[4] = (u64)kvm_r8_read(vcpu);
		params[5] = (u64)kvm_r9_read(vcpu);
	}
#endif
	cpl = static_call(kvm_x86_get_cpl)(vcpu);
	trace_kvm_xen_hypercall(cpl, input, params[0], params[1], params[2],
				params[3], params[4], params[5]);

	/*
	 * Only allow hypercall acceleration for CPL0. The rare hypercalls that
	 * are permitted in guest userspace can be handled by the VMM.
	 */
	if (unlikely(cpl > 0))
		goto handle_in_userspace;

	switch (input) {
	case __HYPERVISOR_xen_version:
		if (params[0] == XENVER_version && vcpu->kvm->arch.xen.xen_version) {
			r = vcpu->kvm->arch.xen.xen_version;
			handled = true;
		}
		break;
	case __HYPERVISOR_event_channel_op:
		if (params[0] == EVTCHNOP_send)
			handled = kvm_xen_hcall_evtchn_send(vcpu, params[1], &r);
		break;
	case __HYPERVISOR_sched_op:
		handled = kvm_xen_hcall_sched_op(vcpu, longmode, params[0],
						 params[1], &r);
		break;
	case __HYPERVISOR_vcpu_op:
		handled = kvm_xen_hcall_vcpu_op(vcpu, longmode, params[0], params[1],
						params[2], &r);
		break;
	case __HYPERVISOR_set_timer_op: {
		u64 timeout = params[0];
		/* In 32-bit mode, the 64-bit timeout is in two 32-bit params. */
		if (!longmode)
			timeout |= params[1] << 32;
		handled = kvm_xen_hcall_set_timer_op(vcpu, timeout, &r);
		break;
	}
	default:
		break;
	}

	if (handled)
		return kvm_xen_hypercall_set_result(vcpu, r);

handle_in_userspace:
	vcpu->run->exit_reason = KVM_EXIT_XEN;
	vcpu->run->xen.type = KVM_EXIT_XEN_HCALL;
	vcpu->run->xen.u.hcall.longmode = longmode;
	vcpu->run->xen.u.hcall.cpl = cpl;
	vcpu->run->xen.u.hcall.input = input;
	vcpu->run->xen.u.hcall.params[0] = params[0];
	vcpu->run->xen.u.hcall.params[1] = params[1];
	vcpu->run->xen.u.hcall.params[2] = params[2];
	vcpu->run->xen.u.hcall.params[3] = params[3];
	vcpu->run->xen.u.hcall.params[4] = params[4];
	vcpu->run->xen.u.hcall.params[5] = params[5];
	vcpu->arch.xen.hypercall_rip = kvm_get_linear_rip(vcpu);
	vcpu->arch.complete_userspace_io =
		kvm_xen_hypercall_complete_userspace;

	return 0;
}
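
/*
 * Illustrative sketch (an assumption about the guest side, not part of
 * this file): the register mapping consumed above corresponds to a
 * 64-bit guest issuing, say, SCHEDOP_yield as:
 *
 *	mov	$__HYPERVISOR_sched_op, %rax
 *	mov	$SCHEDOP_yield, %rdi
 *	xor	%esi, %esi
 *	call	hypercall_page + __HYPERVISOR_sched_op * 32
 *
 * with arguments in %rdi/%rsi/%rdx/%r10/%r8/%r9, while a 32-bit guest
 * passes them in %ebx/%ecx/%edx/%esi/%edi/%ebp as read in the
 * !longmode path of kvm_xen_hypercall().
 */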

static void kvm_xen_check_poller(struct kvm_vcpu *vcpu, int port)
{
	int poll_evtchn = vcpu->arch.xen.poll_evtchn;

	if ((poll_evtchn == port || poll_evtchn == -1) &&
	    test_and_clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.xen.poll_mask)) {
		kvm_make_request(KVM_REQ_UNBLOCK, vcpu);
		kvm_vcpu_kick(vcpu);
	}
}

/*
 * The return value from this function is propagated to kvm_set_irq() API,
 * so it returns:
 *  < 0   Interrupt was ignored (masked or not delivered for other reasons)
 *  = 0   Interrupt was coalesced (previous irq is still pending)
 *  > 0   Number of CPUs interrupt was delivered to
 *
 * It is also called directly from kvm_arch_set_irq_inatomic(), where the
 * only check on its return value is a comparison with -EWOULDBLOCK.
 */
int kvm_xen_set_evtchn_fast(struct kvm_xen_evtchn *xe, struct kvm *kvm)
{
	struct gfn_to_pfn_cache *gpc = &kvm->arch.xen.shinfo_cache;
	struct kvm_vcpu *vcpu;
	unsigned long *pending_bits, *mask_bits;
	unsigned long flags;
	int port_word_bit;
	bool kick_vcpu = false;
	int vcpu_idx, idx, rc;

	vcpu_idx = READ_ONCE(xe->vcpu_idx);
	if (vcpu_idx >= 0)
		vcpu = kvm_get_vcpu(kvm, vcpu_idx);
	else {
		vcpu = kvm_get_vcpu_by_id(kvm, xe->vcpu_id);
		if (!vcpu)
			return -EINVAL;
		WRITE_ONCE(xe->vcpu_idx, vcpu->vcpu_idx);
	}

	if (!vcpu->arch.xen.vcpu_info_cache.active)
		return -EINVAL;

	if (xe->port >= max_evtchn_port(kvm))
		return -EINVAL;

	rc = -EWOULDBLOCK;

	idx = srcu_read_lock(&kvm->srcu);

	read_lock_irqsave(&gpc->lock, flags);
	if (!kvm_gpc_check(gpc, PAGE_SIZE))
		goto out_rcu;

	if (IS_ENABLED(CONFIG_64BIT) && kvm->arch.xen.long_mode) {
		struct shared_info *shinfo = gpc->khva;
		pending_bits = (unsigned long *)&shinfo->evtchn_pending;
		mask_bits = (unsigned long *)&shinfo->evtchn_mask;
		port_word_bit = xe->port / 64;
	} else {
		struct compat_shared_info *shinfo = gpc->khva;
		pending_bits = (unsigned long *)&shinfo->evtchn_pending;
		mask_bits = (unsigned long *)&shinfo->evtchn_mask;
		port_word_bit = xe->port / 32;
	}

	/*
	 * If this port wasn't already set, and if it isn't masked, then
	 * we try to set the corresponding bit in the in-kernel shadow of
	 * evtchn_pending_sel for the target vCPU. And if *that* wasn't
	 * already set, then we kick the vCPU in question to write to the
	 * *real* evtchn_pending_sel in its own guest vcpu_info struct.
	 */
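	/*
	 * Worked example, assuming a 64-bit guest: an event on port 129
	 * sets bit 129 of evtchn_pending (bit 1 of word 2), so
	 * port_word_bit is 129 / 64 == 2, and bit 2 of the target vCPU's
	 * evtchn_pending_sel tells the guest which word to scan.
	 */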
	if (test_and_set_bit(xe->port, pending_bits)) {
		rc = 0; /* It was already raised */
	} else if (test_bit(xe->port, mask_bits)) {
		rc = -ENOTCONN; /* Masked */
		kvm_xen_check_poller(vcpu, xe->port);
	} else {
		rc = 1; /* Delivered to the bitmap in shared_info. */
		/* Now switch to the vCPU's vcpu_info to set the index and pending_sel */
		read_unlock_irqrestore(&gpc->lock, flags);
		gpc = &vcpu->arch.xen.vcpu_info_cache;

		read_lock_irqsave(&gpc->lock, flags);
		if (!kvm_gpc_check(gpc, sizeof(struct vcpu_info))) {
			/*
			 * Could not access the vcpu_info. Set the bit in-kernel
			 * and prod the vCPU to deliver it for itself.
			 */
			if (!test_and_set_bit(port_word_bit, &vcpu->arch.xen.evtchn_pending_sel))
				kick_vcpu = true;
			goto out_rcu;
		}

		if (IS_ENABLED(CONFIG_64BIT) && kvm->arch.xen.long_mode) {
			struct vcpu_info *vcpu_info = gpc->khva;
			if (!test_and_set_bit(port_word_bit, &vcpu_info->evtchn_pending_sel)) {
				WRITE_ONCE(vcpu_info->evtchn_upcall_pending, 1);
				kick_vcpu = true;
			}
		} else {
			struct compat_vcpu_info *vcpu_info = gpc->khva;
			if (!test_and_set_bit(port_word_bit,
					      (unsigned long *)&vcpu_info->evtchn_pending_sel)) {
				WRITE_ONCE(vcpu_info->evtchn_upcall_pending, 1);
				kick_vcpu = true;
			}
		}

		/* For the per-vCPU lapic vector, deliver it as MSI. */
		if (kick_vcpu && vcpu->arch.xen.upcall_vector) {
			kvm_xen_inject_vcpu_vector(vcpu);
			kick_vcpu = false;
		}
	}

out_rcu:
	read_unlock_irqrestore(&gpc->lock, flags);
	srcu_read_unlock(&kvm->srcu, idx);

	if (kick_vcpu) {
		kvm_make_request(KVM_REQ_UNBLOCK, vcpu);
		kvm_vcpu_kick(vcpu);
	}

	return rc;
}

static int kvm_xen_set_evtchn(struct kvm_xen_evtchn *xe, struct kvm *kvm)
{
	bool mm_borrowed = false;
	int rc;

	rc = kvm_xen_set_evtchn_fast(xe, kvm);
	if (rc != -EWOULDBLOCK)
		return rc;

	if (current->mm != kvm->mm) {
		/*
		 * If not on a thread which already belongs to this KVM,
		 * we'd better be in the irqfd workqueue.
		 */
		if (WARN_ON_ONCE(current->mm))
			return -EINVAL;

		kthread_use_mm(kvm->mm);
		mm_borrowed = true;
	}

	mutex_lock(&kvm->arch.xen.xen_lock);

	/*
	 * It is theoretically possible for the page to be unmapped
	 * and the MMU notifier to invalidate the shared_info before
	 * we even get to use it. In that case, this looks like an
	 * infinite loop. It was tempting to do it via the userspace
	 * HVA instead... but that just *hides* the fact that it's
	 * an infinite loop, because if a fault occurs and it waits
	 * for the page to come back, it can *still* immediately
	 * fault and have to wait again, repeatedly.
	 *
	 * Conversely, the page could also have been reinstated by
	 * another thread before we even obtain the mutex above, so
	 * check again *first* before remapping it.
	 */
	do {
		struct gfn_to_pfn_cache *gpc = &kvm->arch.xen.shinfo_cache;
		int idx;

		rc = kvm_xen_set_evtchn_fast(xe, kvm);
		if (rc != -EWOULDBLOCK)
			break;

		idx = srcu_read_lock(&kvm->srcu);
		rc = kvm_gpc_refresh(gpc, PAGE_SIZE);
		srcu_read_unlock(&kvm->srcu, idx);
	} while (!rc);

	mutex_unlock(&kvm->arch.xen.xen_lock);

	if (mm_borrowed)
		kthread_unuse_mm(kvm->mm);

	return rc;
}

/* This is the version called from kvm_set_irq() as the .set function */
static int evtchn_set_fn(struct kvm_kernel_irq_routing_entry *e, struct kvm *kvm,
			 int irq_source_id, int level, bool line_status)
{
	if (!level)
		return -EINVAL;

	return kvm_xen_set_evtchn(&e->xen_evtchn, kvm);
}

/*
 * Set up an event channel interrupt from the KVM IRQ routing table.
 * Used for e.g. PIRQ from passed through physical devices.
 */
int kvm_xen_setup_evtchn(struct kvm *kvm,
			 struct kvm_kernel_irq_routing_entry *e,
			 const struct kvm_irq_routing_entry *ue)
{
	struct kvm_vcpu *vcpu;

	if (ue->u.xen_evtchn.port >= max_evtchn_port(kvm))
		return -EINVAL;

	/* We only support 2 level event channels for now */
	if (ue->u.xen_evtchn.priority != KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL)
		return -EINVAL;

	/*
	 * Xen gives us interesting mappings from vCPU index to APIC ID,
	 * which means kvm_get_vcpu_by_id() has to iterate over all vCPUs
	 * to find it. Do that once at setup time, instead of every time.
	 * But beware that on live update / live migration, the routing
	 * table might be reinstated before the vCPU threads have finished
	 * recreating their vCPUs.
	 */
	vcpu = kvm_get_vcpu_by_id(kvm, ue->u.xen_evtchn.vcpu);
	if (vcpu)
		e->xen_evtchn.vcpu_idx = vcpu->vcpu_idx;
	else
		e->xen_evtchn.vcpu_idx = -1;

	e->xen_evtchn.port = ue->u.xen_evtchn.port;
	e->xen_evtchn.vcpu_id = ue->u.xen_evtchn.vcpu;
	e->xen_evtchn.priority = ue->u.xen_evtchn.priority;
	e->set = evtchn_set_fn;

	return 0;
}
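
/*
 * Minimal (uncompiled) userspace sketch of installing one such routing
 * entry with KVM_SET_GSI_ROUTING; the GSI and port numbers here are
 * arbitrary placeholders:
 *
 *	struct kvm_irq_routing *table;
 *	struct kvm_irq_routing_entry *e;
 *
 *	table = calloc(1, sizeof(*table) + sizeof(*e));
 *	table->nr = 1;
 *	e = &table->entries[0];
 *	e->gsi = 32;
 *	e->type = KVM_IRQ_ROUTING_XEN_EVTCHN;
 *	e->u.xen_evtchn.port = 123;
 *	e->u.xen_evtchn.vcpu = 0;
 *	e->u.xen_evtchn.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;
 *	ioctl(vm_fd, KVM_SET_GSI_ROUTING, table);
 */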

/*
 * Explicit event sending from userspace with the KVM_XEN_HVM_EVTCHN_SEND
 * ioctl.
 */
int kvm_xen_hvm_evtchn_send(struct kvm *kvm, struct kvm_irq_routing_xen_evtchn *uxe)
{
	struct kvm_xen_evtchn e;
	int ret;

	if (!uxe->port || uxe->port >= max_evtchn_port(kvm))
		return -EINVAL;

	/* We only support 2 level event channels for now */
	if (uxe->priority != KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL)
		return -EINVAL;

	e.port = uxe->port;
	e.vcpu_id = uxe->vcpu;
	e.vcpu_idx = -1;
	e.priority = uxe->priority;

	ret = kvm_xen_set_evtchn(&e, kvm);

	/*
	 * None of that 'return 1 if it actually got delivered' nonsense.
	 * We don't care if it was masked (-ENOTCONN) either.
	 */
	if (ret > 0 || ret == -ENOTCONN)
		ret = 0;

	return ret;
}
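
/*
 * Illustrative (uncompiled) caller of the ioctl above; the port and
 * vCPU values are arbitrary:
 *
 *	struct kvm_irq_routing_xen_evtchn uxe = {
 *		.port = 123,
 *		.vcpu = 0,
 *		.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL,
 *	};
 *
 *	if (ioctl(vm_fd, KVM_XEN_HVM_EVTCHN_SEND, &uxe) < 0)
 *		perror("KVM_XEN_HVM_EVTCHN_SEND");
 */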

/*
 * Support for *outbound* event channel events via the EVTCHNOP_send hypercall.
 */
struct evtchnfd {
	u32 send_port;
	u32 type;
	union {
		struct kvm_xen_evtchn port;
		struct {
			u32 port; /* zero */
			struct eventfd_ctx *ctx;
		} eventfd;
	} deliver;
};

/*
 * Update target vCPU or priority for a registered sending channel.
 */
static int kvm_xen_eventfd_update(struct kvm *kvm,
				  struct kvm_xen_hvm_attr *data)
{
	u32 port = data->u.evtchn.send_port;
	struct evtchnfd *evtchnfd;
	int ret;

	/* Protect writes to evtchnfd as well as the idr lookup. */
	mutex_lock(&kvm->arch.xen.xen_lock);
	evtchnfd = idr_find(&kvm->arch.xen.evtchn_ports, port);

	ret = -ENOENT;
	if (!evtchnfd)
		goto out_unlock;

	/* For an UPDATE, nothing may change except the priority/vcpu */
	ret = -EINVAL;
	if (evtchnfd->type != data->u.evtchn.type)
		goto out_unlock;

	/*
	 * Port cannot change, and if it's zero that was an eventfd
	 * which can't be changed either.
	 */
	if (!evtchnfd->deliver.port.port ||
	    evtchnfd->deliver.port.port != data->u.evtchn.deliver.port.port)
		goto out_unlock;

	/* We only support 2 level event channels for now */
	if (data->u.evtchn.deliver.port.priority != KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL)
		goto out_unlock;

	evtchnfd->deliver.port.priority = data->u.evtchn.deliver.port.priority;
	if (evtchnfd->deliver.port.vcpu_id != data->u.evtchn.deliver.port.vcpu) {
		evtchnfd->deliver.port.vcpu_id = data->u.evtchn.deliver.port.vcpu;
		evtchnfd->deliver.port.vcpu_idx = -1;
	}
	ret = 0;
out_unlock:
	mutex_unlock(&kvm->arch.xen.xen_lock);
	return ret;
}

/*
 * Configure the target (eventfd or local port delivery) for sending on
 * a given event channel.
 */
static int kvm_xen_eventfd_assign(struct kvm *kvm,
				  struct kvm_xen_hvm_attr *data)
{
	u32 port = data->u.evtchn.send_port;
	struct eventfd_ctx *eventfd = NULL;
	struct evtchnfd *evtchnfd;
	int ret = -EINVAL;

	evtchnfd = kzalloc(sizeof(struct evtchnfd), GFP_KERNEL);
	if (!evtchnfd)
		return -ENOMEM;

	switch (data->u.evtchn.type) {
	case EVTCHNSTAT_ipi:
		/* IPI must map back to the same port# */
		if (data->u.evtchn.deliver.port.port != data->u.evtchn.send_port)
			goto out_noeventfd; /* -EINVAL */
		break;

	case EVTCHNSTAT_interdomain:
		if (data->u.evtchn.deliver.port.port) {
			if (data->u.evtchn.deliver.port.port >= max_evtchn_port(kvm))
				goto out_noeventfd; /* -EINVAL */
		} else {
			eventfd = eventfd_ctx_fdget(data->u.evtchn.deliver.eventfd.fd);
			if (IS_ERR(eventfd)) {
				ret = PTR_ERR(eventfd);
				goto out_noeventfd;
			}
		}
		break;

	case EVTCHNSTAT_virq:
	case EVTCHNSTAT_closed:
	case EVTCHNSTAT_unbound:
	case EVTCHNSTAT_pirq:
	default: /* Unknown event channel type */
		goto out; /* -EINVAL */
	}

	evtchnfd->send_port = data->u.evtchn.send_port;
	evtchnfd->type = data->u.evtchn.type;
	if (eventfd) {
		evtchnfd->deliver.eventfd.ctx = eventfd;
	} else {
		/* We only support 2 level event channels for now */
		if (data->u.evtchn.deliver.port.priority != KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL)
			goto out; /* -EINVAL */

		evtchnfd->deliver.port.port = data->u.evtchn.deliver.port.port;
		evtchnfd->deliver.port.vcpu_id = data->u.evtchn.deliver.port.vcpu;
		evtchnfd->deliver.port.vcpu_idx = -1;
		evtchnfd->deliver.port.priority = data->u.evtchn.deliver.port.priority;
	}

	mutex_lock(&kvm->arch.xen.xen_lock);
	ret = idr_alloc(&kvm->arch.xen.evtchn_ports, evtchnfd, port, port + 1,
			GFP_KERNEL);
	mutex_unlock(&kvm->arch.xen.xen_lock);
	if (ret >= 0)
		return 0;

	if (ret == -ENOSPC)
		ret = -EEXIST;
out:
	if (eventfd)
		eventfd_ctx_put(eventfd);
out_noeventfd:
	kfree(evtchnfd);
	return ret;
}

static int kvm_xen_eventfd_deassign(struct kvm *kvm, u32 port)
{
	struct evtchnfd *evtchnfd;

	mutex_lock(&kvm->arch.xen.xen_lock);
	evtchnfd = idr_remove(&kvm->arch.xen.evtchn_ports, port);
	mutex_unlock(&kvm->arch.xen.xen_lock);

	if (!evtchnfd)
		return -ENOENT;

	synchronize_srcu(&kvm->srcu);
	if (!evtchnfd->deliver.port.port)
		eventfd_ctx_put(evtchnfd->deliver.eventfd.ctx);
	kfree(evtchnfd);
	return 0;
}

static int kvm_xen_eventfd_reset(struct kvm *kvm)
{
	struct evtchnfd *evtchnfd, **all_evtchnfds;
	int i;
	int n = 0;

	mutex_lock(&kvm->arch.xen.xen_lock);

	/*
	 * Because synchronize_srcu() cannot be called inside the
	 * critical section, first collect all the evtchnfd objects
	 * in an array as they are removed from evtchn_ports.
	 */
	idr_for_each_entry(&kvm->arch.xen.evtchn_ports, evtchnfd, i)
		n++;

	all_evtchnfds = kmalloc_array(n, sizeof(struct evtchnfd *), GFP_KERNEL);
	if (!all_evtchnfds) {
		mutex_unlock(&kvm->arch.xen.xen_lock);
		return -ENOMEM;
	}

	n = 0;
	idr_for_each_entry(&kvm->arch.xen.evtchn_ports, evtchnfd, i) {
		all_evtchnfds[n++] = evtchnfd;
		idr_remove(&kvm->arch.xen.evtchn_ports, evtchnfd->send_port);
	}
	mutex_unlock(&kvm->arch.xen.xen_lock);

	synchronize_srcu(&kvm->srcu);

	while (n--) {
		evtchnfd = all_evtchnfds[n];
		if (!evtchnfd->deliver.port.port)
			eventfd_ctx_put(evtchnfd->deliver.eventfd.ctx);
		kfree(evtchnfd);
	}
	kfree(all_evtchnfds);

	return 0;
}

static int kvm_xen_setattr_evtchn(struct kvm *kvm, struct kvm_xen_hvm_attr *data)
{
	u32 port = data->u.evtchn.send_port;

	if (data->u.evtchn.flags == KVM_XEN_EVTCHN_RESET)
		return kvm_xen_eventfd_reset(kvm);

	if (!port || port >= max_evtchn_port(kvm))
		return -EINVAL;

	if (data->u.evtchn.flags == KVM_XEN_EVTCHN_DEASSIGN)
		return kvm_xen_eventfd_deassign(kvm, port);
	if (data->u.evtchn.flags == KVM_XEN_EVTCHN_UPDATE)
		return kvm_xen_eventfd_update(kvm, data);
	if (data->u.evtchn.flags)
		return -EINVAL;

	return kvm_xen_eventfd_assign(kvm, data);
}
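
/*
 * Hedged (uncompiled) sketch of registering an outbound interdomain
 * channel via the KVM_XEN_HVM_SET_ATTR ioctl, delivering to an eventfd;
 * the port number and the 'efd' descriptor are placeholders:
 *
 *	struct kvm_xen_hvm_attr ha = {
 *		.type = KVM_XEN_ATTR_TYPE_EVTCHN,
 *		.u.evtchn.send_port = 123,
 *		.u.evtchn.type = EVTCHNSTAT_interdomain,
 *		.u.evtchn.flags = 0,			// 0 => assign
 *		.u.evtchn.deliver.eventfd.port = 0,	// 0 => use the eventfd
 *		.u.evtchn.deliver.eventfd.fd = efd,
 *	};
 *
 *	ioctl(vm_fd, KVM_XEN_HVM_SET_ATTR, &ha);
 */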

static bool kvm_xen_hcall_evtchn_send(struct kvm_vcpu *vcpu, u64 param, u64 *r)
{
	struct evtchnfd *evtchnfd;
	struct evtchn_send send;
	struct x86_exception e;

	/* Sanity check: this structure is the same for 32-bit and 64-bit */
	BUILD_BUG_ON(sizeof(send) != 4);
	if (kvm_read_guest_virt(vcpu, param, &send, sizeof(send), &e)) {
		*r = -EFAULT;
		return true;
	}

	/*
	 * evtchnfd is protected by kvm->srcu; the idr lookup instead
	 * is protected by RCU.
	 */
	rcu_read_lock();
	evtchnfd = idr_find(&vcpu->kvm->arch.xen.evtchn_ports, send.port);
	rcu_read_unlock();
	if (!evtchnfd)
		return false;

	if (evtchnfd->deliver.port.port) {
		int ret = kvm_xen_set_evtchn(&evtchnfd->deliver.port, vcpu->kvm);
		if (ret < 0 && ret != -ENOTCONN)
			return false;
	} else {
		eventfd_signal(evtchnfd->deliver.eventfd.ctx, 1);
	}

	*r = 0;
	return true;
}
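
/*
 * For reference, the guest-side counterpart that lands here is a plain
 * EVTCHNOP_send hypercall; a sketch using the standard Xen guest
 * interface, with an arbitrary port:
 *
 *	struct evtchn_send send = { .port = 123 };
 *
 *	HYPERVISOR_event_channel_op(EVTCHNOP_send, &send);
 */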

void kvm_xen_init_vcpu(struct kvm_vcpu *vcpu)
{
	vcpu->arch.xen.vcpu_id = vcpu->vcpu_idx;
	vcpu->arch.xen.poll_evtchn = 0;

	timer_setup(&vcpu->arch.xen.poll_timer, cancel_evtchn_poll, 0);

	kvm_gpc_init(&vcpu->arch.xen.runstate_cache, vcpu->kvm, NULL,
		     KVM_HOST_USES_PFN);
	kvm_gpc_init(&vcpu->arch.xen.runstate2_cache, vcpu->kvm, NULL,
		     KVM_HOST_USES_PFN);
	kvm_gpc_init(&vcpu->arch.xen.vcpu_info_cache, vcpu->kvm, NULL,
		     KVM_HOST_USES_PFN);
	kvm_gpc_init(&vcpu->arch.xen.vcpu_time_info_cache, vcpu->kvm, NULL,
		     KVM_HOST_USES_PFN);
}

void kvm_xen_destroy_vcpu(struct kvm_vcpu *vcpu)
{
	if (kvm_xen_timer_enabled(vcpu))
		kvm_xen_stop_timer(vcpu);

	kvm_gpc_deactivate(&vcpu->arch.xen.runstate_cache);
	kvm_gpc_deactivate(&vcpu->arch.xen.runstate2_cache);
	kvm_gpc_deactivate(&vcpu->arch.xen.vcpu_info_cache);
	kvm_gpc_deactivate(&vcpu->arch.xen.vcpu_time_info_cache);

	del_timer_sync(&vcpu->arch.xen.poll_timer);
}

void kvm_xen_update_tsc_info(struct kvm_vcpu *vcpu)
{
	struct kvm_cpuid_entry2 *entry;
	u32 function;

	if (!vcpu->arch.xen.cpuid.base)
		return;

	function = vcpu->arch.xen.cpuid.base | XEN_CPUID_LEAF(3);
	if (function > vcpu->arch.xen.cpuid.limit)
		return;

	entry = kvm_find_cpuid_entry_index(vcpu, function, 1);
	if (entry) {
		entry->ecx = vcpu->arch.hv_clock.tsc_to_system_mul;
		entry->edx = vcpu->arch.hv_clock.tsc_shift;
	}

	entry = kvm_find_cpuid_entry_index(vcpu, function, 2);
	if (entry)
		entry->eax = vcpu->arch.hw_tsc_khz;
}

void kvm_xen_init_vm(struct kvm *kvm)
{
	mutex_init(&kvm->arch.xen.xen_lock);
	idr_init(&kvm->arch.xen.evtchn_ports);
	kvm_gpc_init(&kvm->arch.xen.shinfo_cache, kvm, NULL, KVM_HOST_USES_PFN);
}

void kvm_xen_destroy_vm(struct kvm *kvm)
{
	struct evtchnfd *evtchnfd;
	int i;

	kvm_gpc_deactivate(&kvm->arch.xen.shinfo_cache);

	idr_for_each_entry(&kvm->arch.xen.evtchn_ports, evtchnfd, i) {
		if (!evtchnfd->deliver.port.port)
			eventfd_ctx_put(evtchnfd->deliver.eventfd.ctx);
		kfree(evtchnfd);
	}
	idr_destroy(&kvm->arch.xen.evtchn_ports);

	if (kvm->arch.xen_hvm_config.msr)
		static_branch_slow_dec_deferred(&kvm_xen_enabled);
}