// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2017 ARM Ltd.
 * Author: Marc Zyngier <marc.zyngier@arm.com>
 */

#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/irqdomain.h>
#include <linux/kvm_host.h>
#include <linux/irqchip/arm-gic-v3.h>

#include "vgic.h"

/*
 * How KVM uses GICv4 (insert rude comments here):
 *
 * The vgic-v4 layer acts as a bridge between several entities:
 * - The GICv4 ITS representation offered by the ITS driver
 * - VFIO, which is in charge of the PCI endpoint
 * - The virtual ITS, which is the only thing the guest sees
 *
 * The configuration of VLPIs is triggered by a callback from VFIO,
 * instructing KVM that a PCI device has been configured to deliver
 * MSIs to a vITS.
 *
 * kvm_vgic_v4_set_forwarding() is thus called with the routing entry,
 * and this is used to find the corresponding vITS data structures
 * (ITS instance, device, event and irq) using a process that is
 * extremely similar to the injection of an MSI.
 *
 * At this stage, we can link the guest's view of an LPI (uniquely
 * identified by the routing entry) and the host irq, using the GICv4
 * driver mapping operation. Should the mapping succeed, we've then
 * successfully upgraded the guest's LPI to a VLPI. We can then start
 * with updating GICv4's view of the property table and generating an
 * INValidation in order to kickstart the delivery of this VLPI to the
 * guest directly, without software intervention. Well, almost.
 *
 * When the PCI endpoint is deconfigured, this operation is reversed
 * with VFIO calling kvm_vgic_v4_unset_forwarding().
 *
 * Once the VLPI has been mapped, it needs to follow any change the
 * guest performs on its LPI through the vITS. For that, a number of
 * command handlers have hooks to communicate these changes to the HW:
 * - Any invalidation triggers a call to its_prop_update_vlpi()
 * - The INT command results in an irq_set_irqchip_state(), which
 *   generates an INT on the corresponding VLPI.
 * - The CLEAR command results in an irq_set_irqchip_state(), which
 *   generates a CLEAR on the corresponding VLPI.
 * - DISCARD translates into an unmap, similar to a call to
 *   kvm_vgic_v4_unset_forwarding().
 * - MOVI is translated by an update of the existing mapping, changing
 *   the target vcpu, resulting in a VMOVI being generated.
 * - MOVALL is translated by a string of mapping updates (similar to
 *   the handling of MOVI). MOVALL is horrible.
 *
 * Note that a DISCARD/MAPTI sequence emitted from the guest without
 * reprogramming the PCI endpoint after MAPTI does not result in a
 * VLPI being mapped, as there is no callback from VFIO (the guest
 * will get the interrupt via the normal SW injection). Fixing this is
 * not trivial, and requires some horrible messing with the VFIO
 * internals. Not fun. Don't do that.
 *
 * Then there is the scheduling. Each time a vcpu is about to run on a
 * physical CPU, KVM must tell the corresponding redistributor about
 * it. And if we've migrated our vcpu from one CPU to another, we must
 * tell the ITS (so that the messages reach the right redistributor).
 * This is done in two steps: first issue an irq_set_affinity() on the
 * irq corresponding to the vcpu, then call its_make_vpe_resident().
 * You must be in a non-preemptible context. On exit, a call to
 * its_make_vpe_non_resident() tells the redistributor that we're done
 * with the vcpu.
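 *
 * As a rough illustration only (the real code lives in vgic_v4_load()
 * and vgic_v4_put() below; "vgic_enabled" and "in_wfi" are just
 * placeholder names here), the residency dance looks like this:
 *
 *	preempt_disable();
 *	irq_set_affinity(vpe->irq, cpumask_of(smp_processor_id()));
 *	its_make_vpe_resident(vpe, false, vgic_enabled);
 *	... run the vcpu ...
 *	its_make_vpe_non_resident(vpe, in_wfi);
 *	preempt_enable();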
 *
 * Finally, the doorbell handling: Each vcpu is allocated an interrupt
 * which will fire each time a VLPI is made pending whilst the vcpu is
 * not running. Each time the vcpu gets blocked, the doorbell
 * interrupt gets enabled. When the vcpu is unblocked (for whatever
 * reason), the doorbell interrupt is disabled.
 */

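/*
 * Flags applied to each vPE doorbell interrupt. See the comment in
 * vgic_v4_init() for why the doorbell must not be auto-enabled (on
 * GICv4.0) nor lazily disabled.
 */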
#define DB_IRQ_FLAGS (IRQ_NOAUTOEN | IRQ_DISABLE_UNLAZY | IRQ_NO_BALANCING)

static irqreturn_t vgic_v4_doorbell_handler(int irq, void *info)
{
	struct kvm_vcpu *vcpu = info;

	/* We got the message, no need to fire again */
	if (!kvm_vgic_global_state.has_gicv4_1 &&
	    !irqd_irq_disabled(&irq_to_desc(irq)->irq_data))
		disable_irq_nosync(irq);

	/*
	 * The v4.1 doorbell can fire concurrently with the vPE being
	 * made non-resident. Ensure we only update pending_last
	 * *after* the non-residency sequence has completed.
	 */
	raw_spin_lock(&vcpu->arch.vgic_cpu.vgic_v3.its_vpe.vpe_lock);
	vcpu->arch.vgic_cpu.vgic_v3.its_vpe.pending_last = true;
	raw_spin_unlock(&vcpu->arch.vgic_cpu.vgic_v3.its_vpe.vpe_lock);

	kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
	kvm_vcpu_kick(vcpu);

	return IRQ_HANDLED;
}

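/*
 * Mirror the vGIC view of a vSGI's configuration (enable, group,
 * priority) into the vPE, so that the GICv4.1 driver can propagate
 * it to the redistributor.
 */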
static void vgic_v4_sync_sgi_config(struct its_vpe *vpe, struct vgic_irq *irq)
{
	vpe->sgi_config[irq->intid].enabled = irq->enabled;
	vpe->sgi_config[irq->intid].group = irq->group;
	vpe->sgi_config[irq->intid].priority = irq->priority;
}

static void vgic_v4_enable_vsgis(struct kvm_vcpu *vcpu)
{
	struct its_vpe *vpe = &vcpu->arch.vgic_cpu.vgic_v3.its_vpe;
	int i;

	/*
	 * With GICv4.1, every virtual SGI can be directly injected. So
	 * let's pretend that they are HW interrupts, tied to a host
	 * IRQ. The SGI code will do its magic.
	 */
	for (i = 0; i < VGIC_NR_SGIS; i++) {
		struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, i);
		struct irq_desc *desc;
		unsigned long flags;
		int ret;

		raw_spin_lock_irqsave(&irq->irq_lock, flags);

		if (irq->hw)
			goto unlock;

		irq->hw = true;
		irq->host_irq = irq_find_mapping(vpe->sgi_domain, i);

		/* Transfer the full irq state to the vPE */
		vgic_v4_sync_sgi_config(vpe, irq);
		desc = irq_to_desc(irq->host_irq);
		ret = irq_domain_activate_irq(irq_desc_get_irq_data(desc),
					      false);
		if (!WARN_ON(ret)) {
			/* Transfer pending state */
			ret = irq_set_irqchip_state(irq->host_irq,
						    IRQCHIP_STATE_PENDING,
						    irq->pending_latch);
			WARN_ON(ret);
			irq->pending_latch = false;
		}
	unlock:
		raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
		vgic_put_irq(vcpu->kvm, irq);
	}
}

static void vgic_v4_disable_vsgis(struct kvm_vcpu *vcpu)
{
	int i;

	for (i = 0; i < VGIC_NR_SGIS; i++) {
		struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, i);
		struct irq_desc *desc;
		unsigned long flags;
		int ret;

		raw_spin_lock_irqsave(&irq->irq_lock, flags);

		if (!irq->hw)
			goto unlock;

		irq->hw = false;
		ret = irq_get_irqchip_state(irq->host_irq,
					    IRQCHIP_STATE_PENDING,
					    &irq->pending_latch);
		WARN_ON(ret);

		desc = irq_to_desc(irq->host_irq);
		irq_domain_deactivate_irq(irq_desc_get_irq_data(desc));
	unlock:
		raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
		vgic_put_irq(vcpu->kvm, irq);
	}
}

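/*
 * Flip all vcpus between HW-backed (GICv4.1 vSGIs) and SW-emulated
 * SGI delivery, depending on the guest's GICD_CTLR.nASSGIreq setting.
 * The guest is halted while we do this so that the pending state can
 * be transferred without racing against the vcpus.
 */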
void vgic_v4_configure_vsgis(struct kvm *kvm)
{
	struct vgic_dist *dist = &kvm->arch.vgic;
	struct kvm_vcpu *vcpu;
	unsigned long i;

	lockdep_assert_held(&kvm->arch.config_lock);

	kvm_arm_halt_guest(kvm);

	kvm_for_each_vcpu(i, vcpu, kvm) {
		if (dist->nassgireq)
			vgic_v4_enable_vsgis(vcpu);
		else
			vgic_v4_disable_vsgis(vcpu);
	}

	kvm_arm_resume_guest(kvm);
}

/*
 * Must be called with GICv4.1 and the vPE unmapped, which
 * indicates the invalidation of any VPT caches associated
 * with the vPE, thus we can get the VLPI state by peeking
 * at the VPT.
 */
void vgic_v4_get_vlpi_state(struct vgic_irq *irq, bool *val)
{
	struct its_vpe *vpe = &irq->target_vcpu->arch.vgic_cpu.vgic_v3.its_vpe;
	int mask = BIT(irq->intid % BITS_PER_BYTE);
	void *va;
	u8 *ptr;

	va = page_address(vpe->vpt_page);
	ptr = va + irq->intid / BITS_PER_BYTE;

	*val = !!(*ptr & mask);
}

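/* Request the doorbell interrupt of a vPE, with the vcpu as cookie */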
int vgic_v4_request_vpe_irq(struct kvm_vcpu *vcpu, int irq)
{
	return request_irq(irq, vgic_v4_doorbell_handler, 0, "vcpu", vcpu);
}

/**
 * vgic_v4_init - Initialize the GICv4 data structures
 * @kvm: Pointer to the VM being initialized
 *
 * We may be called each time a vITS is created, or when the
 * vgic is initialized. In both cases, the number of vcpus
 * should now be fixed.
 */
int vgic_v4_init(struct kvm *kvm)
{
	struct vgic_dist *dist = &kvm->arch.vgic;
	struct kvm_vcpu *vcpu;
	int nr_vcpus, ret;
	unsigned long i;

	lockdep_assert_held(&kvm->arch.config_lock);

	if (!kvm_vgic_global_state.has_gicv4)
		return 0; /* Nothing to see here... move along. */

	if (dist->its_vm.vpes)
		return 0;

	nr_vcpus = atomic_read(&kvm->online_vcpus);

	dist->its_vm.vpes = kcalloc(nr_vcpus, sizeof(*dist->its_vm.vpes),
				    GFP_KERNEL_ACCOUNT);
	if (!dist->its_vm.vpes)
		return -ENOMEM;

	dist->its_vm.nr_vpes = nr_vcpus;

	kvm_for_each_vcpu(i, vcpu, kvm)
		dist->its_vm.vpes[i] = &vcpu->arch.vgic_cpu.vgic_v3.its_vpe;

	ret = its_alloc_vcpu_irqs(&dist->its_vm);
	if (ret < 0) {
		kvm_err("VPE IRQ allocation failure\n");
		kfree(dist->its_vm.vpes);
		dist->its_vm.nr_vpes = 0;
		dist->its_vm.vpes = NULL;
		return ret;
	}

	kvm_for_each_vcpu(i, vcpu, kvm) {
		int irq = dist->its_vm.vpes[i]->irq;
		unsigned long irq_flags = DB_IRQ_FLAGS;

		/*
		 * Don't automatically enable the doorbell, as we're
		 * flipping it back and forth when the vcpu gets
		 * blocked. Also disable the lazy disabling, as the
		 * doorbell could kick us out of the guest too
		 * early...
		 *
		 * On GICv4.1, the doorbell is managed in HW and must
		 * be left enabled.
		 */
		if (kvm_vgic_global_state.has_gicv4_1)
			irq_flags &= ~IRQ_NOAUTOEN;
		irq_set_status_flags(irq, irq_flags);

		ret = vgic_v4_request_vpe_irq(vcpu, irq);
		if (ret) {
			kvm_err("failed to allocate vcpu IRQ%d\n", irq);
			/*
			 * Trick: adjust the number of vpes so we know
			 * how many to nuke on teardown...
			 */
			dist->its_vm.nr_vpes = i;
			break;
		}
	}

	if (ret)
		vgic_v4_teardown(kvm);

	return ret;
}

/**
 * vgic_v4_teardown - Free the GICv4 data structures
 * @kvm: Pointer to the VM being destroyed
 */
void vgic_v4_teardown(struct kvm *kvm)
{
	struct its_vm *its_vm = &kvm->arch.vgic.its_vm;
	int i;

	lockdep_assert_held(&kvm->arch.config_lock);

	if (!its_vm->vpes)
		return;

	for (i = 0; i < its_vm->nr_vpes; i++) {
		struct kvm_vcpu *vcpu = kvm_get_vcpu(kvm, i);
		int irq = its_vm->vpes[i]->irq;

		irq_clear_status_flags(irq, DB_IRQ_FLAGS);
		free_irq(irq, vcpu);
	}

	its_free_vcpu_irqs(its_vm);
	kfree(its_vm->vpes);
	its_vm->nr_vpes = 0;
	its_vm->vpes = NULL;
}

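/*
 * Called on vcpu_put(): make the vPE non-resident. The IN_WFI flag
 * tells the GICv4 layer whether we want a doorbell while the vcpu is
 * blocked.
 */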
int vgic_v4_put(struct kvm_vcpu *vcpu)
{
	struct its_vpe *vpe = &vcpu->arch.vgic_cpu.vgic_v3.its_vpe;

	if (!vgic_supports_direct_msis(vcpu->kvm) || !vpe->resident)
		return 0;

	return its_make_vpe_non_resident(vpe, !!vcpu_get_flag(vcpu, IN_WFI));
}

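/*
 * Called on vcpu_load(): make the vPE resident on the current CPU.
 * The affinity change on the doorbell turns into a VMOVP at the ITS
 * level, so the redistributor we are about to use expects this vPE.
 */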
int vgic_v4_load(struct kvm_vcpu *vcpu)
{
	struct its_vpe *vpe = &vcpu->arch.vgic_cpu.vgic_v3.its_vpe;
	int err;

	if (!vgic_supports_direct_msis(vcpu->kvm) || vpe->resident)
		return 0;

	if (vcpu_get_flag(vcpu, IN_WFI))
		return 0;

	/*
	 * Before making the VPE resident, make sure the redistributor
	 * corresponding to our current CPU expects us here. See the
	 * doc in drivers/irqchip/irq-gic-v4.c to understand how this
	 * turns into a VMOVP command at the ITS level.
	 */
	err = irq_set_affinity(vpe->irq, cpumask_of(smp_processor_id()));
	if (err)
		return err;

	err = its_make_vpe_resident(vpe, false, vcpu->kvm->arch.vgic.enabled);
	if (err)
		return err;

	/*
	 * Now that the VPE is resident, let's get rid of a potential
	 * doorbell interrupt that would still be pending. This is a
	 * GICv4.0 only "feature"...
	 */
	if (!kvm_vgic_global_state.has_gicv4_1)
		err = irq_set_irqchip_state(vpe->irq, IRQCHIP_STATE_PENDING, false);

	return err;
}

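/*
 * Called on each guest entry. Only the first entry after the vPE has
 * been made resident actually needs to wait for the vPE to be ready;
 * see the comment below.
 */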
void vgic_v4_commit(struct kvm_vcpu *vcpu)
{
	struct its_vpe *vpe = &vcpu->arch.vgic_cpu.vgic_v3.its_vpe;

	/*
	 * No need to wait for the vPE to be ready across a shallow guest
	 * exit, as only a vcpu_put will invalidate it.
	 */
	if (!vpe->ready)
		its_commit_vpe(vpe);
}

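/*
 * Repackage the routing entry as a kvm_msi and reuse the MSI->ITS
 * translation to find the vITS targeted by this doorbell address.
 */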
static struct vgic_its *vgic_get_its(struct kvm *kvm,
				     struct kvm_kernel_irq_routing_entry *irq_entry)
{
	struct kvm_msi msi = (struct kvm_msi) {
		.address_lo = irq_entry->msi.address_lo,
		.address_hi = irq_entry->msi.address_hi,
		.data = irq_entry->msi.data,
		.flags = irq_entry->msi.flags,
		.devid = irq_entry->msi.devid,
	};

	return vgic_msi_to_its(kvm, &msi);
}

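/**
 * kvm_vgic_v4_set_forwarding - Turn an LPI routed to a guest into a VLPI
 * @kvm: Pointer to the VM
 * @virq: Host Linux irq to be forwarded
 * @irq_entry: Routing entry describing the MSI as the guest programmed it
 *
 * Returns 0 on success (including the cases where there is nothing to
 * do), an error code otherwise.
 */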
int kvm_vgic_v4_set_forwarding(struct kvm *kvm, int virq,
			       struct kvm_kernel_irq_routing_entry *irq_entry)
{
	struct vgic_its *its;
	struct vgic_irq *irq;
	struct its_vlpi_map map;
	unsigned long flags;
	int ret;

	if (!vgic_supports_direct_msis(kvm))
		return 0;

	/*
	 * Get the ITS, and escape early on error (not a valid
	 * doorbell for any of our vITSs).
	 */
	its = vgic_get_its(kvm, irq_entry);
	if (IS_ERR(its))
		return 0;

	mutex_lock(&its->its_lock);

	/* Perform the actual DevID/EventID -> LPI translation. */
	ret = vgic_its_resolve_lpi(kvm, its, irq_entry->msi.devid,
				   irq_entry->msi.data, &irq);
	if (ret)
		goto out;

	/* Silently exit if the vLPI is already mapped */
	if (irq->hw)
		goto out;

	/*
	 * Emit the mapping request. If it fails, the ITS probably
	 * isn't v4 compatible, so let's silently bail out. Holding
	 * the ITS lock should ensure that nothing can modify the
	 * target vcpu.
	 */
	map = (struct its_vlpi_map) {
		.vm = &kvm->arch.vgic.its_vm,
		.vpe = &irq->target_vcpu->arch.vgic_cpu.vgic_v3.its_vpe,
		.vintid = irq->intid,
		.properties = ((irq->priority & 0xfc) |
			       (irq->enabled ? LPI_PROP_ENABLED : 0) |
			       LPI_PROP_GROUP1),
		.db_enabled = true,
	};

	ret = its_map_vlpi(virq, &map);
	if (ret)
		goto out;

	irq->hw = true;
	irq->host_irq = virq;
	atomic_inc(&map.vpe->vlpi_count);

	/* Transfer pending state */
	raw_spin_lock_irqsave(&irq->irq_lock, flags);
	if (irq->pending_latch) {
		ret = irq_set_irqchip_state(irq->host_irq,
					    IRQCHIP_STATE_PENDING,
					    irq->pending_latch);
		WARN_RATELIMIT(ret, "IRQ %d", irq->host_irq);

		/*
		 * Clear pending_latch and communicate this state
		 * change via vgic_queue_irq_unlock.
		 */
		irq->pending_latch = false;
		vgic_queue_irq_unlock(kvm, irq, flags);
	} else {
		raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
	}

out:
	mutex_unlock(&its->its_lock);
	return ret;
}

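/**
 * kvm_vgic_v4_unset_forwarding - Undo the effect of kvm_vgic_v4_set_forwarding()
 * @kvm: Pointer to the VM
 * @virq: Host Linux irq that was being forwarded
 * @irq_entry: Routing entry describing the MSI as the guest programmed it
 *
 * The interrupt reverts to plain SW injection through the vITS.
 * Returns 0 on success, an error code otherwise.
 */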
int kvm_vgic_v4_unset_forwarding(struct kvm *kvm, int virq,
				 struct kvm_kernel_irq_routing_entry *irq_entry)
{
	struct vgic_its *its;
	struct vgic_irq *irq;
	int ret;

	if (!vgic_supports_direct_msis(kvm))
		return 0;

	/*
	 * Get the ITS, and escape early on error (not a valid
	 * doorbell for any of our vITSs).
	 */
	its = vgic_get_its(kvm, irq_entry);
	if (IS_ERR(its))
		return 0;

	mutex_lock(&its->its_lock);

	ret = vgic_its_resolve_lpi(kvm, its, irq_entry->msi.devid,
				   irq_entry->msi.data, &irq);
	if (ret)
		goto out;

	WARN_ON(!(irq->hw && irq->host_irq == virq));
	if (irq->hw) {
		atomic_dec(&irq->target_vcpu->arch.vgic_cpu.vgic_v3.its_vpe.vlpi_count);
		irq->hw = false;
		ret = its_unmap_vlpi(virq);
	}

out:
	mutex_unlock(&its->its_lock);
	return ret;
}