1 | // SPDX-License-Identifier: GPL-2.0-only |
2 | /* |
3 | * Xen event channels |
4 | * |
5 | * Xen models interrupts with abstract event channels. Because each |
6 | * domain gets 1024 event channels, but NR_IRQ is not that large, we |
7 | * must dynamically map irqs<->event channels. The event channels |
8 | * interface with the rest of the kernel by defining a xen interrupt |
9 | * chip. When an event is received, it is mapped to an irq and sent |
10 | * through the normal interrupt processing path. |
11 | * |
12 | * There are four kinds of events which can be mapped to an event |
13 | * channel: |
14 | * |
15 | * 1. Inter-domain notifications. This includes all the virtual |
16 | * device events, since they're driven by front-ends in another domain |
17 | * (typically dom0). |
18 | * 2. VIRQs, typically used for timers. These are per-cpu events. |
19 | * 3. IPIs. |
20 | * 4. PIRQs - Hardware interrupts. |
21 | * |
22 | * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007 |
23 | */ |
24 | |
25 | #define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt |
26 | |
27 | #include <linux/linkage.h> |
28 | #include <linux/interrupt.h> |
29 | #include <linux/irq.h> |
30 | #include <linux/moduleparam.h> |
31 | #include <linux/string.h> |
32 | #include <linux/memblock.h> |
33 | #include <linux/slab.h> |
34 | #include <linux/irqnr.h> |
35 | #include <linux/pci.h> |
36 | #include <linux/rcupdate.h> |
37 | #include <linux/spinlock.h> |
38 | #include <linux/cpuhotplug.h> |
39 | #include <linux/atomic.h> |
40 | #include <linux/ktime.h> |
41 | |
42 | #ifdef CONFIG_X86 |
43 | #include <asm/desc.h> |
44 | #include <asm/ptrace.h> |
45 | #include <asm/idtentry.h> |
46 | #include <asm/irq.h> |
47 | #include <asm/io_apic.h> |
48 | #include <asm/i8259.h> |
49 | #include <asm/xen/cpuid.h> |
50 | #include <asm/xen/pci.h> |
51 | #endif |
52 | #include <asm/sync_bitops.h> |
53 | #include <asm/xen/hypercall.h> |
54 | #include <asm/xen/hypervisor.h> |
55 | #include <xen/page.h> |
56 | |
57 | #include <xen/xen.h> |
58 | #include <xen/hvm.h> |
59 | #include <xen/xen-ops.h> |
60 | #include <xen/events.h> |
61 | #include <xen/interface/xen.h> |
62 | #include <xen/interface/event_channel.h> |
63 | #include <xen/interface/hvm/hvm_op.h> |
64 | #include <xen/interface/hvm/params.h> |
65 | #include <xen/interface/physdev.h> |
66 | #include <xen/interface/sched.h> |
67 | #include <xen/interface/vcpu.h> |
68 | #include <xen/xenbus.h> |
69 | #include <asm/hw_irq.h> |
70 | |
71 | #include "events_internal.h" |
72 | |
73 | #undef MODULE_PARAM_PREFIX |
74 | #define MODULE_PARAM_PREFIX "xen." |
75 | |
/*
 * Interrupt types: the kind of event source an IRQ is bound to.
 * Stored in irq_info.type. IRQT_UNBOUND is zero and is the type
 * xen_irq_init() assigns to a freshly allocated irq_info.
 */
enum xen_irq_type {
	IRQT_UNBOUND = 0,
	IRQT_PIRQ,
	IRQT_VIRQ,
	IRQT_IPI,
	IRQT_EVTCHN
};
84 | |
/*
 * Packed IRQ information:
 * type - enum xen_irq_type
 * event channel - irq->event channel mapping
 * cpu - cpu this event channel is bound to
 * index - type-specific information:
 *    PIRQ - vector, with MSB being "needs EOI", or physical IRQ of the HVM
 *           guest, or GSI (real passthrough IRQ) of the device.
 *    VIRQ - virq number
 *    IPI - IPI vector
 *    EVTCHN - owning xenbus device (may be NULL)
 */
struct irq_info {
	struct list_head list;		/* Entry in xen_irq_list_head */
	struct list_head eoi_list;	/* Entry in a per-cpu lateeoi list */
	struct rcu_work rwork;		/* Deferred free, see delayed_free_irq() */
	short refcnt;			/* Initialised to -1 by xen_irq_init() */
	u8 spurious_cnt;		/* Reset to 0 on a non-spurious late EOI */
	u8 is_accounted;		/* Counted in channels_on_cpu[cpu]? */
	short type;		/* type: IRQT_* */
	u8 mask_reason;		/* Why is event channel masked */
#define EVT_MASK_REASON_EXPLICIT	0x01
#define EVT_MASK_REASON_TEMPORARY	0x02
#define EVT_MASK_REASON_EOI_PENDING	0x04
	u8 is_active;		/* Is event just being handled? */
	unsigned irq;
	evtchn_port_t evtchn;   /* event channel */
	unsigned short cpu;     /* cpu bound */
	unsigned short eoi_cpu; /* EOI must happen on this cpu */
	unsigned int irq_epoch; /* If eoi_cpu valid: irq_epoch of event */
	u64 eoi_time;           /* Time in jiffies when to EOI. */
	raw_spinlock_t lock;	/* Serialises mask_reason updates (do_mask/do_unmask) */
	bool is_static;           /* Is event channel static */

	/* Type-specific data; which member is valid depends on 'type'. */
	union {
		unsigned short virq;
		enum ipi_vector ipi;
		struct {
			unsigned short pirq;
			unsigned short gsi;
			unsigned char vector;
			unsigned char flags;	/* PIRQ_* bits */
			uint16_t domid;
		} pirq;
		struct xenbus_device *interdomain;
	} u;
};
132 | |
/* Bits stored in irq_info.u.pirq.flags. */
#define PIRQ_NEEDS_EOI	(1 << 0)	/* Updated by pirq_query_unmask() */
#define PIRQ_SHAREABLE	(1 << 1)	/* Bind with BIND_PIRQ__WILL_SHARE */
#define PIRQ_MSI_GROUP	(1 << 2)	/* Users not visible in this part of the file */

/*
 * Tunables exposed as "xen.event_loop_timeout" and "xen.event_eoi_delay"
 * (see MODULE_PARAM_PREFIX), mode 0644.
 * NOTE(review): their consumers and units are not visible in this part of
 * the file - confirm before documenting further.
 */
static uint __read_mostly event_loop_timeout = 2;
module_param(event_loop_timeout, uint, 0644);

static uint __read_mostly event_eoi_delay = 10;
module_param(event_eoi_delay, uint, 0644);
142 | |
/* Event channel operations table (struct evtchn_ops is declared elsewhere). */
const struct evtchn_ops *evtchn_ops;

/*
 * This lock protects updates to the following mapping and reference-count
 * arrays. The lock does not need to be acquired to read the mapping tables.
 */
static DEFINE_MUTEX(irq_mapping_update_lock);

/*
 * Lock hierarchy:
 *
 * irq_mapping_update_lock
 *   IRQ-desc lock
 *     percpu eoi_list_lock
 *       irq_info->lock
 */

/* All irq_info structures, linked through irq_info.list. */
static LIST_HEAD(xen_irq_list_head);

/* IRQ <-> VIRQ mapping. */
static DEFINE_PER_CPU(int [NR_VIRQS], virq_to_irq) = {[0 ... NR_VIRQS-1] = -1};

/* IRQ <-> IPI mapping */
static DEFINE_PER_CPU(int [XEN_NR_IPIS], ipi_to_irq) = {[0 ... XEN_NR_IPIS-1] = -1};
/* Cache for IPI event channels - needed for hot cpu unplug (avoid RCU usage). */
static DEFINE_PER_CPU(evtchn_port_t [XEN_NR_IPIS], ipi_to_evtchn) = {[0 ... XEN_NR_IPIS-1] = 0};

/* Event channel distribution data */
static atomic_t channels_on_cpu[NR_CPUS];

/*
 * Two-level event channel -> irq table: an array of row pointers, each row
 * being one page of ints (see EVTCHN_ROW/EVTCHN_COL). Rows are allocated
 * on demand by set_evtchn_to_irq(); unmapped slots hold -1.
 */
static int **evtchn_to_irq;
#ifdef CONFIG_X86
/* Bitmap indexed by pirq, tested by pirq_check_eoi_map(). */
static unsigned long *pirq_eoi_map;
#endif
/* Selected "does this PIRQ need an EOI" predicate, used by do_eoi_pirq(). */
static bool (*pirq_needs_eoi)(struct irq_info *info);
178 | |
/*
 * Geometry of the two-level evtchn_to_irq table: every row is one page of
 * entries. EVTCHN_ROW()/EVTCHN_COL() split an event channel number into the
 * row index and the slot within that row. The argument is parenthesised so
 * that compound expressions (e.g. "base + off") expand correctly.
 */
#define EVTCHN_PER_ROW	(PAGE_SIZE / sizeof(**evtchn_to_irq))
#define EVTCHN_ROW(e)	((e) / EVTCHN_PER_ROW)
#define EVTCHN_COL(e)	((e) % EVTCHN_PER_ROW)
182 | |
/* Xen will never allocate port zero for any purpose. */
#define VALID_EVTCHN(chn)	((chn) != 0)

/*
 * irq_info pointers for legacy IRQs (irq < nr_legacy_irqs()); all other
 * IRQs keep theirs as irq chip data, see info_for_irq().
 */
static struct irq_info *legacy_info_ptrs[NR_IRQS_LEGACY];

/*
 * irq_chip instances, declared here so earlier code can reference them.
 * Their initialisers are not visible in this part of the file.
 */
static struct irq_chip xen_dynamic_chip;
static struct irq_chip xen_lateeoi_chip;
static struct irq_chip xen_percpu_chip;
static struct irq_chip xen_pirq_chip;
/* Forward declaration, used by enable_pirq(). */
static void enable_dynirq(struct irq_data *data);

/* Per-cpu epoch compared against irq_info.irq_epoch on late EOI. */
static DEFINE_PER_CPU(unsigned int, irq_epoch);
195 | |
196 | static void clear_evtchn_to_irq_row(int *evtchn_row) |
197 | { |
198 | unsigned col; |
199 | |
200 | for (col = 0; col < EVTCHN_PER_ROW; col++) |
201 | WRITE_ONCE(evtchn_row[col], -1); |
202 | } |
203 | |
204 | static void clear_evtchn_to_irq_all(void) |
205 | { |
206 | unsigned row; |
207 | |
208 | for (row = 0; row < EVTCHN_ROW(xen_evtchn_max_channels()); row++) { |
209 | if (evtchn_to_irq[row] == NULL) |
210 | continue; |
211 | clear_evtchn_to_irq_row(evtchn_row: evtchn_to_irq[row]); |
212 | } |
213 | } |
214 | |
215 | static int set_evtchn_to_irq(evtchn_port_t evtchn, unsigned int irq) |
216 | { |
217 | unsigned row; |
218 | unsigned col; |
219 | int *evtchn_row; |
220 | |
221 | if (evtchn >= xen_evtchn_max_channels()) |
222 | return -EINVAL; |
223 | |
224 | row = EVTCHN_ROW(evtchn); |
225 | col = EVTCHN_COL(evtchn); |
226 | |
227 | if (evtchn_to_irq[row] == NULL) { |
228 | /* Unallocated irq entries return -1 anyway */ |
229 | if (irq == -1) |
230 | return 0; |
231 | |
232 | evtchn_row = (int *) __get_free_pages(GFP_KERNEL, order: 0); |
233 | if (evtchn_row == NULL) |
234 | return -ENOMEM; |
235 | |
236 | clear_evtchn_to_irq_row(evtchn_row); |
237 | |
238 | /* |
239 | * We've prepared an empty row for the mapping. If a different |
240 | * thread was faster inserting it, we can drop ours. |
241 | */ |
242 | if (cmpxchg(&evtchn_to_irq[row], NULL, evtchn_row) != NULL) |
243 | free_page((unsigned long) evtchn_row); |
244 | } |
245 | |
246 | WRITE_ONCE(evtchn_to_irq[row][col], irq); |
247 | return 0; |
248 | } |
249 | |
250 | /* Get info for IRQ */ |
251 | static struct irq_info *info_for_irq(unsigned irq) |
252 | { |
253 | if (irq < nr_legacy_irqs()) |
254 | return legacy_info_ptrs[irq]; |
255 | else |
256 | return irq_get_chip_data(irq); |
257 | } |
258 | |
259 | static void set_info_for_irq(unsigned int irq, struct irq_info *info) |
260 | { |
261 | if (irq < nr_legacy_irqs()) |
262 | legacy_info_ptrs[irq] = info; |
263 | else |
264 | irq_set_chip_data(irq, data: info); |
265 | } |
266 | |
267 | static struct irq_info *evtchn_to_info(evtchn_port_t evtchn) |
268 | { |
269 | int irq; |
270 | |
271 | if (evtchn >= xen_evtchn_max_channels()) |
272 | return NULL; |
273 | if (evtchn_to_irq[EVTCHN_ROW(evtchn)] == NULL) |
274 | return NULL; |
275 | irq = READ_ONCE(evtchn_to_irq[EVTCHN_ROW(evtchn)][EVTCHN_COL(evtchn)]); |
276 | |
277 | return (irq < 0) ? NULL : info_for_irq(irq); |
278 | } |
279 | |
280 | /* Per CPU channel accounting */ |
281 | static void channels_on_cpu_dec(struct irq_info *info) |
282 | { |
283 | if (!info->is_accounted) |
284 | return; |
285 | |
286 | info->is_accounted = 0; |
287 | |
288 | if (WARN_ON_ONCE(info->cpu >= nr_cpu_ids)) |
289 | return; |
290 | |
291 | WARN_ON_ONCE(!atomic_add_unless(&channels_on_cpu[info->cpu], -1 , 0)); |
292 | } |
293 | |
294 | static void channels_on_cpu_inc(struct irq_info *info) |
295 | { |
296 | if (WARN_ON_ONCE(info->cpu >= nr_cpu_ids)) |
297 | return; |
298 | |
299 | if (WARN_ON_ONCE(!atomic_add_unless(&channels_on_cpu[info->cpu], 1, |
300 | INT_MAX))) |
301 | return; |
302 | |
303 | info->is_accounted = 1; |
304 | } |
305 | |
/* Release the descriptor of a dynamically allocated IRQ. */
static void xen_irq_free_desc(unsigned int irq)
{
	/* Legacy IRQ descriptors are managed by the arch. */
	if (irq < nr_legacy_irqs())
		return;

	irq_free_desc(irq);
}
312 | |
313 | static void delayed_free_irq(struct work_struct *work) |
314 | { |
315 | struct irq_info *info = container_of(to_rcu_work(work), struct irq_info, |
316 | rwork); |
317 | unsigned int irq = info->irq; |
318 | |
319 | /* Remove the info pointer only now, with no potential users left. */ |
320 | set_info_for_irq(irq, NULL); |
321 | |
322 | kfree(objp: info); |
323 | |
324 | xen_irq_free_desc(irq); |
325 | } |
326 | |
327 | /* Constructors for packed IRQ information. */ |
328 | static int xen_irq_info_common_setup(struct irq_info *info, |
329 | enum xen_irq_type type, |
330 | evtchn_port_t evtchn, |
331 | unsigned short cpu) |
332 | { |
333 | int ret; |
334 | |
335 | BUG_ON(info->type != IRQT_UNBOUND && info->type != type); |
336 | |
337 | info->type = type; |
338 | info->evtchn = evtchn; |
339 | info->cpu = cpu; |
340 | info->mask_reason = EVT_MASK_REASON_EXPLICIT; |
341 | raw_spin_lock_init(&info->lock); |
342 | |
343 | ret = set_evtchn_to_irq(evtchn, irq: info->irq); |
344 | if (ret < 0) |
345 | return ret; |
346 | |
347 | irq_clear_status_flags(irq: info->irq, clr: IRQ_NOREQUEST | IRQ_NOAUTOEN); |
348 | |
349 | return xen_evtchn_port_setup(evtchn); |
350 | } |
351 | |
352 | static int xen_irq_info_evtchn_setup(struct irq_info *info, |
353 | evtchn_port_t evtchn, |
354 | struct xenbus_device *dev) |
355 | { |
356 | int ret; |
357 | |
358 | ret = xen_irq_info_common_setup(info, type: IRQT_EVTCHN, evtchn, cpu: 0); |
359 | info->u.interdomain = dev; |
360 | if (dev) |
361 | atomic_inc(v: &dev->event_channels); |
362 | |
363 | return ret; |
364 | } |
365 | |
366 | static int xen_irq_info_ipi_setup(struct irq_info *info, unsigned int cpu, |
367 | evtchn_port_t evtchn, enum ipi_vector ipi) |
368 | { |
369 | info->u.ipi = ipi; |
370 | |
371 | per_cpu(ipi_to_irq, cpu)[ipi] = info->irq; |
372 | per_cpu(ipi_to_evtchn, cpu)[ipi] = evtchn; |
373 | |
374 | return xen_irq_info_common_setup(info, type: IRQT_IPI, evtchn, cpu: 0); |
375 | } |
376 | |
377 | static int xen_irq_info_virq_setup(struct irq_info *info, unsigned int cpu, |
378 | evtchn_port_t evtchn, unsigned int virq) |
379 | { |
380 | info->u.virq = virq; |
381 | |
382 | per_cpu(virq_to_irq, cpu)[virq] = info->irq; |
383 | |
384 | return xen_irq_info_common_setup(info, type: IRQT_VIRQ, evtchn, cpu: 0); |
385 | } |
386 | |
387 | static int xen_irq_info_pirq_setup(struct irq_info *info, evtchn_port_t evtchn, |
388 | unsigned int pirq, unsigned int gsi, |
389 | uint16_t domid, unsigned char flags) |
390 | { |
391 | info->u.pirq.pirq = pirq; |
392 | info->u.pirq.gsi = gsi; |
393 | info->u.pirq.domid = domid; |
394 | info->u.pirq.flags = flags; |
395 | |
396 | return xen_irq_info_common_setup(info, type: IRQT_PIRQ, evtchn, cpu: 0); |
397 | } |
398 | |
399 | static void xen_irq_info_cleanup(struct irq_info *info) |
400 | { |
401 | set_evtchn_to_irq(evtchn: info->evtchn, irq: -1); |
402 | xen_evtchn_port_remove(evtchn: info->evtchn, cpu: info->cpu); |
403 | info->evtchn = 0; |
404 | channels_on_cpu_dec(info); |
405 | } |
406 | |
407 | /* |
408 | * Accessors for packed IRQ information. |
409 | */ |
410 | static evtchn_port_t evtchn_from_irq(unsigned int irq) |
411 | { |
412 | const struct irq_info *info = NULL; |
413 | |
414 | if (likely(irq < nr_irqs)) |
415 | info = info_for_irq(irq); |
416 | if (!info) |
417 | return 0; |
418 | |
419 | return info->evtchn; |
420 | } |
421 | |
422 | unsigned int irq_from_evtchn(evtchn_port_t evtchn) |
423 | { |
424 | struct irq_info *info = evtchn_to_info(evtchn); |
425 | |
426 | return info ? info->irq : -1; |
427 | } |
428 | EXPORT_SYMBOL_GPL(irq_from_evtchn); |
429 | |
430 | int irq_evtchn_from_virq(unsigned int cpu, unsigned int virq, |
431 | evtchn_port_t *evtchn) |
432 | { |
433 | int irq = per_cpu(virq_to_irq, cpu)[virq]; |
434 | |
435 | *evtchn = evtchn_from_irq(irq); |
436 | |
437 | return irq; |
438 | } |
439 | |
440 | static enum ipi_vector ipi_from_irq(struct irq_info *info) |
441 | { |
442 | BUG_ON(info == NULL); |
443 | BUG_ON(info->type != IRQT_IPI); |
444 | |
445 | return info->u.ipi; |
446 | } |
447 | |
448 | static unsigned int virq_from_irq(struct irq_info *info) |
449 | { |
450 | BUG_ON(info == NULL); |
451 | BUG_ON(info->type != IRQT_VIRQ); |
452 | |
453 | return info->u.virq; |
454 | } |
455 | |
456 | static unsigned int pirq_from_irq(struct irq_info *info) |
457 | { |
458 | BUG_ON(info == NULL); |
459 | BUG_ON(info->type != IRQT_PIRQ); |
460 | |
461 | return info->u.pirq.pirq; |
462 | } |
463 | |
464 | unsigned int cpu_from_evtchn(evtchn_port_t evtchn) |
465 | { |
466 | struct irq_info *info = evtchn_to_info(evtchn); |
467 | |
468 | return info ? info->cpu : 0; |
469 | } |
470 | |
471 | static void do_mask(struct irq_info *info, u8 reason) |
472 | { |
473 | unsigned long flags; |
474 | |
475 | raw_spin_lock_irqsave(&info->lock, flags); |
476 | |
477 | if (!info->mask_reason) |
478 | mask_evtchn(port: info->evtchn); |
479 | |
480 | info->mask_reason |= reason; |
481 | |
482 | raw_spin_unlock_irqrestore(&info->lock, flags); |
483 | } |
484 | |
485 | static void do_unmask(struct irq_info *info, u8 reason) |
486 | { |
487 | unsigned long flags; |
488 | |
489 | raw_spin_lock_irqsave(&info->lock, flags); |
490 | |
491 | info->mask_reason &= ~reason; |
492 | |
493 | if (!info->mask_reason) |
494 | unmask_evtchn(port: info->evtchn); |
495 | |
496 | raw_spin_unlock_irqrestore(&info->lock, flags); |
497 | } |
498 | |
#ifdef CONFIG_X86
/* pirq_needs_eoi variant: test the pirq's bit in pirq_eoi_map. */
static bool pirq_check_eoi_map(struct irq_info *info)
{
	unsigned int pirq = pirq_from_irq(info);

	return test_bit(pirq, pirq_eoi_map);
}
#endif
505 | |
506 | static bool pirq_needs_eoi_flag(struct irq_info *info) |
507 | { |
508 | BUG_ON(info->type != IRQT_PIRQ); |
509 | |
510 | return info->u.pirq.flags & PIRQ_NEEDS_EOI; |
511 | } |
512 | |
513 | static void bind_evtchn_to_cpu(struct irq_info *info, unsigned int cpu, |
514 | bool force_affinity) |
515 | { |
516 | if (IS_ENABLED(CONFIG_SMP) && force_affinity) { |
517 | struct irq_data *data = irq_get_irq_data(irq: info->irq); |
518 | |
519 | irq_data_update_affinity(d: data, cpumask_of(cpu)); |
520 | irq_data_update_effective_affinity(d: data, cpumask_of(cpu)); |
521 | } |
522 | |
523 | xen_evtchn_port_bind_to_cpu(evtchn: info->evtchn, cpu, old_cpu: info->cpu); |
524 | |
525 | channels_on_cpu_dec(info); |
526 | info->cpu = cpu; |
527 | channels_on_cpu_inc(info); |
528 | } |
529 | |
530 | /** |
531 | * notify_remote_via_irq - send event to remote end of event channel via irq |
532 | * @irq: irq of event channel to send event to |
533 | * |
534 | * Unlike notify_remote_via_evtchn(), this is safe to use across |
535 | * save/restore. Notifications on a broken connection are silently |
536 | * dropped. |
537 | */ |
538 | void notify_remote_via_irq(int irq) |
539 | { |
540 | evtchn_port_t evtchn = evtchn_from_irq(irq); |
541 | |
542 | if (VALID_EVTCHN(evtchn)) |
543 | notify_remote_via_evtchn(port: evtchn); |
544 | } |
545 | EXPORT_SYMBOL_GPL(notify_remote_via_irq); |
546 | |
/*
 * Per-cpu state for delayed ("late") EOIs: a list of irq_info entries
 * ordered by eoi_time (see lateeoi_list_add()) and the delayed work that
 * processes it (xen_irq_lateeoi_worker()).
 */
struct lateeoi_work {
	struct delayed_work delayed;	/* Runs xen_irq_lateeoi_worker() */
	spinlock_t eoi_list_lock;	/* Protects eoi_list */
	struct list_head eoi_list;	/* irq_info.eoi_list entries */
};

static DEFINE_PER_CPU(struct lateeoi_work, lateeoi);
554 | |
555 | static void lateeoi_list_del(struct irq_info *info) |
556 | { |
557 | struct lateeoi_work *eoi = &per_cpu(lateeoi, info->eoi_cpu); |
558 | unsigned long flags; |
559 | |
560 | spin_lock_irqsave(&eoi->eoi_list_lock, flags); |
561 | list_del_init(entry: &info->eoi_list); |
562 | spin_unlock_irqrestore(lock: &eoi->eoi_list_lock, flags); |
563 | } |
564 | |
565 | static void lateeoi_list_add(struct irq_info *info) |
566 | { |
567 | struct lateeoi_work *eoi = &per_cpu(lateeoi, info->eoi_cpu); |
568 | struct irq_info *elem; |
569 | u64 now = get_jiffies_64(); |
570 | unsigned long delay; |
571 | unsigned long flags; |
572 | |
573 | if (now < info->eoi_time) |
574 | delay = info->eoi_time - now; |
575 | else |
576 | delay = 1; |
577 | |
578 | spin_lock_irqsave(&eoi->eoi_list_lock, flags); |
579 | |
580 | elem = list_first_entry_or_null(&eoi->eoi_list, struct irq_info, |
581 | eoi_list); |
582 | if (!elem || info->eoi_time < elem->eoi_time) { |
583 | list_add(new: &info->eoi_list, head: &eoi->eoi_list); |
584 | mod_delayed_work_on(cpu: info->eoi_cpu, wq: system_wq, |
585 | dwork: &eoi->delayed, delay); |
586 | } else { |
587 | list_for_each_entry_reverse(elem, &eoi->eoi_list, eoi_list) { |
588 | if (elem->eoi_time <= info->eoi_time) |
589 | break; |
590 | } |
591 | list_add(new: &info->eoi_list, head: &elem->eoi_list); |
592 | } |
593 | |
594 | spin_unlock_irqrestore(lock: &eoi->eoi_list_lock, flags); |
595 | } |
596 | |
597 | static void xen_irq_lateeoi_locked(struct irq_info *info, bool spurious) |
598 | { |
599 | evtchn_port_t evtchn; |
600 | unsigned int cpu; |
601 | unsigned int delay = 0; |
602 | |
603 | evtchn = info->evtchn; |
604 | if (!VALID_EVTCHN(evtchn) || !list_empty(head: &info->eoi_list)) |
605 | return; |
606 | |
607 | if (spurious) { |
608 | struct xenbus_device *dev = info->u.interdomain; |
609 | unsigned int threshold = 1; |
610 | |
611 | if (dev && dev->spurious_threshold) |
612 | threshold = dev->spurious_threshold; |
613 | |
614 | if ((1 << info->spurious_cnt) < (HZ << 2)) { |
615 | if (info->spurious_cnt != 0xFF) |
616 | info->spurious_cnt++; |
617 | } |
618 | if (info->spurious_cnt > threshold) { |
619 | delay = 1 << (info->spurious_cnt - 1 - threshold); |
620 | if (delay > HZ) |
621 | delay = HZ; |
622 | if (!info->eoi_time) |
623 | info->eoi_cpu = smp_processor_id(); |
624 | info->eoi_time = get_jiffies_64() + delay; |
625 | if (dev) |
626 | atomic_add(i: delay, v: &dev->jiffies_eoi_delayed); |
627 | } |
628 | if (dev) |
629 | atomic_inc(v: &dev->spurious_events); |
630 | } else { |
631 | info->spurious_cnt = 0; |
632 | } |
633 | |
634 | cpu = info->eoi_cpu; |
635 | if (info->eoi_time && |
636 | (info->irq_epoch == per_cpu(irq_epoch, cpu) || delay)) { |
637 | lateeoi_list_add(info); |
638 | return; |
639 | } |
640 | |
641 | info->eoi_time = 0; |
642 | |
643 | /* is_active hasn't been reset yet, do it now. */ |
644 | smp_store_release(&info->is_active, 0); |
645 | do_unmask(info, EVT_MASK_REASON_EOI_PENDING); |
646 | } |
647 | |
648 | static void xen_irq_lateeoi_worker(struct work_struct *work) |
649 | { |
650 | struct lateeoi_work *eoi; |
651 | struct irq_info *info; |
652 | u64 now = get_jiffies_64(); |
653 | unsigned long flags; |
654 | |
655 | eoi = container_of(to_delayed_work(work), struct lateeoi_work, delayed); |
656 | |
657 | rcu_read_lock(); |
658 | |
659 | while (true) { |
660 | spin_lock_irqsave(&eoi->eoi_list_lock, flags); |
661 | |
662 | info = list_first_entry_or_null(&eoi->eoi_list, struct irq_info, |
663 | eoi_list); |
664 | |
665 | if (info == NULL) |
666 | break; |
667 | |
668 | if (now < info->eoi_time) { |
669 | mod_delayed_work_on(cpu: info->eoi_cpu, wq: system_wq, |
670 | dwork: &eoi->delayed, |
671 | delay: info->eoi_time - now); |
672 | break; |
673 | } |
674 | |
675 | list_del_init(entry: &info->eoi_list); |
676 | |
677 | spin_unlock_irqrestore(lock: &eoi->eoi_list_lock, flags); |
678 | |
679 | info->eoi_time = 0; |
680 | |
681 | xen_irq_lateeoi_locked(info, spurious: false); |
682 | } |
683 | |
684 | spin_unlock_irqrestore(lock: &eoi->eoi_list_lock, flags); |
685 | |
686 | rcu_read_unlock(); |
687 | } |
688 | |
689 | static void xen_cpu_init_eoi(unsigned int cpu) |
690 | { |
691 | struct lateeoi_work *eoi = &per_cpu(lateeoi, cpu); |
692 | |
693 | INIT_DELAYED_WORK(&eoi->delayed, xen_irq_lateeoi_worker); |
694 | spin_lock_init(&eoi->eoi_list_lock); |
695 | INIT_LIST_HEAD(list: &eoi->eoi_list); |
696 | } |
697 | |
698 | void xen_irq_lateeoi(unsigned int irq, unsigned int eoi_flags) |
699 | { |
700 | struct irq_info *info; |
701 | |
702 | rcu_read_lock(); |
703 | |
704 | info = info_for_irq(irq); |
705 | |
706 | if (info) |
707 | xen_irq_lateeoi_locked(info, spurious: eoi_flags & XEN_EOI_FLAG_SPURIOUS); |
708 | |
709 | rcu_read_unlock(); |
710 | } |
711 | EXPORT_SYMBOL_GPL(xen_irq_lateeoi); |
712 | |
713 | static struct irq_info *xen_irq_init(unsigned int irq) |
714 | { |
715 | struct irq_info *info; |
716 | |
717 | info = kzalloc(size: sizeof(*info), GFP_KERNEL); |
718 | if (info) { |
719 | info->irq = irq; |
720 | info->type = IRQT_UNBOUND; |
721 | info->refcnt = -1; |
722 | INIT_RCU_WORK(&info->rwork, delayed_free_irq); |
723 | |
724 | set_info_for_irq(irq, info); |
725 | /* |
726 | * Interrupt affinity setting can be immediate. No point |
727 | * in delaying it until an interrupt is handled. |
728 | */ |
729 | irq_set_status_flags(irq, set: IRQ_MOVE_PCNTXT); |
730 | |
731 | INIT_LIST_HEAD(list: &info->eoi_list); |
732 | list_add_tail(new: &info->list, head: &xen_irq_list_head); |
733 | } |
734 | |
735 | return info; |
736 | } |
737 | |
738 | static struct irq_info *xen_allocate_irq_dynamic(void) |
739 | { |
740 | int irq = irq_alloc_desc_from(0, -1); |
741 | struct irq_info *info = NULL; |
742 | |
743 | if (irq >= 0) { |
744 | info = xen_irq_init(irq); |
745 | if (!info) |
746 | xen_irq_free_desc(irq); |
747 | } |
748 | |
749 | return info; |
750 | } |
751 | |
752 | static struct irq_info *xen_allocate_irq_gsi(unsigned int gsi) |
753 | { |
754 | int irq; |
755 | struct irq_info *info; |
756 | |
757 | /* |
758 | * A PV guest has no concept of a GSI (since it has no ACPI |
759 | * nor access to/knowledge of the physical APICs). Therefore |
760 | * all IRQs are dynamically allocated from the entire IRQ |
761 | * space. |
762 | */ |
763 | if (xen_pv_domain() && !xen_initial_domain()) |
764 | return xen_allocate_irq_dynamic(); |
765 | |
766 | /* Legacy IRQ descriptors are already allocated by the arch. */ |
767 | if (gsi < nr_legacy_irqs()) |
768 | irq = gsi; |
769 | else |
770 | irq = irq_alloc_desc_at(gsi, -1); |
771 | |
772 | info = xen_irq_init(irq); |
773 | if (!info) |
774 | xen_irq_free_desc(irq); |
775 | |
776 | return info; |
777 | } |
778 | |
779 | static void xen_free_irq(struct irq_info *info) |
780 | { |
781 | if (WARN_ON(!info)) |
782 | return; |
783 | |
784 | if (!list_empty(head: &info->eoi_list)) |
785 | lateeoi_list_del(info); |
786 | |
787 | list_del(entry: &info->list); |
788 | |
789 | WARN_ON(info->refcnt > 0); |
790 | |
791 | queue_rcu_work(wq: system_wq, rwork: &info->rwork); |
792 | } |
793 | |
794 | /* Not called for lateeoi events. */ |
795 | static void event_handler_exit(struct irq_info *info) |
796 | { |
797 | smp_store_release(&info->is_active, 0); |
798 | clear_evtchn(port: info->evtchn); |
799 | } |
800 | |
801 | static void pirq_query_unmask(struct irq_info *info) |
802 | { |
803 | struct physdev_irq_status_query irq_status; |
804 | |
805 | irq_status.irq = pirq_from_irq(info); |
806 | if (HYPERVISOR_physdev_op(PHYSDEVOP_irq_status_query, arg: &irq_status)) |
807 | irq_status.flags = 0; |
808 | |
809 | info->u.pirq.flags &= ~PIRQ_NEEDS_EOI; |
810 | if (irq_status.flags & XENIRQSTAT_needs_eoi) |
811 | info->u.pirq.flags |= PIRQ_NEEDS_EOI; |
812 | } |
813 | |
814 | static void do_eoi_pirq(struct irq_info *info) |
815 | { |
816 | struct physdev_eoi eoi = { .irq = pirq_from_irq(info) }; |
817 | int rc = 0; |
818 | |
819 | if (!VALID_EVTCHN(info->evtchn)) |
820 | return; |
821 | |
822 | event_handler_exit(info); |
823 | |
824 | if (pirq_needs_eoi(info)) { |
825 | rc = HYPERVISOR_physdev_op(PHYSDEVOP_eoi, arg: &eoi); |
826 | WARN_ON(rc); |
827 | } |
828 | } |
829 | |
830 | static void eoi_pirq(struct irq_data *data) |
831 | { |
832 | struct irq_info *info = info_for_irq(irq: data->irq); |
833 | |
834 | do_eoi_pirq(info); |
835 | } |
836 | |
837 | static void do_disable_dynirq(struct irq_info *info) |
838 | { |
839 | if (VALID_EVTCHN(info->evtchn)) |
840 | do_mask(info, EVT_MASK_REASON_EXPLICIT); |
841 | } |
842 | |
843 | static void disable_dynirq(struct irq_data *data) |
844 | { |
845 | struct irq_info *info = info_for_irq(irq: data->irq); |
846 | |
847 | if (info) |
848 | do_disable_dynirq(info); |
849 | } |
850 | |
851 | static void mask_ack_pirq(struct irq_data *data) |
852 | { |
853 | struct irq_info *info = info_for_irq(irq: data->irq); |
854 | |
855 | if (info) { |
856 | do_disable_dynirq(info); |
857 | do_eoi_pirq(info); |
858 | } |
859 | } |
860 | |
861 | static unsigned int __startup_pirq(struct irq_info *info) |
862 | { |
863 | struct evtchn_bind_pirq bind_pirq; |
864 | evtchn_port_t evtchn = info->evtchn; |
865 | int rc; |
866 | |
867 | if (VALID_EVTCHN(evtchn)) |
868 | goto out; |
869 | |
870 | bind_pirq.pirq = pirq_from_irq(info); |
871 | /* NB. We are happy to share unless we are probing. */ |
872 | bind_pirq.flags = info->u.pirq.flags & PIRQ_SHAREABLE ? |
873 | BIND_PIRQ__WILL_SHARE : 0; |
874 | rc = HYPERVISOR_event_channel_op(EVTCHNOP_bind_pirq, arg: &bind_pirq); |
875 | if (rc != 0) { |
876 | pr_warn("Failed to obtain physical IRQ %d\n" , info->irq); |
877 | return 0; |
878 | } |
879 | evtchn = bind_pirq.port; |
880 | |
881 | pirq_query_unmask(info); |
882 | |
883 | rc = set_evtchn_to_irq(evtchn, irq: info->irq); |
884 | if (rc) |
885 | goto err; |
886 | |
887 | info->evtchn = evtchn; |
888 | bind_evtchn_to_cpu(info, cpu: 0, force_affinity: false); |
889 | |
890 | rc = xen_evtchn_port_setup(evtchn); |
891 | if (rc) |
892 | goto err; |
893 | |
894 | out: |
895 | do_unmask(info, EVT_MASK_REASON_EXPLICIT); |
896 | |
897 | do_eoi_pirq(info); |
898 | |
899 | return 0; |
900 | |
901 | err: |
902 | pr_err("irq%d: Failed to set port to irq mapping (%d)\n" , info->irq, |
903 | rc); |
904 | xen_evtchn_close(port: evtchn); |
905 | return 0; |
906 | } |
907 | |
908 | static unsigned int startup_pirq(struct irq_data *data) |
909 | { |
910 | struct irq_info *info = info_for_irq(irq: data->irq); |
911 | |
912 | return __startup_pirq(info); |
913 | } |
914 | |
915 | static void shutdown_pirq(struct irq_data *data) |
916 | { |
917 | struct irq_info *info = info_for_irq(irq: data->irq); |
918 | evtchn_port_t evtchn = info->evtchn; |
919 | |
920 | BUG_ON(info->type != IRQT_PIRQ); |
921 | |
922 | if (!VALID_EVTCHN(evtchn)) |
923 | return; |
924 | |
925 | do_mask(info, EVT_MASK_REASON_EXPLICIT); |
926 | xen_irq_info_cleanup(info); |
927 | xen_evtchn_close(port: evtchn); |
928 | } |
929 | |
/* Enabling a PIRQ is the same as enabling any dynamic irq. */
static void enable_pirq(struct irq_data *data)
{
	enable_dynirq(data);
}
934 | |
/* Disabling a PIRQ is the same as disabling any dynamic irq. */
static void disable_pirq(struct irq_data *data)
{
	disable_dynirq(data);
}
939 | |
940 | int xen_irq_from_gsi(unsigned gsi) |
941 | { |
942 | struct irq_info *info; |
943 | |
944 | list_for_each_entry(info, &xen_irq_list_head, list) { |
945 | if (info->type != IRQT_PIRQ) |
946 | continue; |
947 | |
948 | if (info->u.pirq.gsi == gsi) |
949 | return info->irq; |
950 | } |
951 | |
952 | return -1; |
953 | } |
954 | EXPORT_SYMBOL_GPL(xen_irq_from_gsi); |
955 | |
956 | static void __unbind_from_irq(struct irq_info *info, unsigned int irq) |
957 | { |
958 | evtchn_port_t evtchn; |
959 | bool close_evtchn = false; |
960 | |
961 | if (!info) { |
962 | xen_irq_free_desc(irq); |
963 | return; |
964 | } |
965 | |
966 | if (info->refcnt > 0) { |
967 | info->refcnt--; |
968 | if (info->refcnt != 0) |
969 | return; |
970 | } |
971 | |
972 | evtchn = info->evtchn; |
973 | |
974 | if (VALID_EVTCHN(evtchn)) { |
975 | unsigned int cpu = info->cpu; |
976 | struct xenbus_device *dev; |
977 | |
978 | if (!info->is_static) |
979 | close_evtchn = true; |
980 | |
981 | switch (info->type) { |
982 | case IRQT_VIRQ: |
983 | per_cpu(virq_to_irq, cpu)[virq_from_irq(info)] = -1; |
984 | break; |
985 | case IRQT_IPI: |
986 | per_cpu(ipi_to_irq, cpu)[ipi_from_irq(info)] = -1; |
987 | per_cpu(ipi_to_evtchn, cpu)[ipi_from_irq(info)] = 0; |
988 | break; |
989 | case IRQT_EVTCHN: |
990 | dev = info->u.interdomain; |
991 | if (dev) |
992 | atomic_dec(v: &dev->event_channels); |
993 | break; |
994 | default: |
995 | break; |
996 | } |
997 | |
998 | xen_irq_info_cleanup(info); |
999 | |
1000 | if (close_evtchn) |
1001 | xen_evtchn_close(port: evtchn); |
1002 | } |
1003 | |
1004 | xen_free_irq(info); |
1005 | } |
1006 | |
1007 | /* |
1008 | * Do not make any assumptions regarding the relationship between the |
1009 | * IRQ number returned here and the Xen pirq argument. |
1010 | * |
1011 | * Note: We don't assign an event channel until the irq actually started |
1012 | * up. Return an existing irq if we've already got one for the gsi. |
1013 | * |
1014 | * Shareable implies level triggered, not shareable implies edge |
1015 | * triggered here. |
1016 | */ |
1017 | int xen_bind_pirq_gsi_to_irq(unsigned gsi, |
1018 | unsigned pirq, int shareable, char *name) |
1019 | { |
1020 | struct irq_info *info; |
1021 | struct physdev_irq irq_op; |
1022 | int ret; |
1023 | |
1024 | mutex_lock(&irq_mapping_update_lock); |
1025 | |
1026 | ret = xen_irq_from_gsi(gsi); |
1027 | if (ret != -1) { |
1028 | pr_info("%s: returning irq %d for gsi %u\n" , |
1029 | __func__, ret, gsi); |
1030 | goto out; |
1031 | } |
1032 | |
1033 | info = xen_allocate_irq_gsi(gsi); |
1034 | if (!info) |
1035 | goto out; |
1036 | |
1037 | irq_op.irq = info->irq; |
1038 | irq_op.vector = 0; |
1039 | |
1040 | /* Only the privileged domain can do this. For non-priv, the pcifront |
1041 | * driver provides a PCI bus that does the call to do exactly |
1042 | * this in the priv domain. */ |
1043 | if (xen_initial_domain() && |
1044 | HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, arg: &irq_op)) { |
1045 | xen_free_irq(info); |
1046 | ret = -ENOSPC; |
1047 | goto out; |
1048 | } |
1049 | |
1050 | ret = xen_irq_info_pirq_setup(info, evtchn: 0, pirq, gsi, DOMID_SELF, |
1051 | flags: shareable ? PIRQ_SHAREABLE : 0); |
1052 | if (ret < 0) { |
1053 | __unbind_from_irq(info, irq: info->irq); |
1054 | goto out; |
1055 | } |
1056 | |
1057 | pirq_query_unmask(info); |
1058 | /* We try to use the handler with the appropriate semantic for the |
1059 | * type of interrupt: if the interrupt is an edge triggered |
1060 | * interrupt we use handle_edge_irq. |
1061 | * |
1062 | * On the other hand if the interrupt is level triggered we use |
1063 | * handle_fasteoi_irq like the native code does for this kind of |
1064 | * interrupts. |
1065 | * |
1066 | * Depending on the Xen version, pirq_needs_eoi might return true |
1067 | * not only for level triggered interrupts but for edge triggered |
1068 | * interrupts too. In any case Xen always honors the eoi mechanism, |
1069 | * not injecting any more pirqs of the same kind if the first one |
1070 | * hasn't received an eoi yet. Therefore using the fasteoi handler |
1071 | * is the right choice either way. |
1072 | */ |
1073 | if (shareable) |
1074 | irq_set_chip_and_handler_name(irq: info->irq, chip: &xen_pirq_chip, |
1075 | handle: handle_fasteoi_irq, name); |
1076 | else |
1077 | irq_set_chip_and_handler_name(irq: info->irq, chip: &xen_pirq_chip, |
1078 | handle: handle_edge_irq, name); |
1079 | |
1080 | ret = info->irq; |
1081 | |
1082 | out: |
1083 | mutex_unlock(lock: &irq_mapping_update_lock); |
1084 | |
1085 | return ret; |
1086 | } |
1087 | |
1088 | #ifdef CONFIG_PCI_MSI |
1089 | int xen_allocate_pirq_msi(struct pci_dev *dev, struct msi_desc *msidesc) |
1090 | { |
1091 | int rc; |
1092 | struct physdev_get_free_pirq op_get_free_pirq; |
1093 | |
1094 | op_get_free_pirq.type = MAP_PIRQ_TYPE_MSI; |
1095 | rc = HYPERVISOR_physdev_op(PHYSDEVOP_get_free_pirq, arg: &op_get_free_pirq); |
1096 | |
1097 | WARN_ONCE(rc == -ENOSYS, |
1098 | "hypervisor does not support the PHYSDEVOP_get_free_pirq interface\n" ); |
1099 | |
1100 | return rc ? -1 : op_get_free_pirq.pirq; |
1101 | } |
1102 | |
1103 | int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc, |
1104 | int pirq, int nvec, const char *name, domid_t domid) |
1105 | { |
1106 | int i, irq, ret; |
1107 | struct irq_info *info; |
1108 | |
1109 | mutex_lock(&irq_mapping_update_lock); |
1110 | |
1111 | irq = irq_alloc_descs(-1, 0, nvec, -1); |
1112 | if (irq < 0) |
1113 | goto out; |
1114 | |
1115 | for (i = 0; i < nvec; i++) { |
1116 | info = xen_irq_init(irq: irq + i); |
1117 | if (!info) { |
1118 | ret = -ENOMEM; |
1119 | goto error_irq; |
1120 | } |
1121 | |
1122 | irq_set_chip_and_handler_name(irq: irq + i, chip: &xen_pirq_chip, handle: handle_edge_irq, name); |
1123 | |
1124 | ret = xen_irq_info_pirq_setup(info, evtchn: 0, pirq: pirq + i, gsi: 0, domid, |
1125 | flags: i == 0 ? 0 : PIRQ_MSI_GROUP); |
1126 | if (ret < 0) |
1127 | goto error_irq; |
1128 | } |
1129 | |
1130 | ret = irq_set_msi_desc(irq, entry: msidesc); |
1131 | if (ret < 0) |
1132 | goto error_irq; |
1133 | out: |
1134 | mutex_unlock(lock: &irq_mapping_update_lock); |
1135 | return irq; |
1136 | |
1137 | error_irq: |
1138 | while (nvec--) { |
1139 | info = info_for_irq(irq: irq + nvec); |
1140 | __unbind_from_irq(info, irq: irq + nvec); |
1141 | } |
1142 | mutex_unlock(lock: &irq_mapping_update_lock); |
1143 | return ret; |
1144 | } |
1145 | #endif |
1146 | |
1147 | int xen_destroy_irq(int irq) |
1148 | { |
1149 | struct physdev_unmap_pirq unmap_irq; |
1150 | struct irq_info *info = info_for_irq(irq); |
1151 | int rc = -ENOENT; |
1152 | |
1153 | mutex_lock(&irq_mapping_update_lock); |
1154 | |
1155 | /* |
1156 | * If trying to remove a vector in a MSI group different |
1157 | * than the first one skip the PIRQ unmap unless this vector |
1158 | * is the first one in the group. |
1159 | */ |
1160 | if (xen_initial_domain() && !(info->u.pirq.flags & PIRQ_MSI_GROUP)) { |
1161 | unmap_irq.pirq = info->u.pirq.pirq; |
1162 | unmap_irq.domid = info->u.pirq.domid; |
1163 | rc = HYPERVISOR_physdev_op(PHYSDEVOP_unmap_pirq, arg: &unmap_irq); |
1164 | /* If another domain quits without making the pci_disable_msix |
1165 | * call, the Xen hypervisor takes care of freeing the PIRQs |
1166 | * (free_domain_pirqs). |
1167 | */ |
1168 | if ((rc == -ESRCH && info->u.pirq.domid != DOMID_SELF)) |
1169 | pr_info("domain %d does not have %d anymore\n" , |
1170 | info->u.pirq.domid, info->u.pirq.pirq); |
1171 | else if (rc) { |
1172 | pr_warn("unmap irq failed %d\n" , rc); |
1173 | goto out; |
1174 | } |
1175 | } |
1176 | |
1177 | xen_free_irq(info); |
1178 | |
1179 | out: |
1180 | mutex_unlock(lock: &irq_mapping_update_lock); |
1181 | return rc; |
1182 | } |
1183 | |
/* Look up the irq_info for @irq and return what pirq_from_irq() reports. */
int xen_pirq_from_irq(unsigned irq)
{
	return pirq_from_irq(info_for_irq(irq));
}
EXPORT_SYMBOL_GPL(xen_pirq_from_irq);
1191 | |
1192 | static int bind_evtchn_to_irq_chip(evtchn_port_t evtchn, struct irq_chip *chip, |
1193 | struct xenbus_device *dev, bool shared) |
1194 | { |
1195 | int ret = -ENOMEM; |
1196 | struct irq_info *info; |
1197 | |
1198 | if (evtchn >= xen_evtchn_max_channels()) |
1199 | return -ENOMEM; |
1200 | |
1201 | mutex_lock(&irq_mapping_update_lock); |
1202 | |
1203 | info = evtchn_to_info(evtchn); |
1204 | |
1205 | if (!info) { |
1206 | info = xen_allocate_irq_dynamic(); |
1207 | if (!info) |
1208 | goto out; |
1209 | |
1210 | irq_set_chip_and_handler_name(irq: info->irq, chip, |
1211 | handle: handle_edge_irq, name: "event" ); |
1212 | |
1213 | ret = xen_irq_info_evtchn_setup(info, evtchn, dev); |
1214 | if (ret < 0) { |
1215 | __unbind_from_irq(info, irq: info->irq); |
1216 | goto out; |
1217 | } |
1218 | /* |
1219 | * New interdomain events are initially bound to vCPU0 This |
1220 | * is required to setup the event channel in the first |
1221 | * place and also important for UP guests because the |
1222 | * affinity setting is not invoked on them so nothing would |
1223 | * bind the channel. |
1224 | */ |
1225 | bind_evtchn_to_cpu(info, cpu: 0, force_affinity: false); |
1226 | } else if (!WARN_ON(info->type != IRQT_EVTCHN)) { |
1227 | if (shared && !WARN_ON(info->refcnt < 0)) |
1228 | info->refcnt++; |
1229 | } |
1230 | |
1231 | ret = info->irq; |
1232 | |
1233 | out: |
1234 | mutex_unlock(lock: &irq_mapping_update_lock); |
1235 | |
1236 | return ret; |
1237 | } |
1238 | |
1239 | int bind_evtchn_to_irq(evtchn_port_t evtchn) |
1240 | { |
1241 | return bind_evtchn_to_irq_chip(evtchn, chip: &xen_dynamic_chip, NULL, shared: false); |
1242 | } |
1243 | EXPORT_SYMBOL_GPL(bind_evtchn_to_irq); |
1244 | |
1245 | int bind_evtchn_to_irq_lateeoi(evtchn_port_t evtchn) |
1246 | { |
1247 | return bind_evtchn_to_irq_chip(evtchn, chip: &xen_lateeoi_chip, NULL, shared: false); |
1248 | } |
1249 | EXPORT_SYMBOL_GPL(bind_evtchn_to_irq_lateeoi); |
1250 | |
1251 | static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu) |
1252 | { |
1253 | struct evtchn_bind_ipi bind_ipi; |
1254 | evtchn_port_t evtchn; |
1255 | struct irq_info *info; |
1256 | int ret; |
1257 | |
1258 | mutex_lock(&irq_mapping_update_lock); |
1259 | |
1260 | ret = per_cpu(ipi_to_irq, cpu)[ipi]; |
1261 | |
1262 | if (ret == -1) { |
1263 | info = xen_allocate_irq_dynamic(); |
1264 | if (!info) |
1265 | goto out; |
1266 | |
1267 | irq_set_chip_and_handler_name(irq: info->irq, chip: &xen_percpu_chip, |
1268 | handle: handle_percpu_irq, name: "ipi" ); |
1269 | |
1270 | bind_ipi.vcpu = xen_vcpu_nr(cpu); |
1271 | if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi, |
1272 | arg: &bind_ipi) != 0) |
1273 | BUG(); |
1274 | evtchn = bind_ipi.port; |
1275 | |
1276 | ret = xen_irq_info_ipi_setup(info, cpu, evtchn, ipi); |
1277 | if (ret < 0) { |
1278 | __unbind_from_irq(info, irq: info->irq); |
1279 | goto out; |
1280 | } |
1281 | /* |
1282 | * Force the affinity mask to the target CPU so proc shows |
1283 | * the correct target. |
1284 | */ |
1285 | bind_evtchn_to_cpu(info, cpu, force_affinity: true); |
1286 | ret = info->irq; |
1287 | } else { |
1288 | info = info_for_irq(irq: ret); |
1289 | WARN_ON(info == NULL || info->type != IRQT_IPI); |
1290 | } |
1291 | |
1292 | out: |
1293 | mutex_unlock(lock: &irq_mapping_update_lock); |
1294 | return ret; |
1295 | } |
1296 | |
1297 | static int bind_interdomain_evtchn_to_irq_chip(struct xenbus_device *dev, |
1298 | evtchn_port_t remote_port, |
1299 | struct irq_chip *chip, |
1300 | bool shared) |
1301 | { |
1302 | struct evtchn_bind_interdomain bind_interdomain; |
1303 | int err; |
1304 | |
1305 | bind_interdomain.remote_dom = dev->otherend_id; |
1306 | bind_interdomain.remote_port = remote_port; |
1307 | |
1308 | err = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain, |
1309 | arg: &bind_interdomain); |
1310 | |
1311 | return err ? : bind_evtchn_to_irq_chip(evtchn: bind_interdomain.local_port, |
1312 | chip, dev, shared); |
1313 | } |
1314 | |
1315 | int bind_interdomain_evtchn_to_irq_lateeoi(struct xenbus_device *dev, |
1316 | evtchn_port_t remote_port) |
1317 | { |
1318 | return bind_interdomain_evtchn_to_irq_chip(dev, remote_port, |
1319 | chip: &xen_lateeoi_chip, shared: false); |
1320 | } |
1321 | EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irq_lateeoi); |
1322 | |
1323 | static int find_virq(unsigned int virq, unsigned int cpu, evtchn_port_t *evtchn) |
1324 | { |
1325 | struct evtchn_status status; |
1326 | evtchn_port_t port; |
1327 | int rc = -ENOENT; |
1328 | |
1329 | memset(&status, 0, sizeof(status)); |
1330 | for (port = 0; port < xen_evtchn_max_channels(); port++) { |
1331 | status.dom = DOMID_SELF; |
1332 | status.port = port; |
1333 | rc = HYPERVISOR_event_channel_op(EVTCHNOP_status, arg: &status); |
1334 | if (rc < 0) |
1335 | continue; |
1336 | if (status.status != EVTCHNSTAT_virq) |
1337 | continue; |
1338 | if (status.u.virq == virq && status.vcpu == xen_vcpu_nr(cpu)) { |
1339 | *evtchn = port; |
1340 | break; |
1341 | } |
1342 | } |
1343 | return rc; |
1344 | } |
1345 | |
1346 | /** |
1347 | * xen_evtchn_nr_channels - number of usable event channel ports |
1348 | * |
1349 | * This may be less than the maximum supported by the current |
1350 | * hypervisor ABI. Use xen_evtchn_max_channels() for the maximum |
1351 | * supported. |
1352 | */ |
1353 | unsigned xen_evtchn_nr_channels(void) |
1354 | { |
1355 | return evtchn_ops->nr_channels(); |
1356 | } |
1357 | EXPORT_SYMBOL_GPL(xen_evtchn_nr_channels); |
1358 | |
1359 | int bind_virq_to_irq(unsigned int virq, unsigned int cpu, bool percpu) |
1360 | { |
1361 | struct evtchn_bind_virq bind_virq; |
1362 | evtchn_port_t evtchn = 0; |
1363 | struct irq_info *info; |
1364 | int ret; |
1365 | |
1366 | mutex_lock(&irq_mapping_update_lock); |
1367 | |
1368 | ret = per_cpu(virq_to_irq, cpu)[virq]; |
1369 | |
1370 | if (ret == -1) { |
1371 | info = xen_allocate_irq_dynamic(); |
1372 | if (!info) |
1373 | goto out; |
1374 | |
1375 | if (percpu) |
1376 | irq_set_chip_and_handler_name(irq: info->irq, chip: &xen_percpu_chip, |
1377 | handle: handle_percpu_irq, name: "virq" ); |
1378 | else |
1379 | irq_set_chip_and_handler_name(irq: info->irq, chip: &xen_dynamic_chip, |
1380 | handle: handle_edge_irq, name: "virq" ); |
1381 | |
1382 | bind_virq.virq = virq; |
1383 | bind_virq.vcpu = xen_vcpu_nr(cpu); |
1384 | ret = HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq, |
1385 | arg: &bind_virq); |
1386 | if (ret == 0) |
1387 | evtchn = bind_virq.port; |
1388 | else { |
1389 | if (ret == -EEXIST) |
1390 | ret = find_virq(virq, cpu, evtchn: &evtchn); |
1391 | BUG_ON(ret < 0); |
1392 | } |
1393 | |
1394 | ret = xen_irq_info_virq_setup(info, cpu, evtchn, virq); |
1395 | if (ret < 0) { |
1396 | __unbind_from_irq(info, irq: info->irq); |
1397 | goto out; |
1398 | } |
1399 | |
1400 | /* |
1401 | * Force the affinity mask for percpu interrupts so proc |
1402 | * shows the correct target. |
1403 | */ |
1404 | bind_evtchn_to_cpu(info, cpu, force_affinity: percpu); |
1405 | ret = info->irq; |
1406 | } else { |
1407 | info = info_for_irq(irq: ret); |
1408 | WARN_ON(info == NULL || info->type != IRQT_VIRQ); |
1409 | } |
1410 | |
1411 | out: |
1412 | mutex_unlock(lock: &irq_mapping_update_lock); |
1413 | |
1414 | return ret; |
1415 | } |
1416 | |
1417 | static void unbind_from_irq(unsigned int irq) |
1418 | { |
1419 | struct irq_info *info; |
1420 | |
1421 | mutex_lock(&irq_mapping_update_lock); |
1422 | info = info_for_irq(irq); |
1423 | __unbind_from_irq(info, irq); |
1424 | mutex_unlock(lock: &irq_mapping_update_lock); |
1425 | } |
1426 | |
1427 | static int bind_evtchn_to_irqhandler_chip(evtchn_port_t evtchn, |
1428 | irq_handler_t handler, |
1429 | unsigned long irqflags, |
1430 | const char *devname, void *dev_id, |
1431 | struct irq_chip *chip) |
1432 | { |
1433 | int irq, retval; |
1434 | |
1435 | irq = bind_evtchn_to_irq_chip(evtchn, chip, NULL, |
1436 | shared: irqflags & IRQF_SHARED); |
1437 | if (irq < 0) |
1438 | return irq; |
1439 | retval = request_irq(irq, handler, flags: irqflags, name: devname, dev: dev_id); |
1440 | if (retval != 0) { |
1441 | unbind_from_irq(irq); |
1442 | return retval; |
1443 | } |
1444 | |
1445 | return irq; |
1446 | } |
1447 | |
1448 | int bind_evtchn_to_irqhandler(evtchn_port_t evtchn, |
1449 | irq_handler_t handler, |
1450 | unsigned long irqflags, |
1451 | const char *devname, void *dev_id) |
1452 | { |
1453 | return bind_evtchn_to_irqhandler_chip(evtchn, handler, irqflags, |
1454 | devname, dev_id, |
1455 | chip: &xen_dynamic_chip); |
1456 | } |
1457 | EXPORT_SYMBOL_GPL(bind_evtchn_to_irqhandler); |
1458 | |
1459 | int bind_evtchn_to_irqhandler_lateeoi(evtchn_port_t evtchn, |
1460 | irq_handler_t handler, |
1461 | unsigned long irqflags, |
1462 | const char *devname, void *dev_id) |
1463 | { |
1464 | return bind_evtchn_to_irqhandler_chip(evtchn, handler, irqflags, |
1465 | devname, dev_id, |
1466 | chip: &xen_lateeoi_chip); |
1467 | } |
1468 | EXPORT_SYMBOL_GPL(bind_evtchn_to_irqhandler_lateeoi); |
1469 | |
1470 | static int bind_interdomain_evtchn_to_irqhandler_chip( |
1471 | struct xenbus_device *dev, evtchn_port_t remote_port, |
1472 | irq_handler_t handler, unsigned long irqflags, |
1473 | const char *devname, void *dev_id, struct irq_chip *chip) |
1474 | { |
1475 | int irq, retval; |
1476 | |
1477 | irq = bind_interdomain_evtchn_to_irq_chip(dev, remote_port, chip, |
1478 | shared: irqflags & IRQF_SHARED); |
1479 | if (irq < 0) |
1480 | return irq; |
1481 | |
1482 | retval = request_irq(irq, handler, flags: irqflags, name: devname, dev: dev_id); |
1483 | if (retval != 0) { |
1484 | unbind_from_irq(irq); |
1485 | return retval; |
1486 | } |
1487 | |
1488 | return irq; |
1489 | } |
1490 | |
1491 | int bind_interdomain_evtchn_to_irqhandler_lateeoi(struct xenbus_device *dev, |
1492 | evtchn_port_t remote_port, |
1493 | irq_handler_t handler, |
1494 | unsigned long irqflags, |
1495 | const char *devname, |
1496 | void *dev_id) |
1497 | { |
1498 | return bind_interdomain_evtchn_to_irqhandler_chip(dev, |
1499 | remote_port, handler, irqflags, devname, |
1500 | dev_id, chip: &xen_lateeoi_chip); |
1501 | } |
1502 | EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irqhandler_lateeoi); |
1503 | |
1504 | int bind_virq_to_irqhandler(unsigned int virq, unsigned int cpu, |
1505 | irq_handler_t handler, |
1506 | unsigned long irqflags, const char *devname, void *dev_id) |
1507 | { |
1508 | int irq, retval; |
1509 | |
1510 | irq = bind_virq_to_irq(virq, cpu, percpu: irqflags & IRQF_PERCPU); |
1511 | if (irq < 0) |
1512 | return irq; |
1513 | retval = request_irq(irq, handler, flags: irqflags, name: devname, dev: dev_id); |
1514 | if (retval != 0) { |
1515 | unbind_from_irq(irq); |
1516 | return retval; |
1517 | } |
1518 | |
1519 | return irq; |
1520 | } |
1521 | EXPORT_SYMBOL_GPL(bind_virq_to_irqhandler); |
1522 | |
1523 | int bind_ipi_to_irqhandler(enum ipi_vector ipi, |
1524 | unsigned int cpu, |
1525 | irq_handler_t handler, |
1526 | unsigned long irqflags, |
1527 | const char *devname, |
1528 | void *dev_id) |
1529 | { |
1530 | int irq, retval; |
1531 | |
1532 | irq = bind_ipi_to_irq(ipi, cpu); |
1533 | if (irq < 0) |
1534 | return irq; |
1535 | |
1536 | irqflags |= IRQF_NO_SUSPEND | IRQF_FORCE_RESUME | IRQF_EARLY_RESUME; |
1537 | retval = request_irq(irq, handler, flags: irqflags, name: devname, dev: dev_id); |
1538 | if (retval != 0) { |
1539 | unbind_from_irq(irq); |
1540 | return retval; |
1541 | } |
1542 | |
1543 | return irq; |
1544 | } |
1545 | |
/*
 * Remove the handler identified by @dev_id from @irq and drop the
 * irq's Xen binding. Warns and does nothing if @irq has no irq_info.
 */
void unbind_from_irqhandler(unsigned int irq, void *dev_id)
{
	if (WARN_ON(!info_for_irq(irq)))
		return;

	free_irq(irq, dev_id);
	unbind_from_irq(irq);
}
EXPORT_SYMBOL_GPL(unbind_from_irqhandler);
1556 | |
1557 | /** |
1558 | * xen_set_irq_priority() - set an event channel priority. |
1559 | * @irq:irq bound to an event channel. |
1560 | * @priority: priority between XEN_IRQ_PRIORITY_MAX and XEN_IRQ_PRIORITY_MIN. |
1561 | */ |
1562 | int xen_set_irq_priority(unsigned irq, unsigned priority) |
1563 | { |
1564 | struct evtchn_set_priority set_priority; |
1565 | |
1566 | set_priority.port = evtchn_from_irq(irq); |
1567 | set_priority.priority = priority; |
1568 | |
1569 | return HYPERVISOR_event_channel_op(EVTCHNOP_set_priority, |
1570 | arg: &set_priority); |
1571 | } |
1572 | EXPORT_SYMBOL_GPL(xen_set_irq_priority); |
1573 | |
1574 | int evtchn_make_refcounted(evtchn_port_t evtchn, bool is_static) |
1575 | { |
1576 | struct irq_info *info = evtchn_to_info(evtchn); |
1577 | |
1578 | if (!info) |
1579 | return -ENOENT; |
1580 | |
1581 | WARN_ON(info->refcnt != -1); |
1582 | |
1583 | info->refcnt = 1; |
1584 | info->is_static = is_static; |
1585 | |
1586 | return 0; |
1587 | } |
1588 | EXPORT_SYMBOL_GPL(evtchn_make_refcounted); |
1589 | |
1590 | int evtchn_get(evtchn_port_t evtchn) |
1591 | { |
1592 | struct irq_info *info; |
1593 | int err = -ENOENT; |
1594 | |
1595 | if (evtchn >= xen_evtchn_max_channels()) |
1596 | return -EINVAL; |
1597 | |
1598 | mutex_lock(&irq_mapping_update_lock); |
1599 | |
1600 | info = evtchn_to_info(evtchn); |
1601 | |
1602 | if (!info) |
1603 | goto done; |
1604 | |
1605 | err = -EINVAL; |
1606 | if (info->refcnt <= 0 || info->refcnt == SHRT_MAX) |
1607 | goto done; |
1608 | |
1609 | info->refcnt++; |
1610 | err = 0; |
1611 | done: |
1612 | mutex_unlock(lock: &irq_mapping_update_lock); |
1613 | |
1614 | return err; |
1615 | } |
1616 | EXPORT_SYMBOL_GPL(evtchn_get); |
1617 | |
1618 | void evtchn_put(evtchn_port_t evtchn) |
1619 | { |
1620 | struct irq_info *info = evtchn_to_info(evtchn); |
1621 | |
1622 | if (WARN_ON(!info)) |
1623 | return; |
1624 | unbind_from_irq(irq: info->irq); |
1625 | } |
1626 | EXPORT_SYMBOL_GPL(evtchn_put); |
1627 | |
1628 | void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector) |
1629 | { |
1630 | evtchn_port_t evtchn; |
1631 | |
1632 | #ifdef CONFIG_X86 |
1633 | if (unlikely(vector == XEN_NMI_VECTOR)) { |
1634 | int rc = HYPERVISOR_vcpu_op(VCPUOP_send_nmi, vcpuid: xen_vcpu_nr(cpu), |
1635 | NULL); |
1636 | if (rc < 0) |
1637 | printk(KERN_WARNING "Sending nmi to CPU%d failed (rc:%d)\n" , cpu, rc); |
1638 | return; |
1639 | } |
1640 | #endif |
1641 | evtchn = per_cpu(ipi_to_evtchn, cpu)[vector]; |
1642 | BUG_ON(evtchn == 0); |
1643 | notify_remote_via_evtchn(port: evtchn); |
1644 | } |
1645 | |
1646 | struct evtchn_loop_ctrl { |
1647 | ktime_t timeout; |
1648 | unsigned count; |
1649 | bool defer_eoi; |
1650 | }; |
1651 | |
1652 | void handle_irq_for_port(evtchn_port_t port, struct evtchn_loop_ctrl *ctrl) |
1653 | { |
1654 | struct irq_info *info = evtchn_to_info(evtchn: port); |
1655 | struct xenbus_device *dev; |
1656 | |
1657 | if (!info) |
1658 | return; |
1659 | |
1660 | /* |
1661 | * Check for timeout every 256 events. |
1662 | * We are setting the timeout value only after the first 256 |
1663 | * events in order to not hurt the common case of few loop |
1664 | * iterations. The 256 is basically an arbitrary value. |
1665 | * |
1666 | * In case we are hitting the timeout we need to defer all further |
1667 | * EOIs in order to ensure to leave the event handling loop rather |
1668 | * sooner than later. |
1669 | */ |
1670 | if (!ctrl->defer_eoi && !(++ctrl->count & 0xff)) { |
1671 | ktime_t kt = ktime_get(); |
1672 | |
1673 | if (!ctrl->timeout) { |
1674 | kt = ktime_add_ms(kt, |
1675 | msec: jiffies_to_msecs(j: event_loop_timeout)); |
1676 | ctrl->timeout = kt; |
1677 | } else if (kt > ctrl->timeout) { |
1678 | ctrl->defer_eoi = true; |
1679 | } |
1680 | } |
1681 | |
1682 | if (xchg_acquire(&info->is_active, 1)) |
1683 | return; |
1684 | |
1685 | dev = (info->type == IRQT_EVTCHN) ? info->u.interdomain : NULL; |
1686 | if (dev) |
1687 | atomic_inc(v: &dev->events); |
1688 | |
1689 | if (ctrl->defer_eoi) { |
1690 | info->eoi_cpu = smp_processor_id(); |
1691 | info->irq_epoch = __this_cpu_read(irq_epoch); |
1692 | info->eoi_time = get_jiffies_64() + event_eoi_delay; |
1693 | } |
1694 | |
1695 | generic_handle_irq(irq: info->irq); |
1696 | } |
1697 | |
1698 | int xen_evtchn_do_upcall(void) |
1699 | { |
1700 | struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu); |
1701 | int ret = vcpu_info->evtchn_upcall_pending ? IRQ_HANDLED : IRQ_NONE; |
1702 | int cpu = smp_processor_id(); |
1703 | struct evtchn_loop_ctrl ctrl = { 0 }; |
1704 | |
1705 | /* |
1706 | * When closing an event channel the associated IRQ must not be freed |
1707 | * until all cpus have left the event handling loop. This is ensured |
1708 | * by taking the rcu_read_lock() while handling events, as freeing of |
1709 | * the IRQ is handled via queue_rcu_work() _after_ closing the event |
1710 | * channel. |
1711 | */ |
1712 | rcu_read_lock(); |
1713 | |
1714 | do { |
1715 | vcpu_info->evtchn_upcall_pending = 0; |
1716 | |
1717 | xen_evtchn_handle_events(cpu, ctrl: &ctrl); |
1718 | |
1719 | BUG_ON(!irqs_disabled()); |
1720 | |
1721 | virt_rmb(); /* Hypervisor can set upcall pending. */ |
1722 | |
1723 | } while (vcpu_info->evtchn_upcall_pending); |
1724 | |
1725 | rcu_read_unlock(); |
1726 | |
1727 | /* |
1728 | * Increment irq_epoch only now to defer EOIs only for |
1729 | * xen_irq_lateeoi() invocations occurring from inside the loop |
1730 | * above. |
1731 | */ |
1732 | __this_cpu_inc(irq_epoch); |
1733 | |
1734 | return ret; |
1735 | } |
1736 | EXPORT_SYMBOL_GPL(xen_evtchn_do_upcall); |
1737 | |
1738 | /* Rebind a new event channel to an existing irq. */ |
1739 | void rebind_evtchn_irq(evtchn_port_t evtchn, int irq) |
1740 | { |
1741 | struct irq_info *info = info_for_irq(irq); |
1742 | |
1743 | if (WARN_ON(!info)) |
1744 | return; |
1745 | |
1746 | /* Make sure the irq is masked, since the new event channel |
1747 | will also be masked. */ |
1748 | disable_irq(irq); |
1749 | |
1750 | mutex_lock(&irq_mapping_update_lock); |
1751 | |
1752 | /* After resume the irq<->evtchn mappings are all cleared out */ |
1753 | BUG_ON(evtchn_to_info(evtchn)); |
1754 | /* Expect irq to have been bound before, |
1755 | so there should be a proper type */ |
1756 | BUG_ON(info->type == IRQT_UNBOUND); |
1757 | |
1758 | info->irq = irq; |
1759 | (void)xen_irq_info_evtchn_setup(info, evtchn, NULL); |
1760 | |
1761 | mutex_unlock(lock: &irq_mapping_update_lock); |
1762 | |
1763 | bind_evtchn_to_cpu(info, cpu: info->cpu, force_affinity: false); |
1764 | |
1765 | /* Unmask the event channel. */ |
1766 | enable_irq(irq); |
1767 | } |
1768 | |
1769 | /* Rebind an evtchn so that it gets delivered to a specific cpu */ |
1770 | static int xen_rebind_evtchn_to_cpu(struct irq_info *info, unsigned int tcpu) |
1771 | { |
1772 | struct evtchn_bind_vcpu bind_vcpu; |
1773 | evtchn_port_t evtchn = info ? info->evtchn : 0; |
1774 | |
1775 | if (!VALID_EVTCHN(evtchn)) |
1776 | return -1; |
1777 | |
1778 | if (!xen_support_evtchn_rebind()) |
1779 | return -1; |
1780 | |
1781 | /* Send future instances of this interrupt to other vcpu. */ |
1782 | bind_vcpu.port = evtchn; |
1783 | bind_vcpu.vcpu = xen_vcpu_nr(cpu: tcpu); |
1784 | |
1785 | /* |
1786 | * Mask the event while changing the VCPU binding to prevent |
1787 | * it being delivered on an unexpected VCPU. |
1788 | */ |
1789 | do_mask(info, EVT_MASK_REASON_TEMPORARY); |
1790 | |
1791 | /* |
1792 | * If this fails, it usually just indicates that we're dealing with a |
1793 | * virq or IPI channel, which don't actually need to be rebound. Ignore |
1794 | * it, but don't do the xenlinux-level rebind in that case. |
1795 | */ |
1796 | if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_vcpu, arg: &bind_vcpu) >= 0) |
1797 | bind_evtchn_to_cpu(info, cpu: tcpu, force_affinity: false); |
1798 | |
1799 | do_unmask(info, EVT_MASK_REASON_TEMPORARY); |
1800 | |
1801 | return 0; |
1802 | } |
1803 | |
1804 | /* |
1805 | * Find the CPU within @dest mask which has the least number of channels |
1806 | * assigned. This is not precise as the per cpu counts can be modified |
1807 | * concurrently. |
1808 | */ |
1809 | static unsigned int select_target_cpu(const struct cpumask *dest) |
1810 | { |
1811 | unsigned int cpu, best_cpu = UINT_MAX, minch = UINT_MAX; |
1812 | |
1813 | for_each_cpu_and(cpu, dest, cpu_online_mask) { |
1814 | unsigned int curch = atomic_read(v: &channels_on_cpu[cpu]); |
1815 | |
1816 | if (curch < minch) { |
1817 | minch = curch; |
1818 | best_cpu = cpu; |
1819 | } |
1820 | } |
1821 | |
1822 | /* |
1823 | * Catch the unlikely case that dest contains no online CPUs. Can't |
1824 | * recurse. |
1825 | */ |
1826 | if (best_cpu == UINT_MAX) |
1827 | return select_target_cpu(cpu_online_mask); |
1828 | |
1829 | return best_cpu; |
1830 | } |
1831 | |
1832 | static int set_affinity_irq(struct irq_data *data, const struct cpumask *dest, |
1833 | bool force) |
1834 | { |
1835 | unsigned int tcpu = select_target_cpu(dest); |
1836 | int ret; |
1837 | |
1838 | ret = xen_rebind_evtchn_to_cpu(info: info_for_irq(irq: data->irq), tcpu); |
1839 | if (!ret) |
1840 | irq_data_update_effective_affinity(d: data, cpumask_of(tcpu)); |
1841 | |
1842 | return ret; |
1843 | } |
1844 | |
1845 | static void enable_dynirq(struct irq_data *data) |
1846 | { |
1847 | struct irq_info *info = info_for_irq(irq: data->irq); |
1848 | evtchn_port_t evtchn = info ? info->evtchn : 0; |
1849 | |
1850 | if (VALID_EVTCHN(evtchn)) |
1851 | do_unmask(info, EVT_MASK_REASON_EXPLICIT); |
1852 | } |
1853 | |
1854 | static void do_ack_dynirq(struct irq_info *info) |
1855 | { |
1856 | evtchn_port_t evtchn = info->evtchn; |
1857 | |
1858 | if (VALID_EVTCHN(evtchn)) |
1859 | event_handler_exit(info); |
1860 | } |
1861 | |
1862 | static void ack_dynirq(struct irq_data *data) |
1863 | { |
1864 | struct irq_info *info = info_for_irq(irq: data->irq); |
1865 | |
1866 | if (info) |
1867 | do_ack_dynirq(info); |
1868 | } |
1869 | |
1870 | static void mask_ack_dynirq(struct irq_data *data) |
1871 | { |
1872 | struct irq_info *info = info_for_irq(irq: data->irq); |
1873 | |
1874 | if (info) { |
1875 | do_disable_dynirq(info); |
1876 | do_ack_dynirq(info); |
1877 | } |
1878 | } |
1879 | |
1880 | static void lateeoi_ack_dynirq(struct irq_data *data) |
1881 | { |
1882 | struct irq_info *info = info_for_irq(irq: data->irq); |
1883 | evtchn_port_t evtchn = info ? info->evtchn : 0; |
1884 | |
1885 | if (VALID_EVTCHN(evtchn)) { |
1886 | do_mask(info, EVT_MASK_REASON_EOI_PENDING); |
1887 | /* |
1888 | * Don't call event_handler_exit(). |
1889 | * Need to keep is_active non-zero in order to ignore re-raised |
1890 | * events after cpu affinity changes while a lateeoi is pending. |
1891 | */ |
1892 | clear_evtchn(port: evtchn); |
1893 | } |
1894 | } |
1895 | |
1896 | static void lateeoi_mask_ack_dynirq(struct irq_data *data) |
1897 | { |
1898 | struct irq_info *info = info_for_irq(irq: data->irq); |
1899 | evtchn_port_t evtchn = info ? info->evtchn : 0; |
1900 | |
1901 | if (VALID_EVTCHN(evtchn)) { |
1902 | do_mask(info, EVT_MASK_REASON_EXPLICIT); |
1903 | event_handler_exit(info); |
1904 | } |
1905 | } |
1906 | |
1907 | static int retrigger_dynirq(struct irq_data *data) |
1908 | { |
1909 | struct irq_info *info = info_for_irq(irq: data->irq); |
1910 | evtchn_port_t evtchn = info ? info->evtchn : 0; |
1911 | |
1912 | if (!VALID_EVTCHN(evtchn)) |
1913 | return 0; |
1914 | |
1915 | do_mask(info, EVT_MASK_REASON_TEMPORARY); |
1916 | set_evtchn(evtchn); |
1917 | do_unmask(info, EVT_MASK_REASON_TEMPORARY); |
1918 | |
1919 | return 1; |
1920 | } |
1921 | |
1922 | static void restore_pirqs(void) |
1923 | { |
1924 | int pirq, rc, irq, gsi; |
1925 | struct physdev_map_pirq map_irq; |
1926 | struct irq_info *info; |
1927 | |
1928 | list_for_each_entry(info, &xen_irq_list_head, list) { |
1929 | if (info->type != IRQT_PIRQ) |
1930 | continue; |
1931 | |
1932 | pirq = info->u.pirq.pirq; |
1933 | gsi = info->u.pirq.gsi; |
1934 | irq = info->irq; |
1935 | |
1936 | /* save/restore of PT devices doesn't work, so at this point the |
1937 | * only devices present are GSI based emulated devices */ |
1938 | if (!gsi) |
1939 | continue; |
1940 | |
1941 | map_irq.domid = DOMID_SELF; |
1942 | map_irq.type = MAP_PIRQ_TYPE_GSI; |
1943 | map_irq.index = gsi; |
1944 | map_irq.pirq = pirq; |
1945 | |
1946 | rc = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, arg: &map_irq); |
1947 | if (rc) { |
1948 | pr_warn("xen map irq failed gsi=%d irq=%d pirq=%d rc=%d\n" , |
1949 | gsi, irq, pirq, rc); |
1950 | xen_free_irq(info); |
1951 | continue; |
1952 | } |
1953 | |
1954 | printk(KERN_DEBUG "xen: --> irq=%d, pirq=%d\n" , irq, map_irq.pirq); |
1955 | |
1956 | __startup_pirq(info); |
1957 | } |
1958 | } |
1959 | |
1960 | static void restore_cpu_virqs(unsigned int cpu) |
1961 | { |
1962 | struct evtchn_bind_virq bind_virq; |
1963 | evtchn_port_t evtchn; |
1964 | struct irq_info *info; |
1965 | int virq, irq; |
1966 | |
1967 | for (virq = 0; virq < NR_VIRQS; virq++) { |
1968 | if ((irq = per_cpu(virq_to_irq, cpu)[virq]) == -1) |
1969 | continue; |
1970 | info = info_for_irq(irq); |
1971 | |
1972 | BUG_ON(virq_from_irq(info) != virq); |
1973 | |
1974 | /* Get a new binding from Xen. */ |
1975 | bind_virq.virq = virq; |
1976 | bind_virq.vcpu = xen_vcpu_nr(cpu); |
1977 | if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq, |
1978 | arg: &bind_virq) != 0) |
1979 | BUG(); |
1980 | evtchn = bind_virq.port; |
1981 | |
1982 | /* Record the new mapping. */ |
1983 | xen_irq_info_virq_setup(info, cpu, evtchn, virq); |
1984 | /* The affinity mask is still valid */ |
1985 | bind_evtchn_to_cpu(info, cpu, force_affinity: false); |
1986 | } |
1987 | } |
1988 | |
1989 | static void restore_cpu_ipis(unsigned int cpu) |
1990 | { |
1991 | struct evtchn_bind_ipi bind_ipi; |
1992 | evtchn_port_t evtchn; |
1993 | struct irq_info *info; |
1994 | int ipi, irq; |
1995 | |
1996 | for (ipi = 0; ipi < XEN_NR_IPIS; ipi++) { |
1997 | if ((irq = per_cpu(ipi_to_irq, cpu)[ipi]) == -1) |
1998 | continue; |
1999 | info = info_for_irq(irq); |
2000 | |
2001 | BUG_ON(ipi_from_irq(info) != ipi); |
2002 | |
2003 | /* Get a new binding from Xen. */ |
2004 | bind_ipi.vcpu = xen_vcpu_nr(cpu); |
2005 | if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi, |
2006 | arg: &bind_ipi) != 0) |
2007 | BUG(); |
2008 | evtchn = bind_ipi.port; |
2009 | |
2010 | /* Record the new mapping. */ |
2011 | xen_irq_info_ipi_setup(info, cpu, evtchn, ipi); |
2012 | /* The affinity mask is still valid */ |
2013 | bind_evtchn_to_cpu(info, cpu, force_affinity: false); |
2014 | } |
2015 | } |
2016 | |
2017 | /* Clear an irq's pending state, in preparation for polling on it */ |
2018 | void xen_clear_irq_pending(int irq) |
2019 | { |
2020 | struct irq_info *info = info_for_irq(irq); |
2021 | evtchn_port_t evtchn = info ? info->evtchn : 0; |
2022 | |
2023 | if (VALID_EVTCHN(evtchn)) |
2024 | event_handler_exit(info); |
2025 | } |
2026 | EXPORT_SYMBOL(xen_clear_irq_pending); |
2027 | |
2028 | bool xen_test_irq_pending(int irq) |
2029 | { |
2030 | evtchn_port_t evtchn = evtchn_from_irq(irq); |
2031 | bool ret = false; |
2032 | |
2033 | if (VALID_EVTCHN(evtchn)) |
2034 | ret = test_evtchn(port: evtchn); |
2035 | |
2036 | return ret; |
2037 | } |
2038 | |
2039 | /* Poll waiting for an irq to become pending with timeout. In the usual case, |
2040 | * the irq will be disabled so it won't deliver an interrupt. */ |
2041 | void xen_poll_irq_timeout(int irq, u64 timeout) |
2042 | { |
2043 | evtchn_port_t evtchn = evtchn_from_irq(irq); |
2044 | |
2045 | if (VALID_EVTCHN(evtchn)) { |
2046 | struct sched_poll poll; |
2047 | |
2048 | poll.nr_ports = 1; |
2049 | poll.timeout = timeout; |
2050 | set_xen_guest_handle(poll.ports, &evtchn); |
2051 | |
2052 | if (HYPERVISOR_sched_op(SCHEDOP_poll, arg: &poll) != 0) |
2053 | BUG(); |
2054 | } |
2055 | } |
2056 | EXPORT_SYMBOL(xen_poll_irq_timeout); |
2057 | /* Poll waiting for an irq to become pending. In the usual case, the |
2058 | * irq will be disabled so it won't deliver an interrupt. */ |
2059 | void xen_poll_irq(int irq) |
2060 | { |
2061 | xen_poll_irq_timeout(irq, 0 /* no timeout */); |
2062 | } |
2063 | |
2064 | /* Check whether the IRQ line is shared with other guests. */ |
2065 | int xen_test_irq_shared(int irq) |
2066 | { |
2067 | struct irq_info *info = info_for_irq(irq); |
2068 | struct physdev_irq_status_query irq_status; |
2069 | |
2070 | if (WARN_ON(!info)) |
2071 | return -ENOENT; |
2072 | |
2073 | irq_status.irq = info->u.pirq.pirq; |
2074 | |
2075 | if (HYPERVISOR_physdev_op(PHYSDEVOP_irq_status_query, arg: &irq_status)) |
2076 | return 0; |
2077 | return !(irq_status.flags & XENIRQSTAT_shared); |
2078 | } |
2079 | EXPORT_SYMBOL_GPL(xen_test_irq_shared); |
2080 | |
2081 | void xen_irq_resume(void) |
2082 | { |
2083 | unsigned int cpu; |
2084 | struct irq_info *info; |
2085 | |
2086 | /* New event-channel space is not 'live' yet. */ |
2087 | xen_evtchn_resume(); |
2088 | |
2089 | /* No IRQ <-> event-channel mappings. */ |
2090 | list_for_each_entry(info, &xen_irq_list_head, list) { |
2091 | /* Zap event-channel binding */ |
2092 | info->evtchn = 0; |
2093 | /* Adjust accounting */ |
2094 | channels_on_cpu_dec(info); |
2095 | } |
2096 | |
2097 | clear_evtchn_to_irq_all(); |
2098 | |
2099 | for_each_possible_cpu(cpu) { |
2100 | restore_cpu_virqs(cpu); |
2101 | restore_cpu_ipis(cpu); |
2102 | } |
2103 | |
2104 | restore_pirqs(); |
2105 | } |
2106 | |
2107 | static struct irq_chip xen_dynamic_chip __read_mostly = { |
2108 | .name = "xen-dyn" , |
2109 | |
2110 | .irq_disable = disable_dynirq, |
2111 | .irq_mask = disable_dynirq, |
2112 | .irq_unmask = enable_dynirq, |
2113 | |
2114 | .irq_ack = ack_dynirq, |
2115 | .irq_mask_ack = mask_ack_dynirq, |
2116 | |
2117 | .irq_set_affinity = set_affinity_irq, |
2118 | .irq_retrigger = retrigger_dynirq, |
2119 | }; |
2120 | |
2121 | static struct irq_chip xen_lateeoi_chip __read_mostly = { |
2122 | /* The chip name needs to contain "xen-dyn" for irqbalance to work. */ |
2123 | .name = "xen-dyn-lateeoi" , |
2124 | |
2125 | .irq_disable = disable_dynirq, |
2126 | .irq_mask = disable_dynirq, |
2127 | .irq_unmask = enable_dynirq, |
2128 | |
2129 | .irq_ack = lateeoi_ack_dynirq, |
2130 | .irq_mask_ack = lateeoi_mask_ack_dynirq, |
2131 | |
2132 | .irq_set_affinity = set_affinity_irq, |
2133 | .irq_retrigger = retrigger_dynirq, |
2134 | }; |
2135 | |
2136 | static struct irq_chip xen_pirq_chip __read_mostly = { |
2137 | .name = "xen-pirq" , |
2138 | |
2139 | .irq_startup = startup_pirq, |
2140 | .irq_shutdown = shutdown_pirq, |
2141 | .irq_enable = enable_pirq, |
2142 | .irq_disable = disable_pirq, |
2143 | |
2144 | .irq_mask = disable_dynirq, |
2145 | .irq_unmask = enable_dynirq, |
2146 | |
2147 | .irq_ack = eoi_pirq, |
2148 | .irq_eoi = eoi_pirq, |
2149 | .irq_mask_ack = mask_ack_pirq, |
2150 | |
2151 | .irq_set_affinity = set_affinity_irq, |
2152 | |
2153 | .irq_retrigger = retrigger_dynirq, |
2154 | }; |
2155 | |
2156 | static struct irq_chip xen_percpu_chip __read_mostly = { |
2157 | .name = "xen-percpu" , |
2158 | |
2159 | .irq_disable = disable_dynirq, |
2160 | .irq_mask = disable_dynirq, |
2161 | .irq_unmask = enable_dynirq, |
2162 | |
2163 | .irq_ack = ack_dynirq, |
2164 | }; |
2165 | |
2166 | #ifdef CONFIG_X86 |
2167 | #ifdef CONFIG_XEN_PVHVM |
2168 | /* Vector callbacks are better than PCI interrupts to receive event |
2169 | * channel notifications because we can receive vector callbacks on any |
2170 | * vcpu and we don't need PCI support or APIC interactions. */ |
2171 | void xen_setup_callback_vector(void) |
2172 | { |
2173 | uint64_t callback_via; |
2174 | |
2175 | if (xen_have_vector_callback) { |
2176 | callback_via = HVM_CALLBACK_VECTOR(HYPERVISOR_CALLBACK_VECTOR); |
2177 | if (xen_set_callback_via(via: callback_via)) { |
2178 | pr_err("Request for Xen HVM callback vector failed\n" ); |
2179 | xen_have_vector_callback = false; |
2180 | } |
2181 | } |
2182 | } |
2183 | |
2184 | /* |
2185 | * Setup per-vCPU vector-type callbacks. If this setup is unavailable, |
2186 | * fallback to the global vector-type callback. |
2187 | */ |
2188 | static __init void xen_init_setup_upcall_vector(void) |
2189 | { |
2190 | if (!xen_have_vector_callback) |
2191 | return; |
2192 | |
2193 | if ((cpuid_eax(op: xen_cpuid_base() + 4) & XEN_HVM_CPUID_UPCALL_VECTOR) && |
2194 | !xen_set_upcall_vector(cpu: 0)) |
2195 | xen_percpu_upcall = true; |
2196 | else if (xen_feature(XENFEAT_hvm_callback_vector)) |
2197 | xen_setup_callback_vector(); |
2198 | else |
2199 | xen_have_vector_callback = false; |
2200 | } |
2201 | |
2202 | int xen_set_upcall_vector(unsigned int cpu) |
2203 | { |
2204 | int rc; |
2205 | xen_hvm_evtchn_upcall_vector_t op = { |
2206 | .vector = HYPERVISOR_CALLBACK_VECTOR, |
2207 | .vcpu = per_cpu(xen_vcpu_id, cpu), |
2208 | }; |
2209 | |
2210 | rc = HYPERVISOR_hvm_op(HVMOP_set_evtchn_upcall_vector, arg: &op); |
2211 | if (rc) |
2212 | return rc; |
2213 | |
2214 | /* Trick toolstack to think we are enlightened. */ |
2215 | if (!cpu) |
2216 | rc = xen_set_callback_via(via: 1); |
2217 | |
2218 | return rc; |
2219 | } |
2220 | |
2221 | static __init void xen_alloc_callback_vector(void) |
2222 | { |
2223 | if (!xen_have_vector_callback) |
2224 | return; |
2225 | |
2226 | pr_info("Xen HVM callback vector for event delivery is enabled\n" ); |
2227 | sysvec_install(HYPERVISOR_CALLBACK_VECTOR, sysvec_xen_hvm_callback); |
2228 | } |
2229 | #else |
/* No-op stand-ins used when CONFIG_XEN_PVHVM is not enabled. */
void xen_setup_callback_vector(void) {}
static inline void xen_init_setup_upcall_vector(void) {}
/*
 * Must return a value: falling off the end of a non-void function is
 * undefined behaviour as soon as a caller looks at the result.  Like the
 * other stubs this does nothing, so report success.
 */
int xen_set_upcall_vector(unsigned int cpu) { return 0; }
static inline void xen_alloc_callback_vector(void) {}
2234 | #endif /* CONFIG_XEN_PVHVM */ |
2235 | #endif /* CONFIG_X86 */ |
2236 | |
2237 | bool xen_fifo_events = true; |
2238 | module_param_named(fifo_events, xen_fifo_events, bool, 0); |
2239 | |
2240 | static int xen_evtchn_cpu_prepare(unsigned int cpu) |
2241 | { |
2242 | int ret = 0; |
2243 | |
2244 | xen_cpu_init_eoi(cpu); |
2245 | |
2246 | if (evtchn_ops->percpu_init) |
2247 | ret = evtchn_ops->percpu_init(cpu); |
2248 | |
2249 | return ret; |
2250 | } |
2251 | |
2252 | static int xen_evtchn_cpu_dead(unsigned int cpu) |
2253 | { |
2254 | int ret = 0; |
2255 | |
2256 | if (evtchn_ops->percpu_deinit) |
2257 | ret = evtchn_ops->percpu_deinit(cpu); |
2258 | |
2259 | return ret; |
2260 | } |
2261 | |
2262 | void __init xen_init_IRQ(void) |
2263 | { |
2264 | int ret = -EINVAL; |
2265 | evtchn_port_t evtchn; |
2266 | |
2267 | if (xen_fifo_events) |
2268 | ret = xen_evtchn_fifo_init(); |
2269 | if (ret < 0) { |
2270 | xen_evtchn_2l_init(); |
2271 | xen_fifo_events = false; |
2272 | } |
2273 | |
2274 | xen_cpu_init_eoi(smp_processor_id()); |
2275 | |
2276 | cpuhp_setup_state_nocalls(state: CPUHP_XEN_EVTCHN_PREPARE, |
2277 | name: "xen/evtchn:prepare" , |
2278 | startup: xen_evtchn_cpu_prepare, teardown: xen_evtchn_cpu_dead); |
2279 | |
2280 | evtchn_to_irq = kcalloc(EVTCHN_ROW(xen_evtchn_max_channels()), |
2281 | size: sizeof(*evtchn_to_irq), GFP_KERNEL); |
2282 | BUG_ON(!evtchn_to_irq); |
2283 | |
2284 | /* No event channels are 'live' right now. */ |
2285 | for (evtchn = 0; evtchn < xen_evtchn_nr_channels(); evtchn++) |
2286 | mask_evtchn(port: evtchn); |
2287 | |
2288 | pirq_needs_eoi = pirq_needs_eoi_flag; |
2289 | |
2290 | #ifdef CONFIG_X86 |
2291 | if (xen_pv_domain()) { |
2292 | if (xen_initial_domain()) |
2293 | pci_xen_initial_domain(); |
2294 | } |
2295 | xen_init_setup_upcall_vector(); |
2296 | xen_alloc_callback_vector(); |
2297 | |
2298 | |
2299 | if (xen_hvm_domain()) { |
2300 | native_init_IRQ(); |
2301 | /* pci_xen_hvm_init must be called after native_init_IRQ so that |
2302 | * __acpi_register_gsi can point at the right function */ |
2303 | pci_xen_hvm_init(); |
2304 | } else { |
2305 | int rc; |
2306 | struct physdev_pirq_eoi_gmfn eoi_gmfn; |
2307 | |
2308 | pirq_eoi_map = (void *)__get_free_page(GFP_KERNEL|__GFP_ZERO); |
2309 | eoi_gmfn.gmfn = virt_to_gfn(pirq_eoi_map); |
2310 | rc = HYPERVISOR_physdev_op(PHYSDEVOP_pirq_eoi_gmfn_v2, arg: &eoi_gmfn); |
2311 | if (rc != 0) { |
2312 | free_page((unsigned long) pirq_eoi_map); |
2313 | pirq_eoi_map = NULL; |
2314 | } else |
2315 | pirq_needs_eoi = pirq_check_eoi_map; |
2316 | } |
2317 | #endif |
2318 | } |
2319 | |