| 1 | // SPDX-License-Identifier: GPL-2.0 |
| 2 | |
| 3 | /* |
| 4 | * Hyper-V stub IOMMU driver. |
| 5 | * |
| 6 | * Copyright (C) 2019, Microsoft, Inc. |
| 7 | * |
| 8 | * Author : Lan Tianyu <Tianyu.Lan@microsoft.com> |
| 9 | */ |
| 10 | |
| 11 | #include <linux/types.h> |
| 12 | #include <linux/interrupt.h> |
| 13 | #include <linux/irq.h> |
| 14 | #include <linux/iommu.h> |
| 15 | #include <linux/module.h> |
| 16 | |
| 17 | #include <asm/apic.h> |
| 18 | #include <asm/cpu.h> |
| 19 | #include <asm/hw_irq.h> |
| 20 | #include <asm/io_apic.h> |
| 21 | #include <asm/irq_remapping.h> |
| 22 | #include <asm/hypervisor.h> |
| 23 | #include <asm/mshyperv.h> |
| 24 | |
| 25 | #include "irq_remapping.h" |
| 26 | |
| 27 | #ifdef CONFIG_IRQ_REMAP |
| 28 | |
| 29 | /* |
| 30 | * According 82093AA IO-APIC spec , IO APIC has a 24-entry Interrupt |
| 31 | * Redirection Table. Hyper-V exposes one single IO-APIC and so define |
| 32 | * 24 IO APIC remmapping entries. |
| 33 | */ |
| 34 | #define IOAPIC_REMAPPING_ENTRY 24 |
| 35 | |
| 36 | static cpumask_t ioapic_max_cpumask = { CPU_BITS_NONE }; |
| 37 | static struct irq_domain *ioapic_ir_domain; |
| 38 | |
| 39 | static int hyperv_ir_set_affinity(struct irq_data *data, |
| 40 | const struct cpumask *mask, bool force) |
| 41 | { |
| 42 | struct irq_data *parent = data->parent_data; |
| 43 | struct irq_cfg *cfg = irqd_cfg(irq_data: data); |
| 44 | int ret; |
| 45 | |
| 46 | /* Return error If new irq affinity is out of ioapic_max_cpumask. */ |
| 47 | if (!cpumask_subset(src1p: mask, src2p: &ioapic_max_cpumask)) |
| 48 | return -EINVAL; |
| 49 | |
| 50 | ret = parent->chip->irq_set_affinity(parent, mask, force); |
| 51 | if (ret < 0 || ret == IRQ_SET_MASK_OK_DONE) |
| 52 | return ret; |
| 53 | |
| 54 | vector_schedule_cleanup(cfg); |
| 55 | |
| 56 | return 0; |
| 57 | } |
| 58 | |
| 59 | static struct irq_chip hyperv_ir_chip = { |
| 60 | .name = "HYPERV-IR" , |
| 61 | .irq_ack = apic_ack_irq, |
| 62 | .irq_set_affinity = hyperv_ir_set_affinity, |
| 63 | }; |
| 64 | |
| 65 | static int hyperv_irq_remapping_alloc(struct irq_domain *domain, |
| 66 | unsigned int virq, unsigned int nr_irqs, |
| 67 | void *arg) |
| 68 | { |
| 69 | struct irq_alloc_info *info = arg; |
| 70 | struct irq_data *irq_data; |
| 71 | int ret = 0; |
| 72 | |
| 73 | if (!info || info->type != X86_IRQ_ALLOC_TYPE_IOAPIC || nr_irqs > 1) |
| 74 | return -EINVAL; |
| 75 | |
| 76 | ret = irq_domain_alloc_irqs_parent(domain, irq_base: virq, nr_irqs, arg); |
| 77 | if (ret < 0) |
| 78 | return ret; |
| 79 | |
| 80 | irq_data = irq_domain_get_irq_data(domain, virq); |
| 81 | if (!irq_data) { |
| 82 | irq_domain_free_irqs_common(domain, virq, nr_irqs); |
| 83 | return -EINVAL; |
| 84 | } |
| 85 | |
| 86 | irq_data->chip = &hyperv_ir_chip; |
| 87 | |
| 88 | /* |
| 89 | * Hypver-V IO APIC irq affinity should be in the scope of |
| 90 | * ioapic_max_cpumask because no irq remapping support. |
| 91 | */ |
| 92 | irq_data_update_affinity(d: irq_data, m: &ioapic_max_cpumask); |
| 93 | |
| 94 | return 0; |
| 95 | } |
| 96 | |
/*
 * Free @nr_irqs interrupts starting at @virq by handing them to
 * irq_domain_free_irqs_common().
 */
static void hyperv_irq_remapping_free(struct irq_domain *domain,
				      unsigned int virq,
				      unsigned int nr_irqs)
{
	irq_domain_free_irqs_common(domain, virq, nr_irqs);
}
| 102 | |
| 103 | static int hyperv_irq_remapping_select(struct irq_domain *d, |
| 104 | struct irq_fwspec *fwspec, |
| 105 | enum irq_domain_bus_token bus_token) |
| 106 | { |
| 107 | /* Claim the only I/O APIC emulated by Hyper-V */ |
| 108 | return x86_fwspec_is_ioapic(fwspec); |
| 109 | } |
| 110 | |
| 111 | static const struct irq_domain_ops hyperv_ir_domain_ops = { |
| 112 | .select = hyperv_irq_remapping_select, |
| 113 | .alloc = hyperv_irq_remapping_alloc, |
| 114 | .free = hyperv_irq_remapping_free, |
| 115 | }; |
| 116 | |
| 117 | static const struct irq_domain_ops hyperv_root_ir_domain_ops; |
| 118 | static int __init hyperv_prepare_irq_remapping(void) |
| 119 | { |
| 120 | struct fwnode_handle *fn; |
| 121 | int i; |
| 122 | const char *name; |
| 123 | const struct irq_domain_ops *ops; |
| 124 | |
| 125 | /* |
| 126 | * For a Hyper-V root partition, ms_hyperv_msi_ext_dest_id() |
| 127 | * will always return false. |
| 128 | */ |
| 129 | if (!hypervisor_is_type(type: X86_HYPER_MS_HYPERV) || |
| 130 | x86_init.hyper.msi_ext_dest_id()) |
| 131 | return -ENODEV; |
| 132 | |
| 133 | if (hv_root_partition()) { |
| 134 | name = "HYPERV-ROOT-IR" ; |
| 135 | ops = &hyperv_root_ir_domain_ops; |
| 136 | } else { |
| 137 | name = "HYPERV-IR" ; |
| 138 | ops = &hyperv_ir_domain_ops; |
| 139 | } |
| 140 | |
| 141 | fn = irq_domain_alloc_named_id_fwnode(name, id: 0); |
| 142 | if (!fn) |
| 143 | return -ENOMEM; |
| 144 | |
| 145 | ioapic_ir_domain = |
| 146 | irq_domain_create_hierarchy(parent: arch_get_ir_parent_domain(), |
| 147 | flags: 0, IOAPIC_REMAPPING_ENTRY, fwnode: fn, ops, NULL); |
| 148 | |
| 149 | if (!ioapic_ir_domain) { |
| 150 | irq_domain_free_fwnode(fwnode: fn); |
| 151 | return -ENOMEM; |
| 152 | } |
| 153 | |
| 154 | if (hv_root_partition()) |
| 155 | return 0; /* The rest is only relevant to guests */ |
| 156 | |
| 157 | /* |
| 158 | * Hyper-V doesn't provide irq remapping function for |
| 159 | * IO-APIC and so IO-APIC only accepts 8-bit APIC ID. |
| 160 | * Cpu's APIC ID is read from ACPI MADT table and APIC IDs |
| 161 | * in the MADT table on Hyper-v are sorted monotonic increasingly. |
| 162 | * APIC ID reflects cpu topology. There maybe some APIC ID |
| 163 | * gaps when cpu number in a socket is not power of two. Prepare |
| 164 | * max cpu affinity for IOAPIC irqs. Scan cpu 0-255 and set cpu |
| 165 | * into ioapic_max_cpumask if its APIC ID is less than 256. |
| 166 | */ |
| 167 | for (i = min_t(unsigned int, nr_cpu_ids - 1, 255); i >= 0; i--) |
| 168 | if (cpu_possible(cpu: i) && cpu_physical_id(i) < 256) |
| 169 | cpumask_set_cpu(cpu: i, dstp: &ioapic_max_cpumask); |
| 170 | |
| 171 | return 0; |
| 172 | } |
| 173 | |
| 174 | static int __init hyperv_enable_irq_remapping(void) |
| 175 | { |
| 176 | if (x2apic_supported()) |
| 177 | return IRQ_REMAP_X2APIC_MODE; |
| 178 | return IRQ_REMAP_XAPIC_MODE; |
| 179 | } |
| 180 | |
| 181 | struct irq_remap_ops hyperv_irq_remap_ops = { |
| 182 | .prepare = hyperv_prepare_irq_remapping, |
| 183 | .enable = hyperv_enable_irq_remapping, |
| 184 | }; |
| 185 | |
| 186 | /* IRQ remapping domain when Linux runs as the root partition */ |
| 187 | struct hyperv_root_ir_data { |
| 188 | u8 ioapic_id; |
| 189 | bool is_level; |
| 190 | struct hv_interrupt_entry entry; |
| 191 | }; |
| 192 | |
| 193 | static void |
| 194 | hyperv_root_ir_compose_msi_msg(struct irq_data *irq_data, struct msi_msg *msg) |
| 195 | { |
| 196 | u64 status; |
| 197 | u32 vector; |
| 198 | struct irq_cfg *cfg; |
| 199 | int ioapic_id; |
| 200 | const struct cpumask *affinity; |
| 201 | int cpu; |
| 202 | struct hv_interrupt_entry entry; |
| 203 | struct hyperv_root_ir_data *data = irq_data->chip_data; |
| 204 | struct IO_APIC_route_entry e; |
| 205 | |
| 206 | cfg = irqd_cfg(irq_data); |
| 207 | affinity = irq_data_get_effective_affinity_mask(d: irq_data); |
| 208 | cpu = cpumask_first_and(srcp1: affinity, cpu_online_mask); |
| 209 | |
| 210 | vector = cfg->vector; |
| 211 | ioapic_id = data->ioapic_id; |
| 212 | |
| 213 | if (data->entry.source == HV_DEVICE_TYPE_IOAPIC |
| 214 | && data->entry.ioapic_rte.as_uint64) { |
| 215 | entry = data->entry; |
| 216 | |
| 217 | status = hv_unmap_ioapic_interrupt(ioapic_id, entry: &entry); |
| 218 | |
| 219 | if (status != HV_STATUS_SUCCESS) |
| 220 | hv_status_debug(status, "failed to unmap\n" ); |
| 221 | |
| 222 | data->entry.ioapic_rte.as_uint64 = 0; |
| 223 | data->entry.source = 0; /* Invalid source */ |
| 224 | } |
| 225 | |
| 226 | |
| 227 | status = hv_map_ioapic_interrupt(ioapic_id, level: data->is_level, vcpu: cpu, |
| 228 | vector, entry: &entry); |
| 229 | |
| 230 | if (status != HV_STATUS_SUCCESS) { |
| 231 | hv_status_err(status, "map failed\n" ); |
| 232 | return; |
| 233 | } |
| 234 | |
| 235 | data->entry = entry; |
| 236 | |
| 237 | /* Turn it into an IO_APIC_route_entry, and generate MSI MSG. */ |
| 238 | e.w1 = entry.ioapic_rte.low_uint32; |
| 239 | e.w2 = entry.ioapic_rte.high_uint32; |
| 240 | |
| 241 | memset(msg, 0, sizeof(*msg)); |
| 242 | msg->arch_data.vector = e.vector; |
| 243 | msg->arch_data.delivery_mode = e.delivery_mode; |
| 244 | msg->arch_addr_lo.dest_mode_logical = e.dest_mode_logical; |
| 245 | msg->arch_addr_lo.dmar_format = e.ir_format; |
| 246 | msg->arch_addr_lo.dmar_index_0_14 = e.ir_index_0_14; |
| 247 | } |
| 248 | |
| 249 | static int hyperv_root_ir_set_affinity(struct irq_data *data, |
| 250 | const struct cpumask *mask, bool force) |
| 251 | { |
| 252 | struct irq_data *parent = data->parent_data; |
| 253 | struct irq_cfg *cfg = irqd_cfg(irq_data: data); |
| 254 | int ret; |
| 255 | |
| 256 | ret = parent->chip->irq_set_affinity(parent, mask, force); |
| 257 | if (ret < 0 || ret == IRQ_SET_MASK_OK_DONE) |
| 258 | return ret; |
| 259 | |
| 260 | vector_schedule_cleanup(cfg); |
| 261 | |
| 262 | return 0; |
| 263 | } |
| 264 | |
| 265 | static struct irq_chip hyperv_root_ir_chip = { |
| 266 | .name = "HYPERV-ROOT-IR" , |
| 267 | .irq_ack = apic_ack_irq, |
| 268 | .irq_set_affinity = hyperv_root_ir_set_affinity, |
| 269 | .irq_compose_msi_msg = hyperv_root_ir_compose_msi_msg, |
| 270 | }; |
| 271 | |
| 272 | static int hyperv_root_irq_remapping_alloc(struct irq_domain *domain, |
| 273 | unsigned int virq, unsigned int nr_irqs, |
| 274 | void *arg) |
| 275 | { |
| 276 | struct irq_alloc_info *info = arg; |
| 277 | struct irq_data *irq_data; |
| 278 | struct hyperv_root_ir_data *data; |
| 279 | int ret = 0; |
| 280 | |
| 281 | if (!info || info->type != X86_IRQ_ALLOC_TYPE_IOAPIC || nr_irqs > 1) |
| 282 | return -EINVAL; |
| 283 | |
| 284 | ret = irq_domain_alloc_irqs_parent(domain, irq_base: virq, nr_irqs, arg); |
| 285 | if (ret < 0) |
| 286 | return ret; |
| 287 | |
| 288 | data = kzalloc(sizeof(*data), GFP_KERNEL); |
| 289 | if (!data) { |
| 290 | irq_domain_free_irqs_common(domain, virq, nr_irqs); |
| 291 | return -ENOMEM; |
| 292 | } |
| 293 | |
| 294 | irq_data = irq_domain_get_irq_data(domain, virq); |
| 295 | if (!irq_data) { |
| 296 | kfree(objp: data); |
| 297 | irq_domain_free_irqs_common(domain, virq, nr_irqs); |
| 298 | return -EINVAL; |
| 299 | } |
| 300 | |
| 301 | data->ioapic_id = info->devid; |
| 302 | data->is_level = info->ioapic.is_level; |
| 303 | |
| 304 | irq_data->chip = &hyperv_root_ir_chip; |
| 305 | irq_data->chip_data = data; |
| 306 | |
| 307 | return 0; |
| 308 | } |
| 309 | |
| 310 | static void hyperv_root_irq_remapping_free(struct irq_domain *domain, |
| 311 | unsigned int virq, unsigned int nr_irqs) |
| 312 | { |
| 313 | struct irq_data *irq_data; |
| 314 | struct hyperv_root_ir_data *data; |
| 315 | struct hv_interrupt_entry *e; |
| 316 | int i; |
| 317 | |
| 318 | for (i = 0; i < nr_irqs; i++) { |
| 319 | irq_data = irq_domain_get_irq_data(domain, virq: virq + i); |
| 320 | |
| 321 | if (irq_data && irq_data->chip_data) { |
| 322 | data = irq_data->chip_data; |
| 323 | e = &data->entry; |
| 324 | |
| 325 | if (e->source == HV_DEVICE_TYPE_IOAPIC |
| 326 | && e->ioapic_rte.as_uint64) |
| 327 | hv_unmap_ioapic_interrupt(ioapic_id: data->ioapic_id, |
| 328 | entry: &data->entry); |
| 329 | |
| 330 | kfree(objp: data); |
| 331 | } |
| 332 | } |
| 333 | |
| 334 | irq_domain_free_irqs_common(domain, virq, nr_irqs); |
| 335 | } |
| 336 | |
| 337 | static const struct irq_domain_ops hyperv_root_ir_domain_ops = { |
| 338 | .select = hyperv_irq_remapping_select, |
| 339 | .alloc = hyperv_root_irq_remapping_alloc, |
| 340 | .free = hyperv_root_irq_remapping_free, |
| 341 | }; |
| 342 | |
| 343 | #endif |
| 344 | |