| 1 | // SPDX-License-Identifier: GPL-2.0 |
| 2 | #include <linux/types.h> |
| 3 | #include <linux/interrupt.h> |
| 4 | #include <linux/irq_work.h> |
| 5 | #include <linux/jump_label.h> |
| 6 | #include <linux/kvm_para.h> |
| 7 | #include <linux/reboot.h> |
| 8 | #include <linux/static_call.h> |
| 9 | #include <asm/paravirt.h> |
| 10 | |
| 11 | static int has_steal_clock; |
| 12 | struct static_key paravirt_steal_enabled; |
| 13 | struct static_key paravirt_steal_rq_enabled; |
| 14 | static DEFINE_PER_CPU(struct kvm_steal_time, steal_time) __aligned(64); |
| 15 | DEFINE_STATIC_KEY_FALSE(virt_spin_lock_key); |
| 16 | |
| 17 | static u64 native_steal_clock(int cpu) |
| 18 | { |
| 19 | return 0; |
| 20 | } |
| 21 | |
| 22 | DEFINE_STATIC_CALL(pv_steal_clock, native_steal_clock); |
| 23 | |
| 24 | static bool steal_acc = true; |
| 25 | |
| 26 | static int __init parse_no_stealacc(char *arg) |
| 27 | { |
| 28 | steal_acc = false; |
| 29 | return 0; |
| 30 | } |
| 31 | early_param("no-steal-acc" , parse_no_stealacc); |
| 32 | |
| 33 | static u64 paravt_steal_clock(int cpu) |
| 34 | { |
| 35 | int version; |
| 36 | u64 steal; |
| 37 | struct kvm_steal_time *src; |
| 38 | |
| 39 | src = &per_cpu(steal_time, cpu); |
| 40 | do { |
| 41 | |
| 42 | version = src->version; |
| 43 | virt_rmb(); /* Make sure that the version is read before the steal */ |
| 44 | steal = src->steal; |
| 45 | virt_rmb(); /* Make sure that the steal is read before the next version */ |
| 46 | |
| 47 | } while ((version & 1) || (version != src->version)); |
| 48 | |
| 49 | return steal; |
| 50 | } |
| 51 | |
| 52 | #ifdef CONFIG_SMP |
| 53 | static struct smp_ops native_ops; |
| 54 | |
| 55 | static void pv_send_ipi_single(int cpu, unsigned int action) |
| 56 | { |
| 57 | int min, old; |
| 58 | irq_cpustat_t *info = &per_cpu(irq_stat, cpu); |
| 59 | |
| 60 | if (unlikely(action == ACTION_BOOT_CPU)) { |
| 61 | native_ops.send_ipi_single(cpu, action); |
| 62 | return; |
| 63 | } |
| 64 | |
| 65 | old = atomic_fetch_or(BIT(action), v: &info->message); |
| 66 | if (old) |
| 67 | return; |
| 68 | |
| 69 | min = cpu_logical_map(cpu); |
| 70 | kvm_hypercall3(nr: KVM_HCALL_FUNC_IPI, p1: 1, p2: 0, p3: min); |
| 71 | } |
| 72 | |
| 73 | #define KVM_IPI_CLUSTER_SIZE (2 * BITS_PER_LONG) |
| 74 | |
| 75 | static void pv_send_ipi_mask(const struct cpumask *mask, unsigned int action) |
| 76 | { |
| 77 | int i, cpu, min = 0, max = 0, old; |
| 78 | __uint128_t bitmap = 0; |
| 79 | irq_cpustat_t *info; |
| 80 | |
| 81 | if (cpumask_empty(srcp: mask)) |
| 82 | return; |
| 83 | |
| 84 | if (unlikely(action == ACTION_BOOT_CPU)) { |
| 85 | native_ops.send_ipi_mask(mask, action); |
| 86 | return; |
| 87 | } |
| 88 | |
| 89 | action = BIT(action); |
| 90 | for_each_cpu(i, mask) { |
| 91 | info = &per_cpu(irq_stat, i); |
| 92 | old = atomic_fetch_or(i: action, v: &info->message); |
| 93 | if (old) |
| 94 | continue; |
| 95 | |
| 96 | cpu = cpu_logical_map(i); |
| 97 | if (!bitmap) { |
| 98 | min = max = cpu; |
| 99 | } else if (cpu < min && cpu > (max - KVM_IPI_CLUSTER_SIZE)) { |
| 100 | /* cpu < min, and bitmap still enough */ |
| 101 | bitmap <<= min - cpu; |
| 102 | min = cpu; |
| 103 | } else if (cpu > min && cpu < (min + KVM_IPI_CLUSTER_SIZE)) { |
| 104 | /* cpu > min, and bitmap still enough */ |
| 105 | max = cpu > max ? cpu : max; |
| 106 | } else { |
| 107 | /* |
| 108 | * With cpu, bitmap will exceed KVM_IPI_CLUSTER_SIZE, |
| 109 | * send IPI here directly and skip the remaining CPUs. |
| 110 | */ |
| 111 | kvm_hypercall3(nr: KVM_HCALL_FUNC_IPI, p1: (unsigned long)bitmap, |
| 112 | p2: (unsigned long)(bitmap >> BITS_PER_LONG), p3: min); |
| 113 | min = max = cpu; |
| 114 | bitmap = 0; |
| 115 | } |
| 116 | __set_bit(cpu - min, (unsigned long *)&bitmap); |
| 117 | } |
| 118 | |
| 119 | if (bitmap) |
| 120 | kvm_hypercall3(nr: KVM_HCALL_FUNC_IPI, p1: (unsigned long)bitmap, |
| 121 | p2: (unsigned long)(bitmap >> BITS_PER_LONG), p3: min); |
| 122 | } |
| 123 | |
| 124 | static irqreturn_t pv_ipi_interrupt(int irq, void *dev) |
| 125 | { |
| 126 | u32 action; |
| 127 | irq_cpustat_t *info; |
| 128 | |
| 129 | /* Clear SWI interrupt */ |
| 130 | clear_csr_estat(1 << INT_SWI0); |
| 131 | info = this_cpu_ptr(&irq_stat); |
| 132 | action = atomic_xchg(v: &info->message, new: 0); |
| 133 | |
| 134 | if (action & SMP_RESCHEDULE) { |
| 135 | scheduler_ipi(); |
| 136 | info->ipi_irqs[IPI_RESCHEDULE]++; |
| 137 | } |
| 138 | |
| 139 | if (action & SMP_CALL_FUNCTION) { |
| 140 | generic_smp_call_function_interrupt(); |
| 141 | info->ipi_irqs[IPI_CALL_FUNCTION]++; |
| 142 | } |
| 143 | |
| 144 | if (action & SMP_IRQ_WORK) { |
| 145 | irq_work_run(); |
| 146 | info->ipi_irqs[IPI_IRQ_WORK]++; |
| 147 | } |
| 148 | |
| 149 | if (action & SMP_CLEAR_VECTOR) { |
| 150 | complete_irq_moving(); |
| 151 | info->ipi_irqs[IPI_CLEAR_VECTOR]++; |
| 152 | } |
| 153 | |
| 154 | return IRQ_HANDLED; |
| 155 | } |
| 156 | |
| 157 | static void pv_init_ipi(void) |
| 158 | { |
| 159 | int r, swi; |
| 160 | |
| 161 | /* Init native ipi irq for ACTION_BOOT_CPU */ |
| 162 | native_ops.init_ipi(); |
| 163 | swi = get_percpu_irq(INT_SWI0); |
| 164 | if (swi < 0) |
| 165 | panic(fmt: "SWI0 IRQ mapping failed\n" ); |
| 166 | irq_set_percpu_devid(swi); |
| 167 | r = request_percpu_irq(irq: swi, handler: pv_ipi_interrupt, devname: "SWI0-IPI" , percpu_dev_id: &irq_stat); |
| 168 | if (r < 0) |
| 169 | panic(fmt: "SWI0 IRQ request failed\n" ); |
| 170 | } |
| 171 | #endif |
| 172 | |
| 173 | bool kvm_para_available(void) |
| 174 | { |
| 175 | int config; |
| 176 | static int hypervisor_type; |
| 177 | |
| 178 | if (!cpu_has_hypervisor) |
| 179 | return false; |
| 180 | |
| 181 | if (!hypervisor_type) { |
| 182 | config = read_cpucfg(CPUCFG_KVM_SIG); |
| 183 | if (!memcmp(p: &config, KVM_SIGNATURE, size: 4)) |
| 184 | hypervisor_type = HYPERVISOR_KVM; |
| 185 | } |
| 186 | |
| 187 | return hypervisor_type == HYPERVISOR_KVM; |
| 188 | } |
| 189 | |
| 190 | unsigned int kvm_arch_para_features(void) |
| 191 | { |
| 192 | static unsigned int feature; |
| 193 | |
| 194 | if (!kvm_para_available()) |
| 195 | return 0; |
| 196 | |
| 197 | if (!feature) |
| 198 | feature = read_cpucfg(CPUCFG_KVM_FEATURE); |
| 199 | |
| 200 | return feature; |
| 201 | } |
| 202 | |
| 203 | int __init pv_ipi_init(void) |
| 204 | { |
| 205 | if (!kvm_para_has_feature(feature: KVM_FEATURE_IPI)) |
| 206 | return 0; |
| 207 | |
| 208 | #ifdef CONFIG_SMP |
| 209 | native_ops = mp_ops; |
| 210 | mp_ops.init_ipi = pv_init_ipi; |
| 211 | mp_ops.send_ipi_single = pv_send_ipi_single; |
| 212 | mp_ops.send_ipi_mask = pv_send_ipi_mask; |
| 213 | #endif |
| 214 | |
| 215 | return 0; |
| 216 | } |
| 217 | |
| 218 | static int pv_enable_steal_time(void) |
| 219 | { |
| 220 | int cpu = smp_processor_id(); |
| 221 | unsigned long addr; |
| 222 | struct kvm_steal_time *st; |
| 223 | |
| 224 | if (!has_steal_clock) |
| 225 | return -EPERM; |
| 226 | |
| 227 | st = &per_cpu(steal_time, cpu); |
| 228 | addr = per_cpu_ptr_to_phys(addr: st); |
| 229 | |
| 230 | /* The whole structure kvm_steal_time should be in one page */ |
| 231 | if (PFN_DOWN(addr) != PFN_DOWN(addr + sizeof(*st))) { |
| 232 | pr_warn("Illegal PV steal time addr %lx\n" , addr); |
| 233 | return -EFAULT; |
| 234 | } |
| 235 | |
| 236 | addr |= KVM_STEAL_PHYS_VALID; |
| 237 | kvm_hypercall2(KVM_HCALL_FUNC_NOTIFY, BIT(KVM_FEATURE_STEAL_TIME), addr); |
| 238 | |
| 239 | return 0; |
| 240 | } |
| 241 | |
| 242 | static void pv_disable_steal_time(void) |
| 243 | { |
| 244 | if (has_steal_clock) |
| 245 | kvm_hypercall2(KVM_HCALL_FUNC_NOTIFY, BIT(KVM_FEATURE_STEAL_TIME), 0); |
| 246 | } |
| 247 | |
| 248 | #ifdef CONFIG_SMP |
/*
 * CPU hotplug "online" callback: enable steal time on the calling CPU with
 * local interrupts disabled.  The result of pv_enable_steal_time() is
 * ignored and 0 is always returned.  @cpu is not used.
 */
static int pv_time_cpu_online(unsigned int cpu)
{
	unsigned long irqflags;

	local_irq_save(irqflags);
	pv_enable_steal_time();
	local_irq_restore(irqflags);

	return 0;
}
| 259 | |
/*
 * CPU hotplug teardown callback: disable steal time on the calling CPU
 * with local interrupts disabled.  Always returns 0.  @cpu is not used.
 */
static int pv_time_cpu_down_prepare(unsigned int cpu)
{
	unsigned long irqflags;

	local_irq_save(irqflags);
	pv_disable_steal_time();
	local_irq_restore(irqflags);

	return 0;
}
| 270 | #endif |
| 271 | |
/*
 * Cross-call target used by pv_reboot_notify(): disables steal time on the
 * CPU it runs on.  @unused is ignored.
 */
static void pv_cpu_reboot(void *unused)
{
	pv_disable_steal_time();
}
| 276 | |
| 277 | static int pv_reboot_notify(struct notifier_block *nb, unsigned long code, void *unused) |
| 278 | { |
| 279 | on_each_cpu(func: pv_cpu_reboot, NULL, wait: 1); |
| 280 | return NOTIFY_DONE; |
| 281 | } |
| 282 | |
| 283 | static struct notifier_block pv_reboot_nb = { |
| 284 | .notifier_call = pv_reboot_notify, |
| 285 | }; |
| 286 | |
| 287 | int __init pv_time_init(void) |
| 288 | { |
| 289 | int r; |
| 290 | |
| 291 | if (!kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) |
| 292 | return 0; |
| 293 | |
| 294 | has_steal_clock = 1; |
| 295 | r = pv_enable_steal_time(); |
| 296 | if (r < 0) { |
| 297 | has_steal_clock = 0; |
| 298 | return 0; |
| 299 | } |
| 300 | register_reboot_notifier(&pv_reboot_nb); |
| 301 | |
| 302 | #ifdef CONFIG_SMP |
| 303 | r = cpuhp_setup_state_nocalls(state: CPUHP_AP_ONLINE_DYN, |
| 304 | name: "loongarch/pv_time:online" , |
| 305 | startup: pv_time_cpu_online, teardown: pv_time_cpu_down_prepare); |
| 306 | if (r < 0) { |
| 307 | has_steal_clock = 0; |
| 308 | pr_err("Failed to install cpu hotplug callbacks\n" ); |
| 309 | return r; |
| 310 | } |
| 311 | #endif |
| 312 | |
| 313 | static_call_update(pv_steal_clock, paravt_steal_clock); |
| 314 | |
| 315 | static_key_slow_inc(key: ¶virt_steal_enabled); |
| 316 | #ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING |
| 317 | if (steal_acc) |
| 318 | static_key_slow_inc(key: ¶virt_steal_rq_enabled); |
| 319 | #endif |
| 320 | |
| 321 | pr_info("Using paravirt steal-time\n" ); |
| 322 | |
| 323 | return 0; |
| 324 | } |
| 325 | |
| 326 | int __init pv_spinlock_init(void) |
| 327 | { |
| 328 | if (!cpu_has_hypervisor) |
| 329 | return 0; |
| 330 | |
| 331 | static_branch_enable(&virt_spin_lock_key); |
| 332 | |
| 333 | return 0; |
| 334 | } |
| 335 | |