1// SPDX-License-Identifier: GPL-2.0-or-later
2/*
3 * Virtio PCI driver - modern (virtio 1.0) device support
4 *
5 * This module allows virtio devices to be used over a virtual PCI device.
6 * This can be used with QEMU based VMMs like KVM or Xen.
7 *
8 * Copyright IBM Corp. 2007
9 * Copyright Red Hat, Inc. 2014
10 *
11 * Authors:
12 * Anthony Liguori <aliguori@us.ibm.com>
13 * Rusty Russell <rusty@rustcorp.com.au>
14 * Michael S. Tsirkin <mst@redhat.com>
15 */
16
17#include <linux/delay.h>
18#define VIRTIO_PCI_NO_LEGACY
19#define VIRTIO_RING_NO_LEGACY
20#include "virtio_pci_common.h"
21
22static u64 vp_get_features(struct virtio_device *vdev)
23{
24 struct virtio_pci_device *vp_dev = to_vp_device(vdev);
25
26 return vp_modern_get_features(mdev: &vp_dev->mdev);
27}
28
29static void vp_transport_features(struct virtio_device *vdev, u64 features)
30{
31 struct virtio_pci_device *vp_dev = to_vp_device(vdev);
32 struct pci_dev *pci_dev = vp_dev->pci_dev;
33
34 if ((features & BIT_ULL(VIRTIO_F_SR_IOV)) &&
35 pci_find_ext_capability(dev: pci_dev, PCI_EXT_CAP_ID_SRIOV))
36 __virtio_set_bit(vdev, VIRTIO_F_SR_IOV);
37
38 if (features & BIT_ULL(VIRTIO_F_RING_RESET))
39 __virtio_set_bit(vdev, VIRTIO_F_RING_RESET);
40}
41
42static int __vp_check_common_size_one_feature(struct virtio_device *vdev, u32 fbit,
43 u32 offset, const char *fname)
44{
45 struct virtio_pci_device *vp_dev = to_vp_device(vdev);
46
47 if (!__virtio_test_bit(vdev, fbit))
48 return 0;
49
50 if (likely(vp_dev->mdev.common_len >= offset))
51 return 0;
52
53 dev_err(&vdev->dev,
54 "virtio: common cfg size(%zu) does not match the feature %s\n",
55 vp_dev->mdev.common_len, fname);
56
57 return -EINVAL;
58}
59
/*
 * Convenience wrapper around __vp_check_common_size_one_feature(): the
 * required size is the end offset of @field inside
 * struct virtio_pci_modern_common_cfg, and the feature name passed for the
 * error message is the spelling of @fbit at the call site.
 */
#define vp_check_common_size_one_feature(vdev, fbit, field)			\
	__vp_check_common_size_one_feature((vdev), (fbit),			\
		offsetofend(struct virtio_pci_modern_common_cfg, field),	\
		#fbit)
63
64static int vp_check_common_size(struct virtio_device *vdev)
65{
66 if (vp_check_common_size_one_feature(vdev, VIRTIO_F_NOTIF_CONFIG_DATA, queue_notify_data))
67 return -EINVAL;
68
69 if (vp_check_common_size_one_feature(vdev, VIRTIO_F_RING_RESET, queue_reset))
70 return -EINVAL;
71
72 return 0;
73}
74
75/* virtio config->finalize_features() implementation */
76static int vp_finalize_features(struct virtio_device *vdev)
77{
78 struct virtio_pci_device *vp_dev = to_vp_device(vdev);
79 u64 features = vdev->features;
80
81 /* Give virtio_ring a chance to accept features. */
82 vring_transport_features(vdev);
83
84 /* Give virtio_pci a chance to accept features. */
85 vp_transport_features(vdev, features);
86
87 if (!__virtio_test_bit(vdev, VIRTIO_F_VERSION_1)) {
88 dev_err(&vdev->dev, "virtio: device uses modern interface "
89 "but does not have VIRTIO_F_VERSION_1\n");
90 return -EINVAL;
91 }
92
93 if (vp_check_common_size(vdev))
94 return -EINVAL;
95
96 vp_modern_set_features(mdev: &vp_dev->mdev, features: vdev->features);
97
98 return 0;
99}
100
101/* virtio config->get() implementation */
102static void vp_get(struct virtio_device *vdev, unsigned int offset,
103 void *buf, unsigned int len)
104{
105 struct virtio_pci_device *vp_dev = to_vp_device(vdev);
106 struct virtio_pci_modern_device *mdev = &vp_dev->mdev;
107 void __iomem *device = mdev->device;
108 u8 b;
109 __le16 w;
110 __le32 l;
111
112 BUG_ON(offset + len > mdev->device_len);
113
114 switch (len) {
115 case 1:
116 b = ioread8(device + offset);
117 memcpy(buf, &b, sizeof b);
118 break;
119 case 2:
120 w = cpu_to_le16(ioread16(device + offset));
121 memcpy(buf, &w, sizeof w);
122 break;
123 case 4:
124 l = cpu_to_le32(ioread32(device + offset));
125 memcpy(buf, &l, sizeof l);
126 break;
127 case 8:
128 l = cpu_to_le32(ioread32(device + offset));
129 memcpy(buf, &l, sizeof l);
130 l = cpu_to_le32(ioread32(device + offset + sizeof l));
131 memcpy(buf + sizeof l, &l, sizeof l);
132 break;
133 default:
134 BUG();
135 }
136}
137
138/* the config->set() implementation. it's symmetric to the config->get()
139 * implementation */
140static void vp_set(struct virtio_device *vdev, unsigned int offset,
141 const void *buf, unsigned int len)
142{
143 struct virtio_pci_device *vp_dev = to_vp_device(vdev);
144 struct virtio_pci_modern_device *mdev = &vp_dev->mdev;
145 void __iomem *device = mdev->device;
146 u8 b;
147 __le16 w;
148 __le32 l;
149
150 BUG_ON(offset + len > mdev->device_len);
151
152 switch (len) {
153 case 1:
154 memcpy(&b, buf, sizeof b);
155 iowrite8(b, device + offset);
156 break;
157 case 2:
158 memcpy(&w, buf, sizeof w);
159 iowrite16(le16_to_cpu(w), device + offset);
160 break;
161 case 4:
162 memcpy(&l, buf, sizeof l);
163 iowrite32(le32_to_cpu(l), device + offset);
164 break;
165 case 8:
166 memcpy(&l, buf, sizeof l);
167 iowrite32(le32_to_cpu(l), device + offset);
168 memcpy(&l, buf + sizeof l, sizeof l);
169 iowrite32(le32_to_cpu(l), device + offset + sizeof l);
170 break;
171 default:
172 BUG();
173 }
174}
175
176static u32 vp_generation(struct virtio_device *vdev)
177{
178 struct virtio_pci_device *vp_dev = to_vp_device(vdev);
179
180 return vp_modern_generation(mdev: &vp_dev->mdev);
181}
182
183/* config->{get,set}_status() implementations */
184static u8 vp_get_status(struct virtio_device *vdev)
185{
186 struct virtio_pci_device *vp_dev = to_vp_device(vdev);
187
188 return vp_modern_get_status(mdev: &vp_dev->mdev);
189}
190
191static void vp_set_status(struct virtio_device *vdev, u8 status)
192{
193 struct virtio_pci_device *vp_dev = to_vp_device(vdev);
194
195 /* We should never be setting status to 0. */
196 BUG_ON(status == 0);
197 vp_modern_set_status(mdev: &vp_dev->mdev, status);
198}
199
200static void vp_reset(struct virtio_device *vdev)
201{
202 struct virtio_pci_device *vp_dev = to_vp_device(vdev);
203 struct virtio_pci_modern_device *mdev = &vp_dev->mdev;
204
205 /* 0 status means a reset. */
206 vp_modern_set_status(mdev, status: 0);
207 /* After writing 0 to device_status, the driver MUST wait for a read of
208 * device_status to return 0 before reinitializing the device.
209 * This will flush out the status write, and flush in device writes,
210 * including MSI-X interrupts, if any.
211 */
212 while (vp_modern_get_status(mdev))
213 msleep(msecs: 1);
214 /* Flush pending VQ/configuration callbacks. */
215 vp_synchronize_vectors(vdev);
216}
217
218static int vp_active_vq(struct virtqueue *vq, u16 msix_vec)
219{
220 struct virtio_pci_device *vp_dev = to_vp_device(vdev: vq->vdev);
221 struct virtio_pci_modern_device *mdev = &vp_dev->mdev;
222 unsigned long index;
223
224 index = vq->index;
225
226 /* activate the queue */
227 vp_modern_set_queue_size(mdev, idx: index, size: virtqueue_get_vring_size(vq));
228 vp_modern_queue_address(mdev, index, desc_addr: virtqueue_get_desc_addr(vq),
229 driver_addr: virtqueue_get_avail_addr(vq),
230 device_addr: virtqueue_get_used_addr(vq));
231
232 if (msix_vec != VIRTIO_MSI_NO_VECTOR) {
233 msix_vec = vp_modern_queue_vector(mdev, idx: index, vector: msix_vec);
234 if (msix_vec == VIRTIO_MSI_NO_VECTOR)
235 return -EBUSY;
236 }
237
238 return 0;
239}
240
241static int vp_modern_disable_vq_and_reset(struct virtqueue *vq)
242{
243 struct virtio_pci_device *vp_dev = to_vp_device(vdev: vq->vdev);
244 struct virtio_pci_modern_device *mdev = &vp_dev->mdev;
245 struct virtio_pci_vq_info *info;
246 unsigned long flags;
247
248 if (!virtio_has_feature(vdev: vq->vdev, VIRTIO_F_RING_RESET))
249 return -ENOENT;
250
251 vp_modern_set_queue_reset(mdev, index: vq->index);
252
253 info = vp_dev->vqs[vq->index];
254
255 /* delete vq from irq handler */
256 spin_lock_irqsave(&vp_dev->lock, flags);
257 list_del(entry: &info->node);
258 spin_unlock_irqrestore(lock: &vp_dev->lock, flags);
259
260 INIT_LIST_HEAD(list: &info->node);
261
262#ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION
263 __virtqueue_break(vq);
264#endif
265
266 /* For the case where vq has an exclusive irq, call synchronize_irq() to
267 * wait for completion.
268 *
269 * note: We can't use disable_irq() since it conflicts with the affinity
270 * managed IRQ that is used by some drivers.
271 */
272 if (vp_dev->per_vq_vectors && info->msix_vector != VIRTIO_MSI_NO_VECTOR)
273 synchronize_irq(irq: pci_irq_vector(dev: vp_dev->pci_dev, nr: info->msix_vector));
274
275 vq->reset = true;
276
277 return 0;
278}
279
280static int vp_modern_enable_vq_after_reset(struct virtqueue *vq)
281{
282 struct virtio_pci_device *vp_dev = to_vp_device(vdev: vq->vdev);
283 struct virtio_pci_modern_device *mdev = &vp_dev->mdev;
284 struct virtio_pci_vq_info *info;
285 unsigned long flags, index;
286 int err;
287
288 if (!vq->reset)
289 return -EBUSY;
290
291 index = vq->index;
292 info = vp_dev->vqs[index];
293
294 if (vp_modern_get_queue_reset(mdev, index))
295 return -EBUSY;
296
297 if (vp_modern_get_queue_enable(mdev, idx: index))
298 return -EBUSY;
299
300 err = vp_active_vq(vq, msix_vec: info->msix_vector);
301 if (err)
302 return err;
303
304 if (vq->callback) {
305 spin_lock_irqsave(&vp_dev->lock, flags);
306 list_add(new: &info->node, head: &vp_dev->virtqueues);
307 spin_unlock_irqrestore(lock: &vp_dev->lock, flags);
308 } else {
309 INIT_LIST_HEAD(list: &info->node);
310 }
311
312#ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION
313 __virtqueue_unbreak(vq);
314#endif
315
316 vp_modern_set_queue_enable(mdev: &vp_dev->mdev, idx: index, enable: true);
317 vq->reset = false;
318
319 return 0;
320}
321
322static u16 vp_config_vector(struct virtio_pci_device *vp_dev, u16 vector)
323{
324 return vp_modern_config_vector(mdev: &vp_dev->mdev, vector);
325}
326
327static bool vp_notify_with_data(struct virtqueue *vq)
328{
329 u32 data = vring_notification_data(vq: vq);
330
331 iowrite32(data, (void __iomem *)vq->priv);
332
333 return true;
334}
335
336static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
337 struct virtio_pci_vq_info *info,
338 unsigned int index,
339 void (*callback)(struct virtqueue *vq),
340 const char *name,
341 bool ctx,
342 u16 msix_vec)
343{
344
345 struct virtio_pci_modern_device *mdev = &vp_dev->mdev;
346 bool (*notify)(struct virtqueue *vq);
347 struct virtqueue *vq;
348 u16 num;
349 int err;
350
351 if (__virtio_test_bit(vdev: &vp_dev->vdev, VIRTIO_F_NOTIFICATION_DATA))
352 notify = vp_notify_with_data;
353 else
354 notify = vp_notify;
355
356 if (index >= vp_modern_get_num_queues(mdev))
357 return ERR_PTR(error: -EINVAL);
358
359 /* Check if queue is either not available or already active. */
360 num = vp_modern_get_queue_size(mdev, idx: index);
361 if (!num || vp_modern_get_queue_enable(mdev, idx: index))
362 return ERR_PTR(error: -ENOENT);
363
364 info->msix_vector = msix_vec;
365
366 /* create the vring */
367 vq = vring_create_virtqueue(index, num,
368 SMP_CACHE_BYTES, vdev: &vp_dev->vdev,
369 weak_barriers: true, may_reduce_num: true, ctx,
370 notify, callback, name);
371 if (!vq)
372 return ERR_PTR(error: -ENOMEM);
373
374 vq->num_max = num;
375
376 err = vp_active_vq(vq, msix_vec);
377 if (err)
378 goto err;
379
380 vq->priv = (void __force *)vp_modern_map_vq_notify(mdev, index, NULL);
381 if (!vq->priv) {
382 err = -ENOMEM;
383 goto err;
384 }
385
386 return vq;
387
388err:
389 vring_del_virtqueue(vq);
390 return ERR_PTR(error: err);
391}
392
393static int vp_modern_find_vqs(struct virtio_device *vdev, unsigned int nvqs,
394 struct virtqueue *vqs[],
395 vq_callback_t *callbacks[],
396 const char * const names[], const bool *ctx,
397 struct irq_affinity *desc)
398{
399 struct virtio_pci_device *vp_dev = to_vp_device(vdev);
400 struct virtqueue *vq;
401 int rc = vp_find_vqs(vdev, nvqs, vqs, callbacks, names, ctx, desc);
402
403 if (rc)
404 return rc;
405
406 /* Select and activate all queues. Has to be done last: once we do
407 * this, there's no way to go back except reset.
408 */
409 list_for_each_entry(vq, &vdev->vqs, list)
410 vp_modern_set_queue_enable(mdev: &vp_dev->mdev, idx: vq->index, enable: true);
411
412 return 0;
413}
414
415static void del_vq(struct virtio_pci_vq_info *info)
416{
417 struct virtqueue *vq = info->vq;
418 struct virtio_pci_device *vp_dev = to_vp_device(vdev: vq->vdev);
419 struct virtio_pci_modern_device *mdev = &vp_dev->mdev;
420
421 if (vp_dev->msix_enabled)
422 vp_modern_queue_vector(mdev, idx: vq->index,
423 VIRTIO_MSI_NO_VECTOR);
424
425 if (!mdev->notify_base)
426 pci_iounmap(dev: mdev->pci_dev, (void __force __iomem *)vq->priv);
427
428 vring_del_virtqueue(vq);
429}
430
431static int virtio_pci_find_shm_cap(struct pci_dev *dev, u8 required_id,
432 u8 *bar, u64 *offset, u64 *len)
433{
434 int pos;
435
436 for (pos = pci_find_capability(dev, PCI_CAP_ID_VNDR); pos > 0;
437 pos = pci_find_next_capability(dev, pos, PCI_CAP_ID_VNDR)) {
438 u8 type, cap_len, id, res_bar;
439 u32 tmp32;
440 u64 res_offset, res_length;
441
442 pci_read_config_byte(dev, where: pos + offsetof(struct virtio_pci_cap,
443 cfg_type), val: &type);
444 if (type != VIRTIO_PCI_CAP_SHARED_MEMORY_CFG)
445 continue;
446
447 pci_read_config_byte(dev, where: pos + offsetof(struct virtio_pci_cap,
448 cap_len), val: &cap_len);
449 if (cap_len != sizeof(struct virtio_pci_cap64)) {
450 dev_err(&dev->dev, "%s: shm cap with bad size offset:"
451 " %d size: %d\n", __func__, pos, cap_len);
452 continue;
453 }
454
455 pci_read_config_byte(dev, where: pos + offsetof(struct virtio_pci_cap,
456 id), val: &id);
457 if (id != required_id)
458 continue;
459
460 pci_read_config_byte(dev, where: pos + offsetof(struct virtio_pci_cap,
461 bar), val: &res_bar);
462 if (res_bar >= PCI_STD_NUM_BARS)
463 continue;
464
465 /* Type and ID match, and the BAR value isn't reserved.
466 * Looks good.
467 */
468
469 /* Read the lower 32bit of length and offset */
470 pci_read_config_dword(dev, where: pos + offsetof(struct virtio_pci_cap,
471 offset), val: &tmp32);
472 res_offset = tmp32;
473 pci_read_config_dword(dev, where: pos + offsetof(struct virtio_pci_cap,
474 length), val: &tmp32);
475 res_length = tmp32;
476
477 /* and now the top half */
478 pci_read_config_dword(dev,
479 where: pos + offsetof(struct virtio_pci_cap64,
480 offset_hi), val: &tmp32);
481 res_offset |= ((u64)tmp32) << 32;
482 pci_read_config_dword(dev,
483 where: pos + offsetof(struct virtio_pci_cap64,
484 length_hi), val: &tmp32);
485 res_length |= ((u64)tmp32) << 32;
486
487 *bar = res_bar;
488 *offset = res_offset;
489 *len = res_length;
490
491 return pos;
492 }
493 return 0;
494}
495
496static bool vp_get_shm_region(struct virtio_device *vdev,
497 struct virtio_shm_region *region, u8 id)
498{
499 struct virtio_pci_device *vp_dev = to_vp_device(vdev);
500 struct pci_dev *pci_dev = vp_dev->pci_dev;
501 u8 bar;
502 u64 offset, len;
503 phys_addr_t phys_addr;
504 size_t bar_len;
505
506 if (!virtio_pci_find_shm_cap(dev: pci_dev, required_id: id, bar: &bar, offset: &offset, len: &len))
507 return false;
508
509 phys_addr = pci_resource_start(pci_dev, bar);
510 bar_len = pci_resource_len(pci_dev, bar);
511
512 if ((offset + len) < offset) {
513 dev_err(&pci_dev->dev, "%s: cap offset+len overflow detected\n",
514 __func__);
515 return false;
516 }
517
518 if (offset + len > bar_len) {
519 dev_err(&pci_dev->dev, "%s: bar shorter than cap offset+len\n",
520 __func__);
521 return false;
522 }
523
524 region->len = len;
525 region->addr = (u64) phys_addr + offset;
526
527 return true;
528}
529
530static const struct virtio_config_ops virtio_pci_config_nodev_ops = {
531 .get = NULL,
532 .set = NULL,
533 .generation = vp_generation,
534 .get_status = vp_get_status,
535 .set_status = vp_set_status,
536 .reset = vp_reset,
537 .find_vqs = vp_modern_find_vqs,
538 .del_vqs = vp_del_vqs,
539 .synchronize_cbs = vp_synchronize_vectors,
540 .get_features = vp_get_features,
541 .finalize_features = vp_finalize_features,
542 .bus_name = vp_bus_name,
543 .set_vq_affinity = vp_set_vq_affinity,
544 .get_vq_affinity = vp_get_vq_affinity,
545 .get_shm_region = vp_get_shm_region,
546 .disable_vq_and_reset = vp_modern_disable_vq_and_reset,
547 .enable_vq_after_reset = vp_modern_enable_vq_after_reset,
548};
549
550static const struct virtio_config_ops virtio_pci_config_ops = {
551 .get = vp_get,
552 .set = vp_set,
553 .generation = vp_generation,
554 .get_status = vp_get_status,
555 .set_status = vp_set_status,
556 .reset = vp_reset,
557 .find_vqs = vp_modern_find_vqs,
558 .del_vqs = vp_del_vqs,
559 .synchronize_cbs = vp_synchronize_vectors,
560 .get_features = vp_get_features,
561 .finalize_features = vp_finalize_features,
562 .bus_name = vp_bus_name,
563 .set_vq_affinity = vp_set_vq_affinity,
564 .get_vq_affinity = vp_get_vq_affinity,
565 .get_shm_region = vp_get_shm_region,
566 .disable_vq_and_reset = vp_modern_disable_vq_and_reset,
567 .enable_vq_after_reset = vp_modern_enable_vq_after_reset,
568};
569
570/* the PCI probing function */
571int virtio_pci_modern_probe(struct virtio_pci_device *vp_dev)
572{
573 struct virtio_pci_modern_device *mdev = &vp_dev->mdev;
574 struct pci_dev *pci_dev = vp_dev->pci_dev;
575 int err;
576
577 mdev->pci_dev = pci_dev;
578
579 err = vp_modern_probe(mdev);
580 if (err)
581 return err;
582
583 if (mdev->device)
584 vp_dev->vdev.config = &virtio_pci_config_ops;
585 else
586 vp_dev->vdev.config = &virtio_pci_config_nodev_ops;
587
588 vp_dev->config_vector = vp_config_vector;
589 vp_dev->setup_vq = setup_vq;
590 vp_dev->del_vq = del_vq;
591 vp_dev->isr = mdev->isr;
592 vp_dev->vdev.id = mdev->id;
593
594 return 0;
595}
596
597void virtio_pci_modern_remove(struct virtio_pci_device *vp_dev)
598{
599 struct virtio_pci_modern_device *mdev = &vp_dev->mdev;
600
601 vp_modern_remove(mdev);
602}
603

/* Source: linux/drivers/virtio/virtio_pci_modern.c */