1 | // SPDX-License-Identifier: GPL-2.0-only |
2 | /* |
3 | * VFIO generic eventfd code for IRQFD support. |
4 | * Derived from drivers/vfio/pci/vfio_pci_intrs.c |
5 | * |
6 | * Copyright (C) 2012 Red Hat, Inc. All rights reserved. |
7 | * Author: Alex Williamson <alex.williamson@redhat.com> |
8 | */ |
9 | |
10 | #include <linux/vfio.h> |
11 | #include <linux/eventfd.h> |
12 | #include <linux/file.h> |
13 | #include <linux/module.h> |
14 | #include <linux/slab.h> |
15 | #include "vfio.h" |
16 | |
17 | static struct workqueue_struct *vfio_irqfd_cleanup_wq; |
18 | static DEFINE_SPINLOCK(virqfd_lock); |
19 | |
20 | int __init vfio_virqfd_init(void) |
21 | { |
22 | vfio_irqfd_cleanup_wq = |
		create_singlethread_workqueue("vfio-irqfd-cleanup");
24 | if (!vfio_irqfd_cleanup_wq) |
25 | return -ENOMEM; |
26 | |
27 | return 0; |
28 | } |
29 | |
30 | void vfio_virqfd_exit(void) |
31 | { |
	destroy_workqueue(vfio_irqfd_cleanup_wq);
33 | } |
34 | |
35 | static void virqfd_deactivate(struct virqfd *virqfd) |
36 | { |
	queue_work(vfio_irqfd_cleanup_wq, &virqfd->shutdown);
38 | } |
39 | |
40 | static int virqfd_wakeup(wait_queue_entry_t *wait, unsigned mode, int sync, void *key) |
41 | { |
42 | struct virqfd *virqfd = container_of(wait, struct virqfd, wait); |
43 | __poll_t flags = key_to_poll(key); |
44 | |
45 | if (flags & EPOLLIN) { |
46 | u64 cnt; |
		eventfd_ctx_do_read(virqfd->eventfd, &cnt);

		/* An event has been signaled, call the handler */
		if ((!virqfd->handler ||
		     virqfd->handler(virqfd->opaque, virqfd->data)) &&
		    virqfd->thread)
			schedule_work(&virqfd->inject);
54 | } |
55 | |
56 | if (flags & EPOLLHUP) { |
57 | unsigned long flags; |
58 | spin_lock_irqsave(&virqfd_lock, flags); |
59 | |
60 | /* |
		 * The eventfd is closing.  If the virqfd has not yet been
62 | * queued for release, as determined by testing whether the |
63 | * virqfd pointer to it is still valid, queue it now. As |
64 | * with kvm irqfds, we know we won't race against the virqfd |
65 | * going away because we hold the lock to get here. |
66 | */ |
67 | if (*(virqfd->pvirqfd) == virqfd) { |
68 | *(virqfd->pvirqfd) = NULL; |
69 | virqfd_deactivate(virqfd); |
70 | } |
71 | |
		spin_unlock_irqrestore(&virqfd_lock, flags);
73 | } |
74 | |
75 | return 0; |
76 | } |
77 | |
78 | static void virqfd_ptable_queue_proc(struct file *file, |
79 | wait_queue_head_t *wqh, poll_table *pt) |
80 | { |
81 | struct virqfd *virqfd = container_of(pt, struct virqfd, pt); |
	add_wait_queue(wqh, &virqfd->wait);
83 | } |
84 | |
85 | static void virqfd_shutdown(struct work_struct *work) |
86 | { |
87 | struct virqfd *virqfd = container_of(work, struct virqfd, shutdown); |
88 | u64 cnt; |
89 | |
	/* Stop further wakeups, collecting any residual event count */
	eventfd_ctx_remove_wait_queue(virqfd->eventfd, &virqfd->wait, &cnt);
	/* Make sure a queued thread callback has finished before freeing */
	flush_work(&virqfd->inject);
	eventfd_ctx_put(virqfd->eventfd);

	kfree(virqfd);
95 | } |
96 | |
97 | static void virqfd_inject(struct work_struct *work) |
98 | { |
99 | struct virqfd *virqfd = container_of(work, struct virqfd, inject); |
100 | if (virqfd->thread) |
101 | virqfd->thread(virqfd->opaque, virqfd->data); |
102 | } |
103 | |
104 | static void virqfd_flush_inject(struct work_struct *work) |
105 | { |
106 | struct virqfd *virqfd = container_of(work, struct virqfd, flush_inject); |
107 | |
	flush_work(&virqfd->inject);
109 | } |
110 | |
111 | int vfio_virqfd_enable(void *opaque, |
112 | int (*handler)(void *, void *), |
113 | void (*thread)(void *, void *), |
114 | void *data, struct virqfd **pvirqfd, int fd) |
115 | { |
116 | struct fd irqfd; |
117 | struct eventfd_ctx *ctx; |
118 | struct virqfd *virqfd; |
119 | int ret = 0; |
120 | __poll_t events; |
121 | |
	virqfd = kzalloc(sizeof(*virqfd), GFP_KERNEL_ACCOUNT);
123 | if (!virqfd) |
124 | return -ENOMEM; |
125 | |
126 | virqfd->pvirqfd = pvirqfd; |
127 | virqfd->opaque = opaque; |
128 | virqfd->handler = handler; |
129 | virqfd->thread = thread; |
130 | virqfd->data = data; |
131 | |
132 | INIT_WORK(&virqfd->shutdown, virqfd_shutdown); |
133 | INIT_WORK(&virqfd->inject, virqfd_inject); |
134 | INIT_WORK(&virqfd->flush_inject, virqfd_flush_inject); |
135 | |
136 | irqfd = fdget(fd); |
137 | if (!irqfd.file) { |
138 | ret = -EBADF; |
139 | goto err_fd; |
140 | } |
141 | |
	ctx = eventfd_ctx_fileget(irqfd.file);
	if (IS_ERR(ctx)) {
		ret = PTR_ERR(ctx);
145 | goto err_ctx; |
146 | } |
147 | |
148 | virqfd->eventfd = ctx; |
149 | |
150 | /* |
151 | * virqfds can be released by closing the eventfd or directly |
152 | * through ioctl. These are both done through a workqueue, so |
153 | * we update the pointer to the virqfd under lock to avoid |
154 | * pushing multiple jobs to release the same virqfd. |
155 | */ |
	spin_lock_irq(&virqfd_lock);
157 | |
158 | if (*pvirqfd) { |
		spin_unlock_irq(&virqfd_lock);
160 | ret = -EBUSY; |
161 | goto err_busy; |
162 | } |
163 | *pvirqfd = virqfd; |
164 | |
	spin_unlock_irq(&virqfd_lock);
166 | |
167 | /* |
168 | * Install our own custom wake-up handling so we are notified via |
169 | * a callback whenever someone signals the underlying eventfd. |
170 | */ |
	init_waitqueue_func_entry(&virqfd->wait, virqfd_wakeup);
	init_poll_funcptr(&virqfd->pt, virqfd_ptable_queue_proc);

	events = vfs_poll(irqfd.file, &virqfd->pt);
175 | |
176 | /* |
177 | * Check if there was an event already pending on the eventfd |
	 * before we registered, and trigger it as if we didn't miss it.
179 | */ |
180 | if (events & EPOLLIN) { |
181 | if ((!handler || handler(opaque, data)) && thread) |
			schedule_work(&virqfd->inject);
183 | } |
184 | |
185 | /* |
186 | * Do not drop the file until the irqfd is fully initialized, |
187 | * otherwise we might race against the EPOLLHUP. |
188 | */ |
	fdput(irqfd);
190 | |
191 | return 0; |
192 | err_busy: |
193 | eventfd_ctx_put(ctx); |
194 | err_ctx: |
	fdput(irqfd);
err_fd:
	kfree(virqfd);
198 | |
199 | return ret; |
200 | } |
201 | EXPORT_SYMBOL_GPL(vfio_virqfd_enable); |
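
/*
 * Example usage, a minimal sketch: struct my_dev and my_set_trigger() are
 * hypothetical, and my_handler/my_thread are as sketched above.  The caller
 * owns the storage behind the virqfd pointer, which must outlive the virqfd:
 *
 *	struct my_dev {
 *		struct virqfd *virqfd;
 *	};
 *
 *	static int my_set_trigger(struct my_dev *mdev, int fd)
 *	{
 *		return vfio_virqfd_enable(mdev, my_handler, my_thread,
 *					  NULL, &mdev->virqfd, fd);
 *	}
 */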
202 | |
203 | void vfio_virqfd_disable(struct virqfd **pvirqfd) |
204 | { |
205 | unsigned long flags; |
206 | |
207 | spin_lock_irqsave(&virqfd_lock, flags); |
208 | |
209 | if (*pvirqfd) { |
		virqfd_deactivate(*pvirqfd);
211 | *pvirqfd = NULL; |
212 | } |
213 | |
	spin_unlock_irqrestore(&virqfd_lock, flags);
215 | |
216 | /* |
217 | * Block until we know all outstanding shutdown jobs have completed. |
218 | * Even if we don't queue the job, flush the wq to be sure it's |
219 | * been released. |
220 | */ |
221 | flush_workqueue(vfio_irqfd_cleanup_wq); |
222 | } |
223 | EXPORT_SYMBOL_GPL(vfio_virqfd_disable); |
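
/*
 * A hypothetical teardown path pairing the enable sketch above.  Once this
 * returns, the shutdown work has run: the wait-queue entry is removed, any
 * in-flight thread callback has finished, and the virqfd has been freed:
 *
 *	static void my_clear_trigger(struct my_dev *mdev)
 *	{
 *		vfio_virqfd_disable(&mdev->virqfd);
 *	}
 */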
224 | |
225 | void vfio_virqfd_flush_thread(struct virqfd **pvirqfd) |
226 | { |
227 | unsigned long flags; |
228 | |
229 | spin_lock_irqsave(&virqfd_lock, flags); |
230 | if (*pvirqfd && (*pvirqfd)->thread) |
		queue_work(vfio_irqfd_cleanup_wq, &(*pvirqfd)->flush_inject);
	spin_unlock_irqrestore(&virqfd_lock, flags);
233 | |
234 | flush_workqueue(vfio_irqfd_cleanup_wq); |
235 | } |
236 | EXPORT_SYMBOL_GPL(vfio_virqfd_flush_thread); |
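
/*
 * A sketch of when a caller might use the flush above: before acting on
 * state that a still-queued thread callback could be updating (the mask
 * path and names here are hypothetical):
 *
 *	static void my_mask(struct my_dev *mdev)
 *	{
 *		vfio_virqfd_flush_thread(&mdev->virqfd);
 *		(any previously queued thread callback has completed here)
 *	}
 */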
237 | |