// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/nospec.h>
#include <linux/io_uring.h>

#include <uapi/linux/io_uring.h>

#include "io_uring.h"
#include "rsrc.h"
#include "filetable.h"
#include "msg_ring.h"

/* All valid masks for MSG_RING */
#define IORING_MSG_RING_MASK		(IORING_MSG_RING_CQE_SKIP | \
					 IORING_MSG_RING_FLAGS_PASS)

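/*
 * Per-request state for IORING_OP_MSG_RING. ->file is the target ring's
 * file, ->src_file holds a grabbed reference for IORING_MSG_SEND_FD.
 * ->dst_fd and ->cqe_flags share storage: MSG_SEND_FD uses the value as
 * the destination slot, while MSG_DATA with IORING_MSG_RING_FLAGS_PASS
 * reuses it as the CQE flags to pass through to the target.
 */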
struct io_msg {
	struct file			*file;
	struct file			*src_file;
	struct callback_head		tw;
	u64 user_data;
	u32 len;
	u32 cmd;
	u32 src_fd;
	union {
		u32 dst_fd;
		u32 cqe_flags;
	};
	u32 flags;
};

static void io_double_unlock_ctx(struct io_ring_ctx *octx)
{
	mutex_unlock(&octx->uring_lock);
}

static int io_double_lock_ctx(struct io_ring_ctx *octx,
			      unsigned int issue_flags)
{
	/*
	 * To ensure proper ordering between the two ctxs, we can only
	 * attempt a trylock on the target. If that fails and we already have
	 * the source ctx lock, punt to io-wq.
	 */
	if (!(issue_flags & IO_URING_F_UNLOCKED)) {
		if (!mutex_trylock(&octx->uring_lock))
			return -EAGAIN;
		return 0;
	}
	mutex_lock(&octx->uring_lock);
	return 0;
}

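/*
 * Drop the reference on a source file that was grabbed for MSG_SEND_FD
 * but never installed, e.g. because the request failed after the grab.
 */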
void io_msg_ring_cleanup(struct io_kiocb *req)
{
	struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);

	if (WARN_ON_ONCE(!msg->src_file))
		return;

	fput(msg->src_file);
	msg->src_file = NULL;
}

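/*
 * If the target ring only allows completions to be posted by its
 * submitter task (->task_complete, as with IORING_SETUP_DEFER_TASKRUN),
 * and we are not that task, the CQE has to be posted via task_work
 * instead of directly.
 */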
static inline bool io_msg_need_remote(struct io_ring_ctx *target_ctx)
{
	if (!target_ctx->task_complete)
		return false;
	return current != target_ctx->submitter_task;
}

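/*
 * Queue @func as task_work on the target ring's submitter task. The
 * request is completed later from that context, so tell the core to skip
 * completing it here.
 */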
static int io_msg_exec_remote(struct io_kiocb *req, task_work_func_t func)
{
	struct io_ring_ctx *ctx = req->file->private_data;
	struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
	struct task_struct *task = READ_ONCE(ctx->submitter_task);

	if (unlikely(!task))
		return -EOWNERDEAD;

	init_task_work(&msg->tw, func);
	if (task_work_add(ctx->submitter_task, &msg->tw, TWA_SIGNAL))
		return -EOWNERDEAD;

	return IOU_ISSUE_SKIP_COMPLETE;
}

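/*
 * Task_work callback for IORING_MSG_DATA: runs in the context of the
 * target ring's submitter task and posts the CQE from there.
 */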
static void io_msg_tw_complete(struct callback_head *head)
{
	struct io_msg *msg = container_of(head, struct io_msg, tw);
	struct io_kiocb *req = cmd_to_io_kiocb(msg);
	struct io_ring_ctx *target_ctx = req->file->private_data;
	int ret = 0;

	if (current->flags & PF_EXITING) {
		ret = -EOWNERDEAD;
	} else {
		u32 flags = 0;

		if (msg->flags & IORING_MSG_RING_FLAGS_PASS)
			flags = msg->cqe_flags;

		/*
		 * If the target ring is using IOPOLL mode, then we need to be
		 * holding the uring_lock for posting completions. Other ring
		 * types rely on the regular completion locking, which is
		 * handled while posting.
		 */
		if (target_ctx->flags & IORING_SETUP_IOPOLL)
			mutex_lock(&target_ctx->uring_lock);
		if (!io_post_aux_cqe(target_ctx, msg->user_data, msg->len, flags))
			ret = -EOVERFLOW;
		if (target_ctx->flags & IORING_SETUP_IOPOLL)
			mutex_unlock(&target_ctx->uring_lock);
	}

	if (ret < 0)
		req_set_fail(req);
	io_req_queue_tw_complete(req, ret);
}

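/*
 * IORING_MSG_DATA: post a CQE to the target ring with ->user_data as the
 * completion data and ->len as the result. With
 * IORING_MSG_RING_FLAGS_PASS, ->cqe_flags is passed through as the CQE
 * flags.
 */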
static int io_msg_ring_data(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_ring_ctx *target_ctx = req->file->private_data;
	struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
	u32 flags = 0;
	int ret;

	if (msg->src_fd || msg->flags & ~IORING_MSG_RING_FLAGS_PASS)
		return -EINVAL;
	if (!(msg->flags & IORING_MSG_RING_FLAGS_PASS) && msg->dst_fd)
		return -EINVAL;
	if (target_ctx->flags & IORING_SETUP_R_DISABLED)
		return -EBADFD;

	if (io_msg_need_remote(target_ctx))
		return io_msg_exec_remote(req, io_msg_tw_complete);

	if (msg->flags & IORING_MSG_RING_FLAGS_PASS)
		flags = msg->cqe_flags;

	ret = -EOVERFLOW;
	if (target_ctx->flags & IORING_SETUP_IOPOLL) {
		if (unlikely(io_double_lock_ctx(target_ctx, issue_flags)))
			return -EAGAIN;
		if (io_post_aux_cqe(target_ctx, msg->user_data, msg->len, flags))
			ret = 0;
		io_double_unlock_ctx(target_ctx);
	} else {
		if (io_post_aux_cqe(target_ctx, msg->user_data, msg->len, flags))
			ret = 0;
	}
	return ret;
}

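/*
 * Look up ->src_fd in the source ring's fixed file table and take a
 * reference on the file. Returns NULL if the slot is out of range or
 * empty.
 */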
static struct file *io_msg_grab_file(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
	struct io_ring_ctx *ctx = req->ctx;
	struct file *file = NULL;
	int idx = msg->src_fd;

	io_ring_submit_lock(ctx, issue_flags);
	if (likely(idx < ctx->nr_user_files)) {
		idx = array_index_nospec(idx, ctx->nr_user_files);
		file = io_file_from_index(&ctx->file_table, idx);
		if (file)
			get_file(file);
	}
	io_ring_submit_unlock(ctx, issue_flags);
	return file;
}

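/*
 * Install the grabbed source file into the target ring's fixed file table
 * at slot ->dst_fd, then post a CQE on the target unless
 * IORING_MSG_RING_CQE_SKIP was requested. Takes the target uring_lock via
 * io_double_lock_ctx().
 */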
static int io_msg_install_complete(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_ring_ctx *target_ctx = req->file->private_data;
	struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
	struct file *src_file = msg->src_file;
	int ret;

	if (unlikely(io_double_lock_ctx(target_ctx, issue_flags)))
		return -EAGAIN;

	ret = __io_fixed_fd_install(target_ctx, src_file, msg->dst_fd);
	if (ret < 0)
		goto out_unlock;

	msg->src_file = NULL;
	req->flags &= ~REQ_F_NEED_CLEANUP;

	if (msg->flags & IORING_MSG_RING_CQE_SKIP)
		goto out_unlock;
	/*
	 * If this fails, the target still received the file descriptor but
	 * wasn't notified of the fact. This means that if this request
	 * completes with -EOVERFLOW, then the sender must ensure that a
	 * later IORING_OP_MSG_RING delivers the message.
	 */
	if (!io_post_aux_cqe(target_ctx, msg->user_data, ret, 0))
		ret = -EOVERFLOW;
out_unlock:
	io_double_unlock_ctx(target_ctx);
	return ret;
}

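/*
 * Task_work callback for IORING_MSG_SEND_FD, run from the target ring's
 * submitter task. IO_URING_F_UNLOCKED tells the install path that it may
 * block on the target uring_lock.
 */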
static void io_msg_tw_fd_complete(struct callback_head *head)
{
	struct io_msg *msg = container_of(head, struct io_msg, tw);
	struct io_kiocb *req = cmd_to_io_kiocb(msg);
	int ret = -EOWNERDEAD;

	if (!(current->flags & PF_EXITING))
		ret = io_msg_install_complete(req, IO_URING_F_UNLOCKED);
	if (ret < 0)
		req_set_fail(req);
	io_req_queue_tw_complete(req, ret);
}

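/*
 * IORING_MSG_SEND_FD: pass a fixed file from the source ring to the
 * target ring's fixed file table.
 */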
static int io_msg_send_fd(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_ring_ctx *target_ctx = req->file->private_data;
	struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
	struct io_ring_ctx *ctx = req->ctx;
	struct file *src_file = msg->src_file;

	if (msg->len)
		return -EINVAL;
	if (target_ctx == ctx)
		return -EINVAL;
	if (target_ctx->flags & IORING_SETUP_R_DISABLED)
		return -EBADFD;
	if (!src_file) {
		src_file = io_msg_grab_file(req, issue_flags);
		if (!src_file)
			return -EBADF;
		msg->src_file = src_file;
		req->flags |= REQ_F_NEED_CLEANUP;
	}

	if (io_msg_need_remote(target_ctx))
		return io_msg_exec_remote(req, io_msg_tw_fd_complete);
	return io_msg_install_complete(req, issue_flags);
}

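/* Read and validate the MSG_RING fields from the SQE. */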
int io_msg_ring_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);

	if (unlikely(sqe->buf_index || sqe->personality))
		return -EINVAL;

	msg->src_file = NULL;
	msg->user_data = READ_ONCE(sqe->off);
	msg->len = READ_ONCE(sqe->len);
	msg->cmd = READ_ONCE(sqe->addr);
	msg->src_fd = READ_ONCE(sqe->addr3);
	msg->dst_fd = READ_ONCE(sqe->file_index);
	msg->flags = READ_ONCE(sqe->msg_ring_flags);
	if (msg->flags & ~IORING_MSG_RING_MASK)
		return -EINVAL;

	return 0;
}

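/*
 * Issue handler for IORING_OP_MSG_RING: dispatch on ->cmd. -EAGAIN and
 * IOU_ISSUE_SKIP_COMPLETE are passed back to the core for retry or remote
 * completion; everything else is completed here.
 */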
int io_msg_ring(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
	int ret;

	ret = -EBADFD;
	if (!io_is_uring_fops(req->file))
		goto done;

	switch (msg->cmd) {
	case IORING_MSG_DATA:
		ret = io_msg_ring_data(req, issue_flags);
		break;
	case IORING_MSG_SEND_FD:
		ret = io_msg_send_fd(req, issue_flags);
		break;
	default:
		ret = -EINVAL;
		break;
	}

done:
	if (ret < 0) {
		if (ret == -EAGAIN || ret == IOU_ISSUE_SKIP_COMPLETE)
			return ret;
		req_set_fail(req);
	}
	io_req_set_res(req, ret, 0);
	return IOU_OK;
}