1 | /* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) OR MIT */ |
2 | /* |
3 | * Header file for the io_uring interface. |
4 | * |
5 | * Copyright (C) 2019 Jens Axboe |
6 | * Copyright (C) 2019 Christoph Hellwig |
7 | */ |
8 | #ifndef LINUX_IO_URING_H |
9 | #define LINUX_IO_URING_H |
10 | |
11 | #include <linux/fs.h> |
12 | #include <linux/types.h> |
/*
 * This file is shared with liburing, which has to autodetect whether
 * linux/time_types.h is available. If it is not available, liburing can
 * define UAPI_LINUX_IO_URING_H_SKIP_LINUX_TIME_TYPES_H to skip the include.
 */
19 | #ifndef UAPI_LINUX_IO_URING_H_SKIP_LINUX_TIME_TYPES_H |
20 | #include <linux/time_types.h> |
21 | #endif |
22 | |
23 | #ifdef __cplusplus |
24 | extern "C" { |
25 | #endif |
26 | |
/*
 * Submission Queue Entry: the data structure used to submit one IO request.
 *
 * Several fields are anonymous unions, i.e. alternative names (and in some
 * cases alternative layouts) for the same bytes of the entry.
 */
struct io_uring_sqe {
	/* type of operation for this sqe */
	__u8	opcode;
	/* IOSQE_ flags */
	__u8	flags;
	/* ioprio for the request */
	__u16	ioprio;
	/* file descriptor to do IO on */
	__s32	fd;

	union {
		/* offset into file */
		__u64	off;
		__u64	addr2;
		struct {
			__u32	cmd_op;
			__u32	__pad1;
		};
	};

	union {
		/* pointer to buffer or iovecs */
		__u64	addr;
		__u64	splice_off_in;
		struct {
			__u32	level;
			__u32	optname;
		};
	};

	/* buffer size or number of iovecs */
	__u32	len;

	/* flags word; every member below overlays the same storage */
	union {
		__kernel_rwf_t	rw_flags;
		__u32		fsync_flags;
		/* compatibility */
		__u16		poll_events;
		/* word-reversed for BE */
		__u32		poll32_events;
		__u32		sync_range_flags;
		__u32		msg_flags;
		__u32		timeout_flags;
		__u32		accept_flags;
		__u32		cancel_flags;
		__u32		open_flags;
		__u32		statx_flags;
		__u32		fadvise_advice;
		__u32		splice_flags;
		__u32		rename_flags;
		__u32		unlink_flags;
		__u32		hardlink_flags;
		__u32		xattr_flags;
		__u32		msg_ring_flags;
		__u32		uring_cmd_flags;
		__u32		waitid_flags;
		__u32		futex_flags;
	};

	/* data to be passed back at completion time */
	__u64	user_data;

	/* pack this to avoid bogus arm OABI complaints */
	union {
		/* index into fixed buffers, if used */
		__u16	buf_index;
		/* for grouped buffer selection */
		__u16	buf_group;
	} __attribute__((packed));

	/* personality to use, if used */
	__u16	personality;

	union {
		__s32	splice_fd_in;
		__u32	file_index;
		__u32	optlen;
		struct {
			__u16	addr_len;
			__u16	__pad3[1];
		};
	};

	union {
		struct {
			__u64	addr3;
			__u64	__pad2[1];
		};
		__u64	optval;
		/*
		 * If the ring is initialized with IORING_SETUP_SQE128, then
		 * this field is used for 80 bytes of arbitrary command data
		 */
		__u8	cmd[0];
	};
};
107 | |
/*
 * Special sqe->file_index value. When an opcode that instantiates a new
 * direct descriptor (like openat/openat2/accept) is submitted with this
 * index, io_uring allocates an available direct descriptor itself instead
 * of having the application pass one in. The picked direct descriptor is
 * returned in cqe->res, or -ENFILE if the space is full.
 */
#define IORING_FILE_INDEX_ALLOC		(~0U)
116 | |
/*
 * Bit positions used to build the sqe->flags masks below.
 */
enum {
	IOSQE_FIXED_FILE_BIT		= 0,
	IOSQE_IO_DRAIN_BIT		= 1,
	IOSQE_IO_LINK_BIT		= 2,
	IOSQE_IO_HARDLINK_BIT		= 3,
	IOSQE_ASYNC_BIT			= 4,
	IOSQE_BUFFER_SELECT_BIT		= 5,
	IOSQE_CQE_SKIP_SUCCESS_BIT	= 6,
};

/*
 * sqe->flags
 *
 * IOSQE_FIXED_FILE		use fixed fileset
 * IOSQE_IO_DRAIN		issue after inflight IO
 * IOSQE_IO_LINK		links next sqe
 * IOSQE_IO_HARDLINK		like LINK, but stronger
 * IOSQE_ASYNC			always go async
 * IOSQE_BUFFER_SELECT		select buffer from sqe->buf_group
 * IOSQE_CQE_SKIP_SUCCESS	don't post CQE if request succeeded
 */
#define IOSQE_FIXED_FILE	(1U << IOSQE_FIXED_FILE_BIT)
#define IOSQE_IO_DRAIN		(1U << IOSQE_IO_DRAIN_BIT)
#define IOSQE_IO_LINK		(1U << IOSQE_IO_LINK_BIT)
#define IOSQE_IO_HARDLINK	(1U << IOSQE_IO_HARDLINK_BIT)
#define IOSQE_ASYNC		(1U << IOSQE_ASYNC_BIT)
#define IOSQE_BUFFER_SELECT	(1U << IOSQE_BUFFER_SELECT_BIT)
#define IOSQE_CQE_SKIP_SUCCESS	(1U << IOSQE_CQE_SKIP_SUCCESS_BIT)
144 | |
/*
 * io_uring_setup() flags
 */

/* io_context is polled */
#define IORING_SETUP_IOPOLL		(1U << 0)
/* SQ poll thread */
#define IORING_SETUP_SQPOLL		(1U << 1)
/* sq_thread_cpu is valid */
#define IORING_SETUP_SQ_AFF		(1U << 2)
/* app defines CQ size */
#define IORING_SETUP_CQSIZE		(1U << 3)
/* clamp SQ/CQ ring sizes */
#define IORING_SETUP_CLAMP		(1U << 4)
/* attach to existing wq */
#define IORING_SETUP_ATTACH_WQ		(1U << 5)
/* start with ring disabled */
#define IORING_SETUP_R_DISABLED		(1U << 6)
/* continue submit on error */
#define IORING_SETUP_SUBMIT_ALL		(1U << 7)

/*
 * Cooperative task running. When requests complete, they often require
 * forcing the submitter to transition to the kernel to complete. If this
 * flag is set, work will be done when the task transitions anyway, rather
 * than force an inter-processor interrupt reschedule. This avoids interrupting
 * a task running in userspace, and saves an IPI.
 */
#define IORING_SETUP_COOP_TASKRUN	(1U << 8)

/*
 * If COOP_TASKRUN is set, get notified if task work is available for
 * running and a kernel transition would be needed to run it. This sets
 * IORING_SQ_TASKRUN in the sq ring flags. This flag is meant to be combined
 * with COOP_TASKRUN; it is not valid on its own.
 */
#define IORING_SETUP_TASKRUN_FLAG	(1U << 9)

/* SQEs are 128 byte */
#define IORING_SETUP_SQE128		(1U << 10)
/* CQEs are 32 byte */
#define IORING_SETUP_CQE32		(1U << 11)

/*
 * Only one task is allowed to submit requests
 */
#define IORING_SETUP_SINGLE_ISSUER	(1U << 12)

/*
 * Defer running task work to get events.
 * Rather than running bits of task work whenever the task transitions
 * try to do it just before it is needed.
 */
#define IORING_SETUP_DEFER_TASKRUN	(1U << 13)

/*
 * Application provides the memory for the rings
 */
#define IORING_SETUP_NO_MMAP		(1U << 14)

/*
 * Register the ring fd in itself for use with
 * IORING_REGISTER_USE_REGISTERED_RING; return a registered fd index rather
 * than an fd.
 */
#define IORING_SETUP_REGISTERED_FD_ONLY	(1U << 15)

/*
 * Removes indirection through the SQ index array.
 */
#define IORING_SETUP_NO_SQARRAY		(1U << 16)
200 | |
/*
 * Opcodes for sqe->opcode. Values are assigned sequentially starting at 0;
 * the trailing numbers in comments are the value of the entry they annotate.
 */
enum io_uring_op {
	IORING_OP_NOP = 0,
	IORING_OP_READV,
	IORING_OP_WRITEV,
	IORING_OP_FSYNC,
	IORING_OP_READ_FIXED,
	IORING_OP_WRITE_FIXED,
	IORING_OP_POLL_ADD,
	IORING_OP_POLL_REMOVE,
	IORING_OP_SYNC_FILE_RANGE,
	IORING_OP_SENDMSG,
	IORING_OP_RECVMSG,		/* 10 */
	IORING_OP_TIMEOUT,
	IORING_OP_TIMEOUT_REMOVE,
	IORING_OP_ACCEPT,
	IORING_OP_ASYNC_CANCEL,
	IORING_OP_LINK_TIMEOUT,
	IORING_OP_CONNECT,
	IORING_OP_FALLOCATE,
	IORING_OP_OPENAT,
	IORING_OP_CLOSE,
	IORING_OP_FILES_UPDATE,		/* 20 */
	IORING_OP_STATX,
	IORING_OP_READ,
	IORING_OP_WRITE,
	IORING_OP_FADVISE,
	IORING_OP_MADVISE,
	IORING_OP_SEND,
	IORING_OP_RECV,
	IORING_OP_OPENAT2,
	IORING_OP_EPOLL_CTL,
	IORING_OP_SPLICE,		/* 30 */
	IORING_OP_PROVIDE_BUFFERS,
	IORING_OP_REMOVE_BUFFERS,
	IORING_OP_TEE,
	IORING_OP_SHUTDOWN,
	IORING_OP_RENAMEAT,
	IORING_OP_UNLINKAT,
	IORING_OP_MKDIRAT,
	IORING_OP_SYMLINKAT,
	IORING_OP_LINKAT,
	IORING_OP_MSG_RING,		/* 40 */
	IORING_OP_FSETXATTR,
	IORING_OP_SETXATTR,
	IORING_OP_FGETXATTR,
	IORING_OP_GETXATTR,
	IORING_OP_SOCKET,
	IORING_OP_URING_CMD,
	IORING_OP_SEND_ZC,
	IORING_OP_SENDMSG_ZC,
	IORING_OP_READ_MULTISHOT,
	IORING_OP_WAITID,		/* 50 */
	IORING_OP_FUTEX_WAIT,
	IORING_OP_FUTEX_WAKE,
	IORING_OP_FUTEX_WAITV,

	/* this goes last, obviously */
	IORING_OP_LAST,
};
260 | |
/*
 * sqe->uring_cmd_flags
 *
 * The top 8 bits aren't available for userspace.
 *
 * IORING_URING_CMD_FIXED	use registered buffer; pass this flag
 *				along with setting sqe->buf_index.
 */
#define IORING_URING_CMD_FIXED		(1U << 0)
#define IORING_URING_CMD_MASK		IORING_URING_CMD_FIXED
268 | |
269 | |
/* sqe->fsync_flags */
#define IORING_FSYNC_DATASYNC		(1U << 0)
274 | |
/*
 * sqe->timeout_flags
 *
 * The two *_MASK definitions at the end are unions of the individual flags
 * they are built from.
 */
#define IORING_TIMEOUT_ABS		(1U << 0)
#define IORING_TIMEOUT_UPDATE		(1U << 1)
#define IORING_TIMEOUT_BOOTTIME		(1U << 2)
#define IORING_TIMEOUT_REALTIME		(1U << 3)
#define IORING_LINK_TIMEOUT_UPDATE	(1U << 4)
#define IORING_TIMEOUT_ETIME_SUCCESS	(1U << 5)
#define IORING_TIMEOUT_MULTISHOT	(1U << 6)
#define IORING_TIMEOUT_CLOCK_MASK \
	(IORING_TIMEOUT_BOOTTIME | IORING_TIMEOUT_REALTIME)
#define IORING_TIMEOUT_UPDATE_MASK \
	(IORING_TIMEOUT_UPDATE | IORING_LINK_TIMEOUT_UPDATE)
/*
 * sqe->splice_flags
 *
 * Extends the splice(2) flags; uses the last bit of the __u32.
 */
#define SPLICE_F_FD_IN_FIXED		(1U << 31)
292 | |
/*
 * POLL_ADD flags. Note that since sqe->poll_events is the flag space, the
 * command flags for POLL_ADD are stored in sqe->len.
 *
 * IORING_POLL_ADD_MULTI	Multishot poll. Sets IORING_CQE_F_MORE if
 *				the poll handler will continue to report
 *				CQEs on behalf of the same SQE.
 *
 * IORING_POLL_UPDATE_EVENTS
 * IORING_POLL_UPDATE_USER_DATA
 *				Update existing poll request, matching
 *				sqe->addr as the old user_data field.
 *
 * IORING_POLL_ADD_LEVEL	Level triggered poll.
 */
#define IORING_POLL_ADD_MULTI		(1U << 0)
#define IORING_POLL_UPDATE_EVENTS	(1U << 1)
#define IORING_POLL_UPDATE_USER_DATA	(1U << 2)
#define IORING_POLL_ADD_LEVEL		(1U << 3)
310 | |
/*
 * ASYNC_CANCEL flags.
 */

/* Cancel all requests that match the given key */
#define IORING_ASYNC_CANCEL_ALL		(1U << 0)
/* Key off 'fd' for cancelation rather than the request 'user_data' */
#define IORING_ASYNC_CANCEL_FD		(1U << 1)
/* Match any request */
#define IORING_ASYNC_CANCEL_ANY		(1U << 2)
/* 'fd' passed in is a fixed descriptor */
#define IORING_ASYNC_CANCEL_FD_FIXED	(1U << 3)
/* Match on user_data, default for no other key */
#define IORING_ASYNC_CANCEL_USERDATA	(1U << 4)
/* Match request based on opcode */
#define IORING_ASYNC_CANCEL_OP		(1U << 5)
328 | |
/*
 * send/sendmsg and recv/recvmsg flags (sqe->ioprio)
 */

/*
 * If set, instead of first attempting to send or receive and arm poll if
 * that yields an -EAGAIN result, arm poll upfront and skip the initial
 * transfer attempt.
 */
#define IORING_RECVSEND_POLL_FIRST	(1U << 0)

/*
 * Multishot recv. Sets IORING_CQE_F_MORE if the handler will continue to
 * report CQEs on behalf of the same SQE.
 */
#define IORING_RECV_MULTISHOT		(1U << 1)

/*
 * Use registered buffers, the index is stored in the buf_index field.
 */
#define IORING_RECVSEND_FIXED_BUF	(1U << 2)

/*
 * If set, SEND[MSG]_ZC should report the zerocopy usage in cqe.res for the
 * IORING_CQE_F_NOTIF cqe. 0 is reported if zerocopy was actually possible.
 * IORING_NOTIF_USAGE_ZC_COPIED if data was copied (at least partially).
 */
#define IORING_SEND_ZC_REPORT_USAGE	(1U << 3)
356 | |
/*
 * cqe.res for IORING_CQE_F_NOTIF if IORING_SEND_ZC_REPORT_USAGE was
 * requested.
 *
 * Treat it as a flag: all other bits of cqe.res should be treated as
 * reserved!
 */
#define IORING_NOTIF_USAGE_ZC_COPIED	(1U << 31)
365 | |
/* accept flags stored in sqe->ioprio */
#define IORING_ACCEPT_MULTISHOT		(1U << 0)
370 | |
/*
 * IORING_OP_MSG_RING command types, stored in sqe->addr
 */
enum {
	/* pass sqe->len as 'res' and off as user_data */
	IORING_MSG_DATA		= 0,
	/* send a registered fd to another ring */
	IORING_MSG_SEND_FD	= 1,
};
378 | |
/*
 * IORING_OP_MSG_RING flags (sqe->msg_ring_flags)
 */

/*
 * Don't post a CQE to the target ring. Not applicable for IORING_MSG_DATA,
 * obviously.
 */
#define IORING_MSG_RING_CQE_SKIP	(1U << 0)

/* Pass through the flags from sqe->file_index to cqe->flags */
#define IORING_MSG_RING_FLAGS_PASS	(1U << 1)
388 | |
/*
 * Completion Queue Entry: the data structure describing one IO completion.
 */
struct io_uring_cqe {
	/* sqe->user_data value passed back */
	__u64	user_data;
	/* result code for this event */
	__s32	res;
	__u32	flags;

	/*
	 * If the ring is initialized with IORING_SETUP_CQE32, then this field
	 * contains 16-bytes of padding, doubling the size of the CQE.
	 */
	__u64	big_cqe[];
};
403 | |
/*
 * cqe->flags
 */

/* If set, the upper 16 bits are the buffer ID */
#define IORING_CQE_F_BUFFER		(1U << 0)
/* If set, parent SQE will generate more CQE entries */
#define IORING_CQE_F_MORE		(1U << 1)
/* If set, more data to read after socket recv */
#define IORING_CQE_F_SOCK_NONEMPTY	(1U << 2)
/* Set for notification CQEs. Can be used to distinguish them from sends. */
#define IORING_CQE_F_NOTIF		(1U << 3)

/* Shift that moves the upper 16 bits of cqe->flags down to bit 0. */
enum {
	IORING_CQE_BUFFER_SHIFT		= 16,
};
421 | |
/*
 * Magic mmap offsets: the application passes these to mmap to map the data
 * it needs.
 */
#define IORING_OFF_SQ_RING		0ULL
#define IORING_OFF_CQ_RING		0x8000000ULL
#define IORING_OFF_SQES			0x10000000ULL
#define IORING_OFF_PBUF_RING		0x80000000ULL
#define IORING_OFF_PBUF_SHIFT		16
#define IORING_OFF_MMAP_MASK		0xf8000000ULL
431 | |
/*
 * Submission ring offsets. Filled with the offset for mmap(2).
 */
struct io_sqring_offsets {
	__u32	head;
	__u32	tail;
	__u32	ring_mask;
	__u32	ring_entries;
	__u32	flags;
	__u32	dropped;
	__u32	array;
	__u32	resv1;
	__u64	user_addr;
};
446 | |
/*
 * sq_ring->flags
 */

/* needs io_uring_enter wakeup */
#define IORING_SQ_NEED_WAKEUP		(1U << 0)
/* CQ ring has overflowed */
#define IORING_SQ_CQ_OVERFLOW		(1U << 1)
/* task should enter the kernel */
#define IORING_SQ_TASKRUN		(1U << 2)
453 | |
/*
 * Completion ring counterpart of struct io_sqring_offsets.
 */
struct io_cqring_offsets {
	__u32	head;
	__u32	tail;
	__u32	ring_mask;
	__u32	ring_entries;
	__u32	overflow;
	__u32	cqes;
	__u32	flags;
	__u32	resv1;
	__u64	user_addr;
};
465 | |
/*
 * cq_ring->flags
 *
 * IORING_CQ_EVENTFD_DISABLED	disable eventfd notifications
 */
#define IORING_CQ_EVENTFD_DISABLED	(1U << 0)
472 | |
/*
 * Flags accepted by io_uring_enter(2)
 */
#define IORING_ENTER_GETEVENTS		(1U << 0)
#define IORING_ENTER_SQ_WAKEUP		(1U << 1)
#define IORING_ENTER_SQ_WAIT		(1U << 2)
#define IORING_ENTER_EXT_ARG		(1U << 3)
#define IORING_ENTER_REGISTERED_RING	(1U << 4)
481 | |
482 | /* |
483 | * Passed in for io_uring_setup(2). Copied back with updated info on success |
484 | */ |
485 | struct io_uring_params { |
486 | __u32 sq_entries; |
487 | __u32 cq_entries; |
488 | __u32 flags; |
489 | __u32 sq_thread_cpu; |
490 | __u32 sq_thread_idle; |
491 | __u32 features; |
492 | __u32 wq_fd; |
493 | __u32 resv[3]; |
494 | struct io_sqring_offsets sq_off; |
495 | struct io_cqring_offsets cq_off; |
496 | }; |
497 | |
/*
 * Feature bits reported in io_uring_params->features
 */
#define IORING_FEAT_SINGLE_MMAP		(1U << 0)
#define IORING_FEAT_NODROP		(1U << 1)
#define IORING_FEAT_SUBMIT_STABLE	(1U << 2)
#define IORING_FEAT_RW_CUR_POS		(1U << 3)
#define IORING_FEAT_CUR_PERSONALITY	(1U << 4)
#define IORING_FEAT_FAST_POLL		(1U << 5)
#define IORING_FEAT_POLL_32BITS		(1U << 6)
#define IORING_FEAT_SQPOLL_NONFIXED	(1U << 7)
#define IORING_FEAT_EXT_ARG		(1U << 8)
#define IORING_FEAT_NATIVE_WORKERS	(1U << 9)
#define IORING_FEAT_RSRC_TAGS		(1U << 10)
#define IORING_FEAT_CQE_SKIP		(1U << 11)
#define IORING_FEAT_LINKED_FILE		(1U << 12)
#define IORING_FEAT_REG_REG_RING	(1U << 13)
515 | |
/*
 * Opcodes and arguments for io_uring_register(2)
 */
enum {
	IORING_REGISTER_BUFFERS			= 0,
	IORING_UNREGISTER_BUFFERS		= 1,
	IORING_REGISTER_FILES			= 2,
	IORING_UNREGISTER_FILES			= 3,
	IORING_REGISTER_EVENTFD			= 4,
	IORING_UNREGISTER_EVENTFD		= 5,
	IORING_REGISTER_FILES_UPDATE		= 6,
	IORING_REGISTER_EVENTFD_ASYNC		= 7,
	IORING_REGISTER_PROBE			= 8,
	IORING_REGISTER_PERSONALITY		= 9,
	IORING_UNREGISTER_PERSONALITY		= 10,
	IORING_REGISTER_RESTRICTIONS		= 11,
	IORING_REGISTER_ENABLE_RINGS		= 12,

	/* extended with tagging */
	IORING_REGISTER_FILES2			= 13,
	IORING_REGISTER_FILES_UPDATE2		= 14,
	IORING_REGISTER_BUFFERS2		= 15,
	IORING_REGISTER_BUFFERS_UPDATE		= 16,

	/* set/clear io-wq thread affinities */
	IORING_REGISTER_IOWQ_AFF		= 17,
	IORING_UNREGISTER_IOWQ_AFF		= 18,

	/* set/get max number of io-wq workers */
	IORING_REGISTER_IOWQ_MAX_WORKERS	= 19,

	/* register/unregister io_uring fd with the ring */
	IORING_REGISTER_RING_FDS		= 20,
	IORING_UNREGISTER_RING_FDS		= 21,

	/* register ring based provide buffer group */
	IORING_REGISTER_PBUF_RING		= 22,
	IORING_UNREGISTER_PBUF_RING		= 23,

	/* sync cancelation API */
	IORING_REGISTER_SYNC_CANCEL		= 24,

	/* register a range of fixed file slots for automatic slot allocation */
	IORING_REGISTER_FILE_ALLOC_RANGE	= 25,

	/* this goes last */
	IORING_REGISTER_LAST,

	/* flag added to the opcode to use a registered ring fd */
	IORING_REGISTER_USE_REGISTERED_RING	= 1U << 31
};
567 | |
/*
 * io-wq worker categories
 */
enum {
	IO_WQ_BOUND	= 0,
	IO_WQ_UNBOUND	= 1,
};
573 | |
/*
 * Deprecated, see struct io_uring_rsrc_update.
 */
struct io_uring_files_update {
	__u32		offset;
	__u32		resv;
	/* carries an __s32 * */
	__aligned_u64	fds;
};
580 | |
/*
 * Register a fully sparse file space, instead of passing in an array where
 * every file descriptor is -1.
 */
#define IORING_RSRC_REGISTER_SPARSE	(1U << 0)
586 | |
/*
 * Resource registration descriptor: a count, a flags word, a reserved
 * field and two 64-bit aligned values named data and tags.
 */
struct io_uring_rsrc_register {
	__u32		nr;
	__u32		flags;
	__u64		resv2;
	__aligned_u64	data;
	__aligned_u64	tags;
};
594 | |
/*
 * Resource update descriptor; named as the replacement for the deprecated
 * struct io_uring_files_update.
 */
struct io_uring_rsrc_update {
	__u32		offset;
	__u32		resv;
	__aligned_u64	data;
};
600 | |
/*
 * Extended form of struct io_uring_rsrc_update: the same leading fields,
 * followed by tags, a count and a second reserved field.
 */
struct io_uring_rsrc_update2 {
	__u32		offset;
	__u32		resv;
	__aligned_u64	data;
	__aligned_u64	tags;
	__u32		nr;
	__u32		resv2;
};
609 | |
/*
 * Skip updating fd indexes set to this value in the fd table
 */
#define IORING_REGISTER_FILES_SKIP	(-2)

/* Flag for io_uring_probe_op->flags (IO_URING_OP_* flags) */
#define IO_URING_OP_SUPPORTED		(1U << 0)
614 | |
/*
 * One element of the ops[] array in struct io_uring_probe.
 */
struct io_uring_probe_op {
	__u8	op;
	__u8	resv;
	/* IO_URING_OP_* flags */
	__u16	flags;
	__u32	resv2;
};
621 | |
622 | struct io_uring_probe { |
623 | __u8 last_op; /* last opcode supported */ |
624 | __u8 ops_len; /* length of ops[] array below */ |
625 | __u16 resv; |
626 | __u32 resv2[3]; |
627 | struct io_uring_probe_op ops[]; |
628 | }; |
629 | |
/*
 * One restriction entry. The comment on each union member names the opcode
 * value(s) it belongs to.
 */
struct io_uring_restriction {
	__u16	opcode;
	union {
		/* IORING_RESTRICTION_REGISTER_OP */
		__u8	register_op;
		/* IORING_RESTRICTION_SQE_OP */
		__u8	sqe_op;
		/* IORING_RESTRICTION_SQE_FLAGS_* */
		__u8	sqe_flags;
	};
	__u8	resv;
	__u32	resv2[3];
};
640 | |
/*
 * One buffer entry; the element type of the bufs array in
 * struct io_uring_buf_ring.
 */
struct io_uring_buf {
	__u64	addr;
	__u32	len;
	__u16	bid;
	__u16	resv;
};
647 | |
648 | struct io_uring_buf_ring { |
649 | union { |
650 | /* |
651 | * To avoid spilling into more pages than we need to, the |
652 | * ring tail is overlaid with the io_uring_buf->resv field. |
653 | */ |
654 | struct { |
655 | __u64 resv1; |
656 | __u32 resv2; |
657 | __u16 resv3; |
658 | __u16 tail; |
659 | }; |
660 | __DECLARE_FLEX_ARRAY(struct io_uring_buf, bufs); |
661 | }; |
662 | }; |
663 | |
/*
 * Flags for IORING_REGISTER_PBUF_RING.
 *
 * IOU_PBUF_RING_MMAP:	If set, kernel will allocate the memory for the ring.
 *			The application must not set a ring_addr in struct
 *			io_uring_buf_reg; instead it must subsequently call
 *			mmap(2) with the offset set as:
 *			IORING_OFF_PBUF_RING | (bgid << IORING_OFF_PBUF_SHIFT)
 *			to get a virtual mapping for the ring.
 */
enum {
	IOU_PBUF_RING_MMAP	= 1,
};
677 | |
/*
 * Argument for IORING_(UN)REGISTER_PBUF_RING
 */
struct io_uring_buf_reg {
	__u64	ring_addr;
	__u32	ring_entries;
	__u16	bgid;
	__u16	flags;
	__u64	resv[3];
};
686 | |
/*
 * Values for io_uring_restriction->opcode
 */
enum {
	/* Allow an io_uring_register(2) opcode */
	IORING_RESTRICTION_REGISTER_OP		= 0,
	/* Allow an sqe opcode */
	IORING_RESTRICTION_SQE_OP		= 1,
	/* Allow sqe flags */
	IORING_RESTRICTION_SQE_FLAGS_ALLOWED	= 2,
	/* Require sqe flags (these flags must be set on each submission) */
	IORING_RESTRICTION_SQE_FLAGS_REQUIRED	= 3,

	IORING_RESTRICTION_LAST
};
705 | |
/*
 * NOTE(review): presumably the extended argument selected by
 * IORING_ENTER_EXT_ARG -- confirm against io_uring_enter(2).
 */
struct io_uring_getevents_arg {
	__u64	sigmask;
	__u32	sigmask_sz;
	__u32	pad;
	__u64	ts;
};
712 | |
/*
 * Argument passed with IORING_REGISTER_SYNC_CANCEL
 */
struct io_uring_sync_cancel_reg {
	__u64				addr;
	__s32				fd;
	__u32				flags;
	struct __kernel_timespec	timeout;
	__u8				opcode;
	__u8				pad[7];
	__u64				pad2[3];
};
725 | |
/*
 * Argument for IORING_REGISTER_FILE_ALLOC_RANGE.
 *
 * Describes the half-open range [off, off + len).
 */
struct io_uring_file_index_range {
	__u32	off;
	__u32	len;
	__u64	resv;
};
735 | |
/*
 * Four 32-bit fields: name length, control length, payload length and flags.
 * NOTE(review): presumably the header produced for multishot recvmsg --
 * confirm against the io_uring documentation.
 */
struct io_uring_recvmsg_out {
	__u32	namelen;
	__u32	controllen;
	__u32	payloadlen;
	__u32	flags;
};
742 | |
/*
 * Argument for IORING_OP_URING_CMD when file is a socket
 */
enum {
	SOCKET_URING_OP_SIOCINQ		= 0,
	SOCKET_URING_OP_SIOCOUTQ	= 1,
	SOCKET_URING_OP_GETSOCKOPT	= 2,
	SOCKET_URING_OP_SETSOCKOPT	= 3,
};
752 | |
753 | #ifdef __cplusplus |
754 | } |
755 | #endif |
756 | |
757 | #endif |
758 | |