io_uring: expand main struct io_kiocb flags to 64-bits
author     Jens Axboe <axboe@kernel.dk>
Mon, 29 Jan 2024 03:05:47 +0000 (20:05 -0700)
committer  Jens Axboe <axboe@kernel.dk>
Thu, 8 Feb 2024 20:27:03 +0000 (13:27 -0700)
We're out of flags space, and none of the existing flags are easily
reclaimable. Bump the field to 64 bits and re-arrange the struct a bit
to avoid gaps.

Add a specific bitwise type for the request flags, io_req_flags_t.
This will help catch cases where this value is cast to a smaller type,
like unsigned int on 32-bit archs, which would silently drop the upper
flag bits.
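
As an aside (not part of the patch): since io_req_flags_t is __bitwise,
sparse ("make C=1") complains whenever the flags are converted to a
plain integer type without an explicit __force cast. A minimal sketch,
assuming a hypothetical print_flags() helper in kernel context:

	static void print_flags(struct io_kiocb *req)
	{
		/* sparse warns: cast from restricted io_req_flags_t */
		pr_debug("req flags 0x%x\n", (unsigned int) req->flags);

		/* intentional conversions are annotated with __force */
		pr_debug("req flags 0x%llx\n",
			 (__force unsigned long long) req->flags);
	}

This mirrors the __force casts added to the trace events below.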

This creates a hole in struct io_kiocb, so move nr_tw up and rsrc_node
down so that non-polled opcodes still only need to touch cachelines 0
and 1.
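
As a side note (not part of the patch), the hole is purely an alignment
artifact: a u64 that directly follows a 2- or 4-byte field forces the
compiler to insert padding unless the small fields are grouped together.
A hypothetical illustration (offsets for a 64-bit build; the real
io_kiocb offsets differ):

	struct holey {
		u16	buf_index;	/* offset 0, then 6 bytes padding */
		u64	flags;		/* offset 8 */
		u32	nr_tw;		/* offset 16, 4 bytes tail padding */
	};				/* sizeof == 24 */

	struct no_hole {
		u16	buf_index;	/* offset 0 */
		u32	nr_tw;		/* offset 4, only 2 bytes padding */
		u64	flags;		/* offset 8, no hole before it */
	};				/* sizeof == 16 */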

No functional changes intended in this patch.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
include/linux/io_uring_types.h
include/trace/events/io_uring.h
io_uring/filetable.h
io_uring/io_uring.c

index 854ad67a5f70e8a9f9055f5d76762717c3c33c69..56bf733d3ee65ba5a532430ff30b7296beca1820 100644 (file)
@@ -468,70 +468,73 @@ enum {
        __REQ_F_LAST_BIT,
 };
 
+typedef u64 __bitwise io_req_flags_t;
+#define IO_REQ_FLAG(bitno)     ((__force io_req_flags_t) BIT_ULL((bitno)))
+
 enum {
        /* ctx owns file */
-       REQ_F_FIXED_FILE        = BIT(REQ_F_FIXED_FILE_BIT),
+       REQ_F_FIXED_FILE        = IO_REQ_FLAG(REQ_F_FIXED_FILE_BIT),
        /* drain existing IO first */
-       REQ_F_IO_DRAIN          = BIT(REQ_F_IO_DRAIN_BIT),
+       REQ_F_IO_DRAIN          = IO_REQ_FLAG(REQ_F_IO_DRAIN_BIT),
        /* linked sqes */
-       REQ_F_LINK              = BIT(REQ_F_LINK_BIT),
+       REQ_F_LINK              = IO_REQ_FLAG(REQ_F_LINK_BIT),
        /* doesn't sever on completion < 0 */
-       REQ_F_HARDLINK          = BIT(REQ_F_HARDLINK_BIT),
+       REQ_F_HARDLINK          = IO_REQ_FLAG(REQ_F_HARDLINK_BIT),
        /* IOSQE_ASYNC */
-       REQ_F_FORCE_ASYNC       = BIT(REQ_F_FORCE_ASYNC_BIT),
+       REQ_F_FORCE_ASYNC       = IO_REQ_FLAG(REQ_F_FORCE_ASYNC_BIT),
        /* IOSQE_BUFFER_SELECT */
-       REQ_F_BUFFER_SELECT     = BIT(REQ_F_BUFFER_SELECT_BIT),
+       REQ_F_BUFFER_SELECT     = IO_REQ_FLAG(REQ_F_BUFFER_SELECT_BIT),
        /* IOSQE_CQE_SKIP_SUCCESS */
-       REQ_F_CQE_SKIP          = BIT(REQ_F_CQE_SKIP_BIT),
+       REQ_F_CQE_SKIP          = IO_REQ_FLAG(REQ_F_CQE_SKIP_BIT),
 
        /* fail rest of links */
-       REQ_F_FAIL              = BIT(REQ_F_FAIL_BIT),
+       REQ_F_FAIL              = IO_REQ_FLAG(REQ_F_FAIL_BIT),
        /* on inflight list, should be cancelled and waited on exit reliably */
-       REQ_F_INFLIGHT          = BIT(REQ_F_INFLIGHT_BIT),
+       REQ_F_INFLIGHT          = IO_REQ_FLAG(REQ_F_INFLIGHT_BIT),
        /* read/write uses file position */
-       REQ_F_CUR_POS           = BIT(REQ_F_CUR_POS_BIT),
+       REQ_F_CUR_POS           = IO_REQ_FLAG(REQ_F_CUR_POS_BIT),
        /* must not punt to workers */
-       REQ_F_NOWAIT            = BIT(REQ_F_NOWAIT_BIT),
+       REQ_F_NOWAIT            = IO_REQ_FLAG(REQ_F_NOWAIT_BIT),
        /* has or had linked timeout */
-       REQ_F_LINK_TIMEOUT      = BIT(REQ_F_LINK_TIMEOUT_BIT),
+       REQ_F_LINK_TIMEOUT      = IO_REQ_FLAG(REQ_F_LINK_TIMEOUT_BIT),
        /* needs cleanup */
-       REQ_F_NEED_CLEANUP      = BIT(REQ_F_NEED_CLEANUP_BIT),
+       REQ_F_NEED_CLEANUP      = IO_REQ_FLAG(REQ_F_NEED_CLEANUP_BIT),
        /* already went through poll handler */
-       REQ_F_POLLED            = BIT(REQ_F_POLLED_BIT),
+       REQ_F_POLLED            = IO_REQ_FLAG(REQ_F_POLLED_BIT),
        /* buffer already selected */
-       REQ_F_BUFFER_SELECTED   = BIT(REQ_F_BUFFER_SELECTED_BIT),
+       REQ_F_BUFFER_SELECTED   = IO_REQ_FLAG(REQ_F_BUFFER_SELECTED_BIT),
        /* buffer selected from ring, needs commit */
-       REQ_F_BUFFER_RING       = BIT(REQ_F_BUFFER_RING_BIT),
+       REQ_F_BUFFER_RING       = IO_REQ_FLAG(REQ_F_BUFFER_RING_BIT),
        /* caller should reissue async */
-       REQ_F_REISSUE           = BIT(REQ_F_REISSUE_BIT),
+       REQ_F_REISSUE           = IO_REQ_FLAG(REQ_F_REISSUE_BIT),
        /* supports async reads/writes */
-       REQ_F_SUPPORT_NOWAIT    = BIT(REQ_F_SUPPORT_NOWAIT_BIT),
+       REQ_F_SUPPORT_NOWAIT    = IO_REQ_FLAG(REQ_F_SUPPORT_NOWAIT_BIT),
        /* regular file */
-       REQ_F_ISREG             = BIT(REQ_F_ISREG_BIT),
+       REQ_F_ISREG             = IO_REQ_FLAG(REQ_F_ISREG_BIT),
        /* has creds assigned */
-       REQ_F_CREDS             = BIT(REQ_F_CREDS_BIT),
+       REQ_F_CREDS             = IO_REQ_FLAG(REQ_F_CREDS_BIT),
        /* skip refcounting if not set */
-       REQ_F_REFCOUNT          = BIT(REQ_F_REFCOUNT_BIT),
+       REQ_F_REFCOUNT          = IO_REQ_FLAG(REQ_F_REFCOUNT_BIT),
        /* there is a linked timeout that has to be armed */
-       REQ_F_ARM_LTIMEOUT      = BIT(REQ_F_ARM_LTIMEOUT_BIT),
+       REQ_F_ARM_LTIMEOUT      = IO_REQ_FLAG(REQ_F_ARM_LTIMEOUT_BIT),
        /* ->async_data allocated */
-       REQ_F_ASYNC_DATA        = BIT(REQ_F_ASYNC_DATA_BIT),
+       REQ_F_ASYNC_DATA        = IO_REQ_FLAG(REQ_F_ASYNC_DATA_BIT),
        /* don't post CQEs while failing linked requests */
-       REQ_F_SKIP_LINK_CQES    = BIT(REQ_F_SKIP_LINK_CQES_BIT),
+       REQ_F_SKIP_LINK_CQES    = IO_REQ_FLAG(REQ_F_SKIP_LINK_CQES_BIT),
        /* single poll may be active */
-       REQ_F_SINGLE_POLL       = BIT(REQ_F_SINGLE_POLL_BIT),
+       REQ_F_SINGLE_POLL       = IO_REQ_FLAG(REQ_F_SINGLE_POLL_BIT),
        /* double poll may active */
-       REQ_F_DOUBLE_POLL       = BIT(REQ_F_DOUBLE_POLL_BIT),
+       REQ_F_DOUBLE_POLL       = IO_REQ_FLAG(REQ_F_DOUBLE_POLL_BIT),
        /* request has already done partial IO */
-       REQ_F_PARTIAL_IO        = BIT(REQ_F_PARTIAL_IO_BIT),
+       REQ_F_PARTIAL_IO        = IO_REQ_FLAG(REQ_F_PARTIAL_IO_BIT),
        /* fast poll multishot mode */
-       REQ_F_APOLL_MULTISHOT   = BIT(REQ_F_APOLL_MULTISHOT_BIT),
+       REQ_F_APOLL_MULTISHOT   = IO_REQ_FLAG(REQ_F_APOLL_MULTISHOT_BIT),
        /* recvmsg special flag, clear EPOLLIN */
-       REQ_F_CLEAR_POLLIN      = BIT(REQ_F_CLEAR_POLLIN_BIT),
+       REQ_F_CLEAR_POLLIN      = IO_REQ_FLAG(REQ_F_CLEAR_POLLIN_BIT),
        /* hashed into ->cancel_hash_locked, protected by ->uring_lock */
-       REQ_F_HASH_LOCKED       = BIT(REQ_F_HASH_LOCKED_BIT),
+       REQ_F_HASH_LOCKED       = IO_REQ_FLAG(REQ_F_HASH_LOCKED_BIT),
        /* don't use lazy poll wake for this request */
-       REQ_F_POLL_NO_LAZY      = BIT(REQ_F_POLL_NO_LAZY_BIT),
+       REQ_F_POLL_NO_LAZY      = IO_REQ_FLAG(REQ_F_POLL_NO_LAZY_BIT),
 };
 
 typedef void (*io_req_tw_func_t)(struct io_kiocb *req, struct io_tw_state *ts);
@@ -592,15 +595,17 @@ struct io_kiocb {
         * and after selection it points to the buffer ID itself.
         */
        u16                             buf_index;
-       unsigned int                    flags;
+
+       unsigned                        nr_tw;
+
+       /* REQ_F_* flags */
+       io_req_flags_t                  flags;
 
        struct io_cqe                   cqe;
 
        struct io_ring_ctx              *ctx;
        struct task_struct              *task;
 
-       struct io_rsrc_node             *rsrc_node;
-
        union {
                /* store used ubuf, so we can prevent reloading */
                struct io_mapped_ubuf   *imu;
@@ -621,10 +626,12 @@ struct io_kiocb {
                /* cache ->apoll->events */
                __poll_t apoll_events;
        };
+
+       struct io_rsrc_node             *rsrc_node;
+
        atomic_t                        refs;
        atomic_t                        poll_refs;
        struct io_task_work             io_task_work;
-       unsigned                        nr_tw;
        /* for polled requests, i.e. IORING_OP_POLL_ADD and async armed poll */
        struct hlist_node               hash_node;
        /* internal polling, see IORING_FEAT_FAST_POLL */
index 69454f1f98b01eb3d16eb01eb2c9c39d69c7705b..b241f2b124863a6aa5ccf0ad82873aea993acbea 100644 (file)
@@ -148,7 +148,7 @@ TRACE_EVENT(io_uring_queue_async_work,
                __field(  void *,                       req             )
                __field(  u64,                          user_data       )
                __field(  u8,                           opcode          )
-               __field(  unsigned int,                 flags           )
+               __field(  unsigned long long,           flags           )
                __field(  struct io_wq_work *,          work            )
                __field(  int,                          rw              )
 
@@ -159,7 +159,7 @@ TRACE_EVENT(io_uring_queue_async_work,
                __entry->ctx            = req->ctx;
                __entry->req            = req;
                __entry->user_data      = req->cqe.user_data;
-               __entry->flags          = req->flags;
+               __entry->flags          = (__force unsigned long long) req->flags;
                __entry->opcode         = req->opcode;
                __entry->work           = &req->work;
                __entry->rw             = rw;
@@ -167,10 +167,10 @@ TRACE_EVENT(io_uring_queue_async_work,
                __assign_str(op_str, io_uring_get_opcode(req->opcode));
        ),
 
-       TP_printk("ring %p, request %p, user_data 0x%llx, opcode %s, flags 0x%x, %s queue, work %p",
+       TP_printk("ring %p, request %p, user_data 0x%llx, opcode %s, flags 0x%llx, %s queue, work %p",
                __entry->ctx, __entry->req, __entry->user_data,
-               __get_str(op_str),
-               __entry->flags, __entry->rw ? "hashed" : "normal", __entry->work)
+               __get_str(op_str), __entry->flags,
+               __entry->rw ? "hashed" : "normal", __entry->work)
 );
 
 /**
@@ -378,7 +378,7 @@ TRACE_EVENT(io_uring_submit_req,
                __field(  void *,               req             )
                __field(  unsigned long long,   user_data       )
                __field(  u8,                   opcode          )
-               __field(  u32,                  flags           )
+               __field(  unsigned long long,   flags           )
                __field(  bool,                 sq_thread       )
 
                __string( op_str, io_uring_get_opcode(req->opcode) )
@@ -389,16 +389,16 @@ TRACE_EVENT(io_uring_submit_req,
                __entry->req            = req;
                __entry->user_data      = req->cqe.user_data;
                __entry->opcode         = req->opcode;
-               __entry->flags          = req->flags;
+               __entry->flags          = (__force unsigned long long) req->flags;
                __entry->sq_thread      = req->ctx->flags & IORING_SETUP_SQPOLL;
 
                __assign_str(op_str, io_uring_get_opcode(req->opcode));
        ),
 
-       TP_printk("ring %p, req %p, user_data 0x%llx, opcode %s, flags 0x%x, "
+       TP_printk("ring %p, req %p, user_data 0x%llx, opcode %s, flags 0x%llx, "
                  "sq_thread %d", __entry->ctx, __entry->req,
-                 __entry->user_data, __get_str(op_str),
-                 __entry->flags, __entry->sq_thread)
+                 __entry->user_data, __get_str(op_str), __entry->flags,
+                 __entry->sq_thread)
 );
 
 /*
index b47adf170c314daaf4c09b2b8ab1876079ccbf60..b2435c4dca1f9fd24ddadf3396edbf0d146eea96 100644 (file)
@@ -17,7 +17,7 @@ int io_fixed_fd_remove(struct io_ring_ctx *ctx, unsigned int offset);
 int io_register_file_alloc_range(struct io_ring_ctx *ctx,
                                 struct io_uring_file_index_range __user *arg);
 
-unsigned int io_file_get_flags(struct file *file);
+io_req_flags_t io_file_get_flags(struct file *file);
 
 static inline void io_file_bitmap_clear(struct io_file_table *table, int bit)
 {
index cd9a137ad6cefbb907a177fd8f0c9753ac0c70dd..b8ca907b77eb90f8f8c673895c8cd4aa165a6c42 100644 (file)
@@ -1768,9 +1768,9 @@ static void io_iopoll_req_issued(struct io_kiocb *req, unsigned int issue_flags)
        }
 }
 
-unsigned int io_file_get_flags(struct file *file)
+io_req_flags_t io_file_get_flags(struct file *file)
 {
-       unsigned int res = 0;
+       io_req_flags_t res = 0;
 
        if (S_ISREG(file_inode(file)->i_mode))
                res |= REQ_F_ISREG;
@@ -2171,7 +2171,8 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
        /* req is partially pre-initialised, see io_preinit_req() */
        req->opcode = opcode = READ_ONCE(sqe->opcode);
        /* same numerical values with corresponding REQ_F_*, safe to copy */
-       req->flags = sqe_flags = READ_ONCE(sqe->flags);
+       sqe_flags = READ_ONCE(sqe->flags);
+       req->flags = (io_req_flags_t) sqe_flags;
        req->cqe.user_data = READ_ONCE(sqe->user_data);
        req->file = NULL;
        req->rsrc_node = NULL;
@@ -4153,7 +4154,7 @@ static int __init io_uring_init(void)
        BUILD_BUG_ON(SQE_COMMON_FLAGS >= (1 << 8));
        BUILD_BUG_ON((SQE_VALID_FLAGS | SQE_COMMON_FLAGS) != SQE_VALID_FLAGS);
 
-       BUILD_BUG_ON(__REQ_F_LAST_BIT > 8 * sizeof(int));
+       BUILD_BUG_ON(__REQ_F_LAST_BIT > 8 * sizeof_field(struct io_kiocb, flags));
 
        BUILD_BUG_ON(sizeof(atomic_t) != sizeof(u32));