io_uring: track restrictions separately for IORING_OP and IORING_REGISTER

It's quite likely that only register opcode restrictions exist, in
which case we'd never need to check the normal opcodes. Split
ctx->restricted into two separate fields, one for I/O opcodes, and one
for register opcodes.

Reviewed-by: Gabriel Krisman Bertazi <krisman@suse.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in:
Jens Axboe 2026-01-12 08:14:45 -07:00
parent 991fb85a1d
commit d6406c45f1
3 changed files with 22 additions and 9 deletions

View File

@ -224,7 +224,10 @@ struct io_restriction {
DECLARE_BITMAP(sqe_op, IORING_OP_LAST);
u8 sqe_flags_allowed;
u8 sqe_flags_required;
bool registered;
/* IORING_OP_* restrictions exist */
bool op_registered;
/* IORING_REGISTER_* restrictions exist */
bool reg_registered;
};
struct io_submit_link {
@ -259,7 +262,8 @@ struct io_ring_ctx {
struct {
unsigned int flags;
unsigned int drain_next: 1;
unsigned int restricted: 1;
unsigned int op_restricted: 1;
unsigned int reg_restricted: 1;
unsigned int off_timeout_used: 1;
unsigned int drain_active: 1;
unsigned int has_evfd: 1;

View File

@ -2056,7 +2056,7 @@ static inline bool io_check_restriction(struct io_ring_ctx *ctx,
struct io_kiocb *req,
unsigned int sqe_flags)
{
if (!ctx->restricted)
if (!ctx->op_restricted)
return true;
if (!test_bit(req->opcode, ctx->restrictions.sqe_op))
return false;
@ -2159,7 +2159,7 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
io_init_drain(ctx);
}
}
if (unlikely(ctx->restricted || ctx->drain_active || ctx->drain_next)) {
if (unlikely(ctx->op_restricted || ctx->drain_active || ctx->drain_next)) {
if (!io_check_restriction(ctx, req, sqe_flags))
return io_init_fail_req(req, -EACCES);
/* knock it to the slow queue path, will be drained there */

View File

@ -133,24 +133,31 @@ static __cold int io_parse_restrictions(void __user *arg, unsigned int nr_args,
if (res[i].register_op >= IORING_REGISTER_LAST)
goto err;
__set_bit(res[i].register_op, restrictions->register_op);
restrictions->reg_registered = true;
break;
case IORING_RESTRICTION_SQE_OP:
if (res[i].sqe_op >= IORING_OP_LAST)
goto err;
__set_bit(res[i].sqe_op, restrictions->sqe_op);
restrictions->op_registered = true;
break;
case IORING_RESTRICTION_SQE_FLAGS_ALLOWED:
restrictions->sqe_flags_allowed = res[i].sqe_flags;
restrictions->op_registered = true;
break;
case IORING_RESTRICTION_SQE_FLAGS_REQUIRED:
restrictions->sqe_flags_required = res[i].sqe_flags;
restrictions->op_registered = true;
break;
default:
goto err;
}
}
ret = nr_args;
restrictions->registered = true;
if (!nr_args) {
restrictions->op_registered = true;
restrictions->reg_registered = true;
}
err:
kfree(res);
return ret;
@ -166,7 +173,7 @@ static __cold int io_register_restrictions(struct io_ring_ctx *ctx,
return -EBADFD;
/* We allow only a single restrictions registration */
if (ctx->restrictions.registered)
if (ctx->restrictions.op_registered || ctx->restrictions.reg_registered)
return -EBUSY;
ret = io_parse_restrictions(arg, nr_args, &ctx->restrictions);
@ -175,8 +182,10 @@ static __cold int io_register_restrictions(struct io_ring_ctx *ctx,
memset(&ctx->restrictions, 0, sizeof(ctx->restrictions));
return ret;
}
if (ctx->restrictions.registered)
ctx->restricted = 1;
if (ctx->restrictions.op_registered)
ctx->op_restricted = 1;
if (ctx->restrictions.reg_registered)
ctx->reg_restricted = 1;
return 0;
}
@ -626,7 +635,7 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
if (ctx->submitter_task && ctx->submitter_task != current)
return -EEXIST;
if (ctx->restricted && !(ctx->flags & IORING_SETUP_R_DISABLED)) {
if (ctx->reg_restricted && !(ctx->flags & IORING_SETUP_R_DISABLED)) {
opcode = array_index_nospec(opcode, IORING_REGISTER_LAST);
if (!test_bit(opcode, ctx->restrictions.register_op))
return -EACCES;