diff mbox series

[for-next,v2,01/12] io_uring: dont remove file from msg_ring reqs

Message ID e5ac9edadb574fe33f6d727cb8f14ce68262a684.1670384893.git.asml.silence@gmail.com (mailing list archive)
State New
Headers show
Series CQ locking optimisation | expand

Commit Message

Pavel Begunkov Dec. 7, 2022, 3:53 a.m. UTC
We should not be messing with req->file outside of core paths. Clearing
it makes msg_ring non-reentrant, i.e. luckily io_msg_send_fd() fails the
request on a failed io_double_lock_ctx(), but it was clearly originally
intended to do retries instead.

Cc: stable@vger.kernel.org
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 io_uring/io_uring.c | 2 +-
 io_uring/msg_ring.c | 4 ----
 io_uring/opdef.c    | 7 +++++++
 io_uring/opdef.h    | 2 ++
 4 files changed, 10 insertions(+), 5 deletions(-)

Comments

Jens Axboe Dec. 7, 2022, 1:52 p.m. UTC | #1
On 12/6/22 8:53 PM, Pavel Begunkov wrote:
> We should not be messing with req->file outside of core paths. Clearing
> it makes msg_ring non reentrant, i.e. luckily io_msg_send_fd() fails the
> request on failed io_double_lock_ctx() but clearly was originally
> intended to do retries instead.

That's basically what I had in my patch, except I just went for the
negated one instead to cut down on churn. Why not just do that?
Pavel Begunkov Dec. 7, 2022, 9:12 p.m. UTC | #2
On 12/7/22 13:52, Jens Axboe wrote:
> On 12/6/22 8:53 PM, Pavel Begunkov wrote:
>> We should not be messing with req->file outside of core paths. Clearing
>> it makes msg_ring non reentrant, i.e. luckily io_msg_send_fd() fails the
>> request on failed io_double_lock_ctx() but clearly was originally
>> intended to do retries instead.
> 
> That's basically what I had in my patch, except I just went for the
> negated one instead to cut down on churn. Why not just do that?
I just already had this patch so left it as is, but if I have to
find a reason it would be: 1) considering that the req->file check
is already an exception to the rule, the negative would be an
exception to the exception, and 2) it removes that extra req->file
check.
Jens Axboe Dec. 7, 2022, 9:23 p.m. UTC | #3
On 12/7/22 2:12 PM, Pavel Begunkov wrote:
> On 12/7/22 13:52, Jens Axboe wrote:
>> On 12/6/22 8:53 PM, Pavel Begunkov wrote:
>>> We should not be messing with req->file outside of core paths. Clearing
>>> it makes msg_ring non reentrant, i.e. luckily io_msg_send_fd() fails the
>>> request on failed io_double_lock_ctx() but clearly was originally
>>> intended to do retries instead.
>>
>> That's basically what I had in my patch, except I just went for the
>> negated one instead to cut down on churn. Why not just do that?
> I just already had this patch so left it as is, but if I have to
> find a reason it would be: 1) considering that the req->file check
> is already an exception to the rule, the negative would be an
> exception to the exception, and 2) it removes that extra req->file
> check.

Let's just leave it as-is; I was mostly only concerned with potential
backport pain.
diff mbox series

Patch

diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 436b1ac8f6d0..62372a641add 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -1810,7 +1810,7 @@  static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags)
 		return ret;
 
 	/* If the op doesn't have a file, we're not polling for it */
-	if ((req->ctx->flags & IORING_SETUP_IOPOLL) && req->file)
+	if ((req->ctx->flags & IORING_SETUP_IOPOLL) && def->iopoll_queue)
 		io_iopoll_req_issued(req, issue_flags);
 
 	return 0;
diff --git a/io_uring/msg_ring.c b/io_uring/msg_ring.c
index afb543aab9f6..615c85e164ab 100644
--- a/io_uring/msg_ring.c
+++ b/io_uring/msg_ring.c
@@ -167,9 +167,5 @@  int io_msg_ring(struct io_kiocb *req, unsigned int issue_flags)
 	if (ret < 0)
 		req_set_fail(req);
 	io_req_set_res(req, ret, 0);
-	/* put file to avoid an attempt to IOPOLL the req */
-	if (!(req->flags & REQ_F_FIXED_FILE))
-		io_put_file(req->file);
-	req->file = NULL;
 	return IOU_OK;
 }
diff --git a/io_uring/opdef.c b/io_uring/opdef.c
index 83dc0f9ad3b2..04dd2c983fce 100644
--- a/io_uring/opdef.c
+++ b/io_uring/opdef.c
@@ -63,6 +63,7 @@  const struct io_op_def io_op_defs[] = {
 		.audit_skip		= 1,
 		.ioprio			= 1,
 		.iopoll			= 1,
+		.iopoll_queue		= 1,
 		.async_size		= sizeof(struct io_async_rw),
 		.name			= "READV",
 		.prep			= io_prep_rw,
@@ -80,6 +81,7 @@  const struct io_op_def io_op_defs[] = {
 		.audit_skip		= 1,
 		.ioprio			= 1,
 		.iopoll			= 1,
+		.iopoll_queue		= 1,
 		.async_size		= sizeof(struct io_async_rw),
 		.name			= "WRITEV",
 		.prep			= io_prep_rw,
@@ -103,6 +105,7 @@  const struct io_op_def io_op_defs[] = {
 		.audit_skip		= 1,
 		.ioprio			= 1,
 		.iopoll			= 1,
+		.iopoll_queue		= 1,
 		.async_size		= sizeof(struct io_async_rw),
 		.name			= "READ_FIXED",
 		.prep			= io_prep_rw,
@@ -118,6 +121,7 @@  const struct io_op_def io_op_defs[] = {
 		.audit_skip		= 1,
 		.ioprio			= 1,
 		.iopoll			= 1,
+		.iopoll_queue		= 1,
 		.async_size		= sizeof(struct io_async_rw),
 		.name			= "WRITE_FIXED",
 		.prep			= io_prep_rw,
@@ -277,6 +281,7 @@  const struct io_op_def io_op_defs[] = {
 		.audit_skip		= 1,
 		.ioprio			= 1,
 		.iopoll			= 1,
+		.iopoll_queue		= 1,
 		.async_size		= sizeof(struct io_async_rw),
 		.name			= "READ",
 		.prep			= io_prep_rw,
@@ -292,6 +297,7 @@  const struct io_op_def io_op_defs[] = {
 		.audit_skip		= 1,
 		.ioprio			= 1,
 		.iopoll			= 1,
+		.iopoll_queue		= 1,
 		.async_size		= sizeof(struct io_async_rw),
 		.name			= "WRITE",
 		.prep			= io_prep_rw,
@@ -481,6 +487,7 @@  const struct io_op_def io_op_defs[] = {
 		.plug			= 1,
 		.name			= "URING_CMD",
 		.iopoll			= 1,
+		.iopoll_queue		= 1,
 		.async_size		= uring_cmd_pdu_size(1),
 		.prep			= io_uring_cmd_prep,
 		.issue			= io_uring_cmd,
diff --git a/io_uring/opdef.h b/io_uring/opdef.h
index 3efe06d25473..df7e13d9bfba 100644
--- a/io_uring/opdef.h
+++ b/io_uring/opdef.h
@@ -25,6 +25,8 @@  struct io_op_def {
 	unsigned		ioprio : 1;
 	/* supports iopoll */
 	unsigned		iopoll : 1;
+	/* have to be put into the iopoll list */
+	unsigned		iopoll_queue : 1;
 	/* opcode specific path will handle ->async_data allocation if needed */
 	unsigned		manual_alloc : 1;
 	/* size of async data needed, if any */