diff mbox series

[4/5] nvme: split out metadata vs non metadata end_io uring_cmd completions

Message ID 20220922182805.96173-5-axboe@kernel.dk (mailing list archive)
State New, archived
Headers show
Series Enable alloc caching and batched freeing for passthrough | expand

Commit Message

Jens Axboe Sept. 22, 2022, 6:28 p.m. UTC
By splitting up the metadata and non-metadata end_io handling, we can
remove any request dependencies on the normal non-metadata IO path. This
is in preparation for enabling the normal IO passthrough path to pass
the ownership of the request back to the block layer.

Co-developed-by: Stefan Roesch <shr@fb.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/nvme/host/ioctl.c | 82 +++++++++++++++++++++++++++++++--------
 1 file changed, 66 insertions(+), 16 deletions(-)

Comments

Christoph Hellwig Sept. 23, 2022, 3:21 p.m. UTC | #1
> +	union {
> +		struct {
> +			void *meta; /* kernel-resident buffer */
> +			void __user *meta_buffer;
> +		};
> +		struct {
> +			u32 nvme_flags;
> +			u32 nvme_status;
> +			u64 result;
> +		};
> +	};

Without naming the arms of the union this is becoming a bit too much
of a mess..

> +static void nvme_uring_task_cb(struct io_uring_cmd *ioucmd)
> +{
> +	struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd);
> +	int status;
> +
> +	if (pdu->nvme_flags & NVME_REQ_CANCELLED)
> +		status = -EINTR;
> +	else
> +		status = pdu->nvme_status;

If you add a signed int field you only need one field instead of
two in the pdu for this (the nvme status is only 15 bits anyway).
Jens Axboe Sept. 23, 2022, 8:52 p.m. UTC | #2
On 9/23/22 9:21 AM, Christoph Hellwig wrote:
>> +	union {
>> +		struct {
>> +			void *meta; /* kernel-resident buffer */
>> +			void __user *meta_buffer;
>> +		};
>> +		struct {
>> +			u32 nvme_flags;
>> +			u32 nvme_status;
>> +			u64 result;
>> +		};
>> +	};
> 
> Without naming the arms of the union this is becoming a bit too much
> of a mess..
> 
>> +static void nvme_uring_task_cb(struct io_uring_cmd *ioucmd)
>> +{
>> +	struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd);
>> +	int status;
>> +
>> +	if (pdu->nvme_flags & NVME_REQ_CANCELLED)
>> +		status = -EINTR;
>> +	else
>> +		status = pdu->nvme_status;
> 
> If you add a signed int field you only need one field instead of
> two in the pdu for this (the nvme status is only 15 bits anyway).

For both of these, how about we just simplify like below? I think
at that point it's useless to name them anyway.


diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c
index 25f2f6df1602..6f955984ca14 100644
--- a/drivers/nvme/host/ioctl.c
+++ b/drivers/nvme/host/ioctl.c
@@ -350,16 +350,13 @@ struct nvme_uring_cmd_pdu {
 		struct request *req;
 	};
 	u32 meta_len;
+	u32 nvme_status;
 	union {
 		struct {
 			void *meta; /* kernel-resident buffer */
 			void __user *meta_buffer;
 		};
-		struct {
-			u32 nvme_flags;
-			u32 nvme_status;
-			u64 result;
-		};
+		u64 result;
 	};
 };
 
@@ -396,17 +393,11 @@ static void nvme_uring_task_meta_cb(struct io_uring_cmd *ioucmd)
 static void nvme_uring_task_cb(struct io_uring_cmd *ioucmd)
 {
 	struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd);
-	int status;
-
-	if (pdu->nvme_flags & NVME_REQ_CANCELLED)
-		status = -EINTR;
-	else
-		status = pdu->nvme_status;
 
 	if (pdu->bio)
 		blk_rq_unmap_user(pdu->bio);
 
-	io_uring_cmd_done(ioucmd, status, pdu->result);
+	io_uring_cmd_done(ioucmd, pdu->nvme_status, pdu->result);
 }
 
 static enum rq_end_io_ret nvme_uring_cmd_end_io(struct request *req,
@@ -417,8 +408,10 @@ static enum rq_end_io_ret nvme_uring_cmd_end_io(struct request *req,
 	void *cookie = READ_ONCE(ioucmd->cookie);
 
 	req->bio = pdu->bio;
-	pdu->nvme_flags = nvme_req(req)->flags;
-	pdu->nvme_status = nvme_req(req)->status;
+	if (nvme_req(req)->flags & NVME_REQ_CANCELLED)
+		pdu->nvme_status = -EINTR;
+	else
+		pdu->nvme_status = nvme_req(req)->status;
 	pdu->result = le64_to_cpu(nvme_req(req)->result.u64);
 
 	/*
Christoph Hellwig Sept. 26, 2022, 2:41 p.m. UTC | #3
On Fri, Sep 23, 2022 at 02:52:54PM -0600, Jens Axboe wrote:
> For both of these, how about we just simplify like below? I think
> at that point it's useless to name them anyway.

I think this version is better than the previous one, but I'd still
prefer a non-anonymous union.
Jens Axboe Sept. 26, 2022, 2:41 p.m. UTC | #4
On 9/26/22 8:41 AM, Christoph Hellwig wrote:
> On Fri, Sep 23, 2022 at 02:52:54PM -0600, Jens Axboe wrote:
>> For both of these, how about we just simplify like below? I think
>> at that point it's useless to name them anyway.
> 
> I think this version is better than the previous one, but I'd still
> prefer a non-anonymous union.

Sure, I don't really care. What name do you want for it?
Christoph Hellwig Sept. 26, 2022, 2:43 p.m. UTC | #5
On Mon, Sep 26, 2022 at 08:41:38AM -0600, Jens Axboe wrote:
> Sure, I don't really care. What name do you want for it?

Maybe slow and fast?  Or simple and meta?

> 
> -- 
> Jens Axboe
> 
> 
---end quoted text---
Jens Axboe Sept. 26, 2022, 2:50 p.m. UTC | #6
On 9/26/22 8:43 AM, Christoph Hellwig wrote:
> On Mon, Sep 26, 2022 at 08:41:38AM -0600, Jens Axboe wrote:
>> Sure, I don't really care. What name do you want for it?
> 
> Maybe slow and fast?  Or simple and meta?

So you want 'result' in a named struct too then? Because right now
it looks like this:

struct nvme_uring_cmd_pdu {
         union {
                 struct bio *bio;
                 struct request *req;
         };
         u32 meta_len;
         u32 nvme_status;
         union {
                 struct {
                         void *meta; /* kernel-resident buffer */
                         void __user *meta_buffer;
                 };
                 u64 result;
         };
};

Or just the union named so it's clear it's a union? That'd make it

pdu->u.meta

and so forth. I think that might be cleaner.
Christoph Hellwig Sept. 26, 2022, 2:52 p.m. UTC | #7
On Mon, Sep 26, 2022 at 08:50:41AM -0600, Jens Axboe wrote:
> Or just the union named so it's clear it's a union? That'd make it
> 
> pdu->u.meta
> 
> and so forth. I think that might be cleaner.

Ok, that's at least a bit of a warning sign.
Jens Axboe Sept. 26, 2022, 2:54 p.m. UTC | #8
On 9/26/22 8:52 AM, Christoph Hellwig wrote:
> On Mon, Sep 26, 2022 at 08:50:41AM -0600, Jens Axboe wrote:
>> Or just the union named so it's clear it's a union? That'd make it
>>
>> pdu->u.meta
>>
>> and so forth. I think that might be cleaner.
> 
> Ok, that's at least a bit of a warning sign.

I'll go with that.
diff mbox series

Patch

diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c
index c80b3ecca5c8..1ccc9dd6d434 100644
--- a/drivers/nvme/host/ioctl.c
+++ b/drivers/nvme/host/ioctl.c
@@ -349,9 +349,18 @@  struct nvme_uring_cmd_pdu {
 		struct bio *bio;
 		struct request *req;
 	};
-	void *meta; /* kernel-resident buffer */
-	void __user *meta_buffer;
 	u32 meta_len;
+	union {
+		struct {
+			void *meta; /* kernel-resident buffer */
+			void __user *meta_buffer;
+		};
+		struct {
+			u32 nvme_flags;
+			u32 nvme_status;
+			u64 result;
+		};
+	};
 };
 
 static inline struct nvme_uring_cmd_pdu *nvme_uring_cmd_pdu(
@@ -360,11 +369,10 @@  static inline struct nvme_uring_cmd_pdu *nvme_uring_cmd_pdu(
 	return (struct nvme_uring_cmd_pdu *)&ioucmd->pdu;
 }
 
-static void nvme_uring_task_cb(struct io_uring_cmd *ioucmd)
+static void nvme_uring_task_meta_cb(struct io_uring_cmd *ioucmd)
 {
 	struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd);
 	struct request *req = pdu->req;
-	struct bio *bio = req->bio;
 	int status;
 	u64 result;
 
@@ -375,27 +383,43 @@  static void nvme_uring_task_cb(struct io_uring_cmd *ioucmd)
 
 	result = le64_to_cpu(nvme_req(req)->result.u64);
 
-	if (pdu->meta)
+	if (pdu->meta_len)
 		status = nvme_finish_user_metadata(req, pdu->meta_buffer,
 					pdu->meta, pdu->meta_len, status);
-	if (bio)
-		blk_rq_unmap_user(bio);
+	if (req->bio)
+		blk_rq_unmap_user(req->bio);
 	blk_mq_free_request(req);
 
 	io_uring_cmd_done(ioucmd, status, result);
 }
 
+static void nvme_uring_task_cb(struct io_uring_cmd *ioucmd)
+{
+	struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd);
+	int status;
+
+	if (pdu->nvme_flags & NVME_REQ_CANCELLED)
+		status = -EINTR;
+	else
+		status = pdu->nvme_status;
+
+	if (pdu->bio)
+		blk_rq_unmap_user(pdu->bio);
+
+	io_uring_cmd_done(ioucmd, status, pdu->result);
+}
+
 static enum rq_end_io_ret nvme_uring_cmd_end_io(struct request *req,
 						blk_status_t err)
 {
 	struct io_uring_cmd *ioucmd = req->end_io_data;
 	struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd);
-	/* extract bio before reusing the same field for request */
-	struct bio *bio = pdu->bio;
 	void *cookie = READ_ONCE(ioucmd->cookie);
 
-	pdu->req = req;
-	req->bio = bio;
+	req->bio = pdu->bio;
+	pdu->nvme_flags = nvme_req(req)->flags;
+	pdu->nvme_status = nvme_req(req)->status;
+	pdu->result = le64_to_cpu(nvme_req(req)->result.u64);
 
 	/*
 	 * For iopoll, complete it directly.
@@ -406,6 +430,29 @@  static enum rq_end_io_ret nvme_uring_cmd_end_io(struct request *req,
 	else
 		io_uring_cmd_complete_in_task(ioucmd, nvme_uring_task_cb);
 
+	blk_mq_free_request(req);
+	return RQ_END_IO_NONE;
+}
+
+static enum rq_end_io_ret nvme_uring_cmd_end_io_meta(struct request *req,
+						     blk_status_t err)
+{
+	struct io_uring_cmd *ioucmd = req->end_io_data;
+	struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd);
+	void *cookie = READ_ONCE(ioucmd->cookie);
+
+	req->bio = pdu->bio;
+	pdu->req = req;
+
+	/*
+	 * For iopoll, complete it directly.
+	 * Otherwise, move the completion to task work.
+	 */
+	if (cookie != NULL && blk_rq_is_poll(req))
+		nvme_uring_task_meta_cb(ioucmd);
+	else
+		io_uring_cmd_complete_in_task(ioucmd, nvme_uring_task_meta_cb);
+
 	return RQ_END_IO_NONE;
 }
 
@@ -467,8 +514,6 @@  static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
 			blk_flags);
 	if (IS_ERR(req))
 		return PTR_ERR(req);
-	req->end_io = nvme_uring_cmd_end_io;
-	req->end_io_data = ioucmd;
 
 	if (issue_flags & IO_URING_F_IOPOLL && rq_flags & REQ_POLLED) {
 		if (unlikely(!req->bio)) {
@@ -483,10 +528,15 @@  static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
 	}
 	/* to free bio on completion, as req->bio will be null at that time */
 	pdu->bio = req->bio;
-	pdu->meta = meta;
-	pdu->meta_buffer = nvme_to_user_ptr(d.metadata);
 	pdu->meta_len = d.metadata_len;
-
+	req->end_io_data = ioucmd;
+	if (pdu->meta_len) {
+		pdu->meta = meta;
+		pdu->meta_buffer = nvme_to_user_ptr(d.metadata);
+		req->end_io = nvme_uring_cmd_end_io_meta;
+	} else {
+		req->end_io = nvme_uring_cmd_end_io;
+	}
 	blk_execute_rq_nowait(req, false);
 	return -EIOCBQUEUED;
 }