diff mbox series

[13/17] nvme: allow user passthrough commands to poll

Message ID 20220308152105.309618-14-joshi.k@samsung.com (mailing list archive)
State New, archived
Headers show
Series io_uring passthru over nvme | expand

Commit Message

Kanchan Joshi March 8, 2022, 3:21 p.m. UTC
From: Keith Busch <kbusch@kernel.org>

The block layer knows how to deal with polled requests. Let the NVMe
driver use the previously reserved user "flags" fields to define an
option to allocate the request from the polled hardware contexts. If
polling is not enabled, then the block layer will automatically fallback
to a non-polled request.[1]

[1] https://lore.kernel.org/linux-block/20210517171443.GB2709391@dhcp-10-100-145-180.wdc.com/

Signed-off-by: Keith Busch <kbusch@kernel.org>
---
 drivers/nvme/host/ioctl.c       | 30 ++++++++++++++++--------------
 include/uapi/linux/nvme_ioctl.h |  4 ++++
 2 files changed, 20 insertions(+), 14 deletions(-)

Comments

Keith Busch March 8, 2022, 5:08 p.m. UTC | #1
On Tue, Mar 08, 2022 at 08:51:01PM +0530, Kanchan Joshi wrote:
>  	if (copy_from_user(&io, uio, sizeof(io)))
>  		return -EFAULT;
> -	if (io.flags)
> -		return -EINVAL;
> +	if (io.flags & NVME_HIPRI)
> +		rq_flags |= REQ_POLLED;

I'm pretty sure we can repurpose this previously reserved field for this
kind of special handling without an issue now, but we should continue
returning EINVAL if any unknown flags are set. I have no idea what, if
any, new flags may be defined later, so we shouldn't let a future
application think an older driver honored something we are not handling.
Kanchan Joshi March 9, 2022, 7:03 a.m. UTC | #2
On Tue, Mar 8, 2022 at 10:39 PM Keith Busch <kbusch@kernel.org> wrote:
>
> On Tue, Mar 08, 2022 at 08:51:01PM +0530, Kanchan Joshi wrote:
> >       if (copy_from_user(&io, uio, sizeof(io)))
> >               return -EFAULT;
> > -     if (io.flags)
> > -             return -EINVAL;
> > +     if (io.flags & NVME_HIPRI)
> > +             rq_flags |= REQ_POLLED;
>
> I'm pretty sure we can repurpose this previously reserved field for this
> kind of special handling without an issue now, but we should continue
> returning EINVAL if any unknown flags are set. I have no idea what, if
> any, new flags may be defined later, so we shouldn't let a future
> application think an older driver honored something we are not handling.

Would it be better if we don't try to pass NVME_HIPRI by any means
(flags or rsvd1/rsvd2), and that means not enabling sync-polling and
killing this patch.
We have another flag "IO_URING_F_UCMD_POLLED" in ioucmd->flags, and we
can use that instead to enable only the async polling. What do you
think?
Christoph Hellwig March 11, 2022, 6:49 a.m. UTC | #3
On Wed, Mar 09, 2022 at 12:33:33PM +0530, Kanchan Joshi wrote:
> Would it be better if we don't try to pass NVME_HIPRI by any means
> (flags or rsvd1/rsvd2), and that means not enabling sync-polling and
> killing this patch.
> We have another flag "IO_URING_F_UCMD_POLLED" in ioucmd->flags, and we
> can use that instead to enable only the async polling. What do you
> think?

Yes, polling should be a io_uring level feature.
diff mbox series

Patch

diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c
index a4cde210aab9..a6712fb3eb98 100644
--- a/drivers/nvme/host/ioctl.c
+++ b/drivers/nvme/host/ioctl.c
@@ -132,7 +132,7 @@  static int nvme_submit_user_cmd(struct request_queue *q,
 		struct nvme_command *cmd, u64 ubuffer,
 		unsigned bufflen, void __user *meta_buffer, unsigned meta_len,
 		u32 meta_seed, u64 *result, unsigned timeout,
-		struct io_uring_cmd *ioucmd)
+		struct io_uring_cmd *ioucmd, unsigned int rq_flags)
 {
 	bool write = nvme_is_write(cmd);
 	struct nvme_ns *ns = q->queuedata;
@@ -140,7 +140,6 @@  static int nvme_submit_user_cmd(struct request_queue *q,
 	struct request *req;
 	struct bio *bio = NULL;
 	void *meta = NULL;
-	unsigned int rq_flags = 0;
 	blk_mq_req_flags_t blk_flags = 0;
 	int ret;
 
@@ -216,11 +215,12 @@  static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
 	struct nvme_command c;
 	unsigned length, meta_len;
 	void __user *metadata;
+	unsigned int rq_flags = 0;
 
 	if (copy_from_user(&io, uio, sizeof(io)))
 		return -EFAULT;
-	if (io.flags)
-		return -EINVAL;
+	if (io.flags & NVME_HIPRI)
+		rq_flags |= REQ_POLLED;
 
 	switch (io.opcode) {
 	case nvme_cmd_write:
@@ -258,7 +258,7 @@  static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
 
 	memset(&c, 0, sizeof(c));
 	c.rw.opcode = io.opcode;
-	c.rw.flags = io.flags;
+	c.rw.flags = 0;
 	c.rw.nsid = cpu_to_le32(ns->head->ns_id);
 	c.rw.slba = cpu_to_le64(io.slba);
 	c.rw.length = cpu_to_le16(io.nblocks);
@@ -270,7 +270,7 @@  static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
 
 	return nvme_submit_user_cmd(ns->queue, &c,
 			io.addr, length, metadata, meta_len,
-			lower_32_bits(io.slba), NULL, 0, NULL);
+			lower_32_bits(io.slba), NULL, 0, NULL, rq_flags);
 }
 
 static bool nvme_validate_passthru_nsid(struct nvme_ctrl *ctrl,
@@ -292,6 +292,7 @@  static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
 {
 	struct nvme_passthru_cmd cmd;
 	struct nvme_command c;
+	unsigned int rq_flags = 0;
 	unsigned timeout = 0;
 	u64 result;
 	int status;
@@ -300,14 +301,14 @@  static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
 		return -EACCES;
 	if (copy_from_user(&cmd, ucmd, sizeof(cmd)))
 		return -EFAULT;
-	if (cmd.flags)
-		return -EINVAL;
+	if (cmd.flags & NVME_HIPRI)
+		rq_flags |= REQ_POLLED;
 	if (!nvme_validate_passthru_nsid(ctrl, ns, cmd.nsid))
 		return -EINVAL;
 
 	memset(&c, 0, sizeof(c));
 	c.common.opcode = cmd.opcode;
-	c.common.flags = cmd.flags;
+	c.common.flags = 0;
 	c.common.nsid = cpu_to_le32(cmd.nsid);
 	c.common.cdw2[0] = cpu_to_le32(cmd.cdw2);
 	c.common.cdw2[1] = cpu_to_le32(cmd.cdw3);
@@ -323,7 +324,7 @@  static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
 
 	status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c,
 			cmd.addr, cmd.data_len, nvme_to_user_ptr(cmd.metadata),
-			cmd.metadata_len, 0, &result, timeout, NULL);
+			cmd.metadata_len, 0, &result, timeout, NULL, rq_flags);
 
 	if (status >= 0) {
 		if (put_user(result, &ucmd->result))
@@ -339,6 +340,7 @@  static int nvme_user_cmd64(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
 {
 	struct nvme_passthru_cmd64 cmd, *cptr;
 	struct nvme_command c;
+	unsigned int rq_flags = 0;
 	unsigned timeout = 0;
 	int status;
 
@@ -353,14 +355,14 @@  static int nvme_user_cmd64(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
 			return -EINVAL;
 		cptr = (struct nvme_passthru_cmd64 *)ioucmd->cmd;
 	}
-	if (cptr->flags)
-		return -EINVAL;
+	if (cptr->flags & NVME_HIPRI)
+		rq_flags |= REQ_POLLED;
 	if (!nvme_validate_passthru_nsid(ctrl, ns, cptr->nsid))
 		return -EINVAL;
 
 	memset(&c, 0, sizeof(c));
 	c.common.opcode = cptr->opcode;
-	c.common.flags = cptr->flags;
+	c.common.flags = 0;
 	c.common.nsid = cpu_to_le32(cptr->nsid);
 	c.common.cdw2[0] = cpu_to_le32(cptr->cdw2);
 	c.common.cdw2[1] = cpu_to_le32(cptr->cdw3);
@@ -377,7 +379,7 @@  static int nvme_user_cmd64(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
 	status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c,
 			cptr->addr, cptr->data_len,
 			nvme_to_user_ptr(cptr->metadata), cptr->metadata_len,
-			0, &cptr->result, timeout, ioucmd);
+			0, &cptr->result, timeout, ioucmd, rq_flags);
 
 	if (!ioucmd && status >= 0) {
 		if (put_user(cptr->result, &ucmd->result))
diff --git a/include/uapi/linux/nvme_ioctl.h b/include/uapi/linux/nvme_ioctl.h
index d99b5a772698..df2c138c38d9 100644
--- a/include/uapi/linux/nvme_ioctl.h
+++ b/include/uapi/linux/nvme_ioctl.h
@@ -9,6 +9,10 @@ 
 
 #include <linux/types.h>
 
+enum nvme_io_flags {
+	NVME_HIPRI      = 1 << 0, /* use polling queue if available */
+};
+
 struct nvme_user_io {
 	__u8	opcode;
 	__u8	flags;