| Message ID | 20250328154647.2590171-4-csander@purestorage.com |
|---|---|
| State | New |
| Series | nvme_map_user_request() cleanup |

On 3/28/2025 9:16 PM, Caleb Sander Mateos wrote:
> For NVMe passthru operations with fixed buffers, the fixed buffer lookup
> happens in io_uring_cmd_import_fixed(). But nvme_uring_cmd_io() can
> return -EAGAIN first from nvme_alloc_user_request() if all tags in the
> tag set are in use. This ordering difference is observable when using
> UBLK_U_IO_{,UN}REGISTER_IO_BUF SQEs to modify the fixed buffer table. If
> the NVMe passthru operation is followed by UBLK_U_IO_UNREGISTER_IO_BUF
> to unregister the fixed buffer and the NVMe passthru goes async, the
> fixed buffer lookup will fail because it happens after the unregister.

While the patch looks fine, I wonder what setup is required to
trigger/test this, given that io_uring NVMe passthru is on the char
device node and ublk does not take the char device as the backing file.
Care to explain?

On Mon, Mar 31, 2025 at 12:16:58PM +0530, Kanchan Joshi wrote:
> On 3/28/2025 9:16 PM, Caleb Sander Mateos wrote:
> > For NVMe passthru operations with fixed buffers, the fixed buffer lookup
> > happens in io_uring_cmd_import_fixed(). But nvme_uring_cmd_io() can
> > return -EAGAIN first from nvme_alloc_user_request() if all tags in the
> > tag set are in use. This ordering difference is observable when using
> > UBLK_U_IO_{,UN}REGISTER_IO_BUF SQEs to modify the fixed buffer table. If
> > the NVMe passthru operation is followed by UBLK_U_IO_UNREGISTER_IO_BUF
> > to unregister the fixed buffer and the NVMe passthru goes async, the
> > fixed buffer lookup will fail because it happens after the unregister.
>
> while the patch looks fine, I wonder what setup is required to
> trigger/test this. Given that io_uring NVMe passthru is on the char
> device node, and ublk does not take char device as the backing file.
> Care to explain?

Not sure I understand the question. A ublk daemon can use anything it
wants on the backend. Are you just referring to the public ublksrv
implementation? That's not used here, if that's what you mean.

On 3/31/2025 8:06 PM, Keith Busch wrote:
> On Mon, Mar 31, 2025 at 12:16:58PM +0530, Kanchan Joshi wrote:
>> On 3/28/2025 9:16 PM, Caleb Sander Mateos wrote:
>>> For NVMe passthru operations with fixed buffers, the fixed buffer lookup
>>> happens in io_uring_cmd_import_fixed(). But nvme_uring_cmd_io() can
>>> return -EAGAIN first from nvme_alloc_user_request() if all tags in the
>>> tag set are in use. This ordering difference is observable when using
>>> UBLK_U_IO_{,UN}REGISTER_IO_BUF SQEs to modify the fixed buffer table. If
>>> the NVMe passthru operation is followed by UBLK_U_IO_UNREGISTER_IO_BUF
>>> to unregister the fixed buffer and the NVMe passthru goes async, the
>>> fixed buffer lookup will fail because it happens after the unregister.
>>
>> while the patch looks fine, I wonder what setup is required to
>> trigger/test this. Given that io_uring NVMe passthru is on the char
>> device node, and ublk does not take char device as the backing file.
>> Care to explain?
>
> Not sure I understand the question. A ublk daemon can use anything it
> wants on the backend. Are you just referring to the public ublksrv
> implementation? That's not used here, if that's what you mean.

Got it, I did not think beyond the public ublksrv. The userspace block
over the nvme char-device sounds interesting.

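To make the ordering concrete, below is a rough liburing sketch of the submission pattern the commit message describes: a fixed-buffer NVMe passthru uring_cmd immediately followed in the same submission by the SQE that unregisters that buffer. The device path, namespace ID, buffer index, and LBA geometry are placeholders, the UBLK_U_IO_UNREGISTER_IO_BUF command setup is only indicated by a comment (a NOP stands in for it), and error handling is omitted; this is an illustration of the race window, not a tested reproducer from this series.

```c
/*
 * Hypothetical sketch (not from this series): a fixed-buffer NVMe
 * passthru uring_cmd followed in the same submission by the SQE that
 * unregisters the buffer. If the passthru hits -EAGAIN in
 * nvme_alloc_user_request() and is retried asynchronously, the old
 * code performed the fixed-buffer lookup only after the unregister
 * had already run.
 */
#include <fcntl.h>
#include <string.h>
#include <liburing.h>
#include <linux/nvme_ioctl.h>

int main(void)
{
	struct io_uring ring;
	struct io_uring_sqe *sqe;
	struct nvme_uring_cmd *cmd;
	/* placeholder path; the buffer at index 0 is assumed to have been
	 * registered already (e.g. via UBLK_U_IO_REGISTER_IO_BUF) */
	int nvme_fd = open("/dev/ng0n1", O_RDWR);

	/* NVMe passthru uring_cmd needs 128-byte SQEs */
	io_uring_queue_init(8, &ring, IORING_SETUP_SQE128);

	/* SQE 1: NVMe passthru read using fixed buffer slot 0 */
	sqe = io_uring_get_sqe(&ring);
	sqe->opcode = IORING_OP_URING_CMD;
	sqe->fd = nvme_fd;
	sqe->cmd_op = NVME_URING_CMD_IO;
	sqe->uring_cmd_flags = IORING_URING_CMD_FIXED;
	sqe->buf_index = 0;

	cmd = (struct nvme_uring_cmd *)sqe->cmd;
	memset(cmd, 0, sizeof(*cmd));
	cmd->opcode = 0x02;	/* NVMe read */
	cmd->nsid = 1;		/* placeholder namespace */
	cmd->addr = 0;		/* resolved against the fixed buffer */
	cmd->data_len = 4096;
	cmd->cdw12 = 7;		/* 8 x 512B LBAs, placeholder geometry */

	/*
	 * SQE 2: unregister fixed buffer slot 0. In the ublk zero-copy
	 * case this is a UBLK_U_IO_UNREGISTER_IO_BUF uring_cmd on the
	 * ublk char device; a NOP stands in for it here since the exact
	 * payload is omitted.
	 */
	sqe = io_uring_get_sqe(&ring);
	io_uring_prep_nop(sqe);

	io_uring_submit(&ring);
	io_uring_queue_exit(&ring);
	return 0;
}
```

With the patch, io_uring_cmd_import_fixed() runs before the request is allocated, so the lookup happens while the buffer is still registered even if the passthru later goes async.
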
```diff
diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c
index 42dfd29ed39e..400c3df0e58f 100644
--- a/drivers/nvme/host/ioctl.c
+++ b/drivers/nvme/host/ioctl.c
@@ -112,12 +112,11 @@ static struct request *nvme_alloc_user_request(struct request_queue *q,
 	return req;
 }
 
 static int nvme_map_user_request(struct request *req, u64 ubuffer,
 		unsigned bufflen, void __user *meta_buffer, unsigned meta_len,
-		struct io_uring_cmd *ioucmd, unsigned int flags,
-		unsigned int iou_issue_flags)
+		struct iov_iter *iter, unsigned int flags)
 {
 	struct request_queue *q = req->q;
 	struct nvme_ns *ns = q->queuedata;
 	struct block_device *bdev = ns ? ns->disk->part0 : NULL;
 	bool supports_metadata = bdev && blk_get_integrity(bdev->bd_disk);
@@ -135,28 +134,16 @@ static int nvme_map_user_request(struct request *req, u64 ubuffer,
 		if (!nvme_ctrl_meta_sgl_supported(ctrl))
 			dev_warn_once(ctrl->device,
 				      "using unchecked metadata buffer\n");
 	}
 
-	if (ioucmd && (ioucmd->flags & IORING_URING_CMD_FIXED)) {
-		struct iov_iter iter;
-
-		/* fixedbufs is only for non-vectored io */
-		if (flags & NVME_IOCTL_VEC)
-			return -EINVAL;
-
-		ret = io_uring_cmd_import_fixed(ubuffer, bufflen,
-				rq_data_dir(req), &iter, ioucmd,
-				iou_issue_flags);
-		if (ret < 0)
-			return ret;
-		ret = blk_rq_map_user_iov(q, req, NULL, &iter, GFP_KERNEL);
-	} else {
+	if (iter)
+		ret = blk_rq_map_user_iov(q, req, NULL, iter, GFP_KERNEL);
+	else
 		ret = blk_rq_map_user_io(req, NULL, nvme_to_user_ptr(ubuffer),
 				bufflen, GFP_KERNEL, flags & NVME_IOCTL_VEC, 0,
 				0, rq_data_dir(req));
-	}
 
 	if (ret)
 		return ret;
 
 	bio = req->bio;
@@ -194,11 +181,11 @@ static int nvme_submit_user_cmd(struct request_queue *q,
 		return PTR_ERR(req);
 	req->timeout = timeout;
 
 	if (ubuffer && bufflen) {
 		ret = nvme_map_user_request(req, ubuffer, bufflen, meta_buffer,
-				meta_len, NULL, flags, 0);
+				meta_len, NULL, flags);
 		if (ret)
 			goto out_free_req;
 	}
 
 	bio = req->bio;
@@ -467,10 +454,12 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
 	struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd);
 	const struct nvme_uring_cmd *cmd = io_uring_sqe_cmd(ioucmd->sqe);
 	struct request_queue *q = ns ? ns->queue : ctrl->admin_q;
 	struct nvme_uring_data d;
 	struct nvme_command c;
+	struct iov_iter iter;
+	struct iov_iter *map_iter = NULL;
 	struct request *req;
 	blk_opf_t rq_flags = REQ_ALLOC_CACHE;
 	blk_mq_req_flags_t blk_flags = 0;
 	int ret;
 
@@ -502,10 +491,24 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
 	d.addr = READ_ONCE(cmd->addr);
 	d.data_len = READ_ONCE(cmd->data_len);
 	d.metadata_len = READ_ONCE(cmd->metadata_len);
 	d.timeout_ms = READ_ONCE(cmd->timeout_ms);
 
+	if (d.data_len && (ioucmd->flags & IORING_URING_CMD_FIXED)) {
+		/* fixedbufs is only for non-vectored io */
+		if (vec)
+			return -EINVAL;
+
+		ret = io_uring_cmd_import_fixed(d.addr, d.data_len,
+			nvme_is_write(&c) ? WRITE : READ, &iter, ioucmd,
+			issue_flags);
+		if (ret < 0)
+			return ret;
+
+		map_iter = &iter;
+	}
+
 	if (issue_flags & IO_URING_F_NONBLOCK) {
 		rq_flags |= REQ_NOWAIT;
 		blk_flags = BLK_MQ_REQ_NOWAIT;
 	}
 	if (issue_flags & IO_URING_F_IOPOLL)
@@ -515,13 +518,13 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
 	if (IS_ERR(req))
 		return PTR_ERR(req);
 	req->timeout = d.timeout_ms ? msecs_to_jiffies(d.timeout_ms) : 0;
 
 	if (d.data_len) {
-		ret = nvme_map_user_request(req, d.addr,
-				d.data_len, nvme_to_user_ptr(d.metadata),
-				d.metadata_len, ioucmd, vec, issue_flags);
+		ret = nvme_map_user_request(req, d.addr, d.data_len,
+				nvme_to_user_ptr(d.metadata), d.metadata_len,
+				map_iter, vec);
 		if (ret)
 			goto out_free_req;
 	}
 
 	/* to free bio on completion, as req->bio will be null at that time */
```