| Message ID | 20220308152105.309618-9-joshi.k@samsung.com (mailing list archive) |
|---|---|
| State | New, archived |
| Series | io_uring passthru over nvme |
On Tue, Mar 08, 2022 at 08:50:56PM +0530, Kanchan Joshi wrote:
> +/* Unlike blk_rq_map_user () this is only for fixed-buffer async passthrough. */
> +int blk_rq_map_user_fixedb(struct request_queue *q, struct request *rq,
> +                u64 ubuf, unsigned long len, gfp_t gfp_mask,
> +                struct io_uring_cmd *ioucmd)
> +{

This doesn't belong into a patch titled nvme.  Also please add a proper
kernel-doc comment.

> +EXPORT_SYMBOL(blk_rq_map_user_fixedb);

EXPORT_SYMBOL_GPL, please.

> +static inline bool nvme_is_fixedb_passthru(struct io_uring_cmd *ioucmd)
> +{
> +        return ((ioucmd) && (ioucmd->flags & IO_URING_F_UCMD_FIXEDBUFS));
> +}

No need for the outer and first set of inner braces.

> +
>  static int nvme_submit_user_cmd(struct request_queue *q,
> -                struct nvme_command *cmd, void __user *ubuffer,
> +                struct nvme_command *cmd, u64 ubuffer,
>                  unsigned bufflen, void __user *meta_buffer, unsigned meta_len,
>                  u32 meta_seed, u64 *result, unsigned timeout,
>                  struct io_uring_cmd *ioucmd)
> @@ -152,8 +157,12 @@ static int nvme_submit_user_cmd(struct request_queue *q,
>          nvme_req(req)->flags |= NVME_REQ_USERCMD;
>  
>          if (ubuffer && bufflen) {
> -                ret = blk_rq_map_user(q, req, NULL, ubuffer, bufflen,
> -                                GFP_KERNEL);
> +                if (likely(nvme_is_fixedb_passthru(ioucmd)))
> +                        ret = blk_rq_map_user_fixedb(q, req, ubuffer, bufflen,
> +                                        GFP_KERNEL, ioucmd);
> +                else
> +                        ret = blk_rq_map_user(q, req, NULL, nvme_to_user_ptr(ubuffer),

Overly long line.
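For reference, a kernel-doc comment of the kind requested above could look
roughly like the sketch below. The wording is illustrative only and not taken
from the series; the parameter list mirrors the patch.

/**
 * blk_rq_map_user_fixedb - map an io_uring fixed (pre-registered) buffer into a request
 * @q:          request queue the request belongs to
 * @rq:         request to map the buffer into
 * @ubuf:       user address within the registered buffer
 * @len:        number of bytes to map
 * @gfp_mask:   allocation flags for the bio
 * @ioucmd:     io_uring command carrying the fixed-buffer registration
 *
 * Only for async passthrough commands issued through io_uring with a
 * registered buffer; unlike blk_rq_map_user(), no pages are pinned here
 * because the buffer was pinned at registration time.
 *
 * Return: 0 on success, a negative errno on failure.
 */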
> +int blk_rq_map_user_fixedb(struct request_queue *q, struct request *rq,
> +                u64 ubuf, unsigned long len, gfp_t gfp_mask,
> +                struct io_uring_cmd *ioucmd)

Looking at this a bit more, I don't think this is a good interface or
works at all for that matter.

> +{
> +        struct iov_iter iter;
> +        size_t iter_count, nr_segs;
> +        struct bio *bio;
> +        int ret;
> +
> +        /*
> +         * Talk to io_uring to obtain BVEC iterator for the buffer.
> +         * And use that iterator to form bio/request.
> +         */
> +        ret = io_uring_cmd_import_fixed(ubuf, len, rq_data_dir(rq), &iter,
> +                        ioucmd);

Instead of pulling the io-uring dependency into blk-map.c we could just
pass the iter to a helper function and have that as the block layer
abstraction if we really want one.  But:

> +        if (unlikely(ret < 0))
> +                return ret;
> +        iter_count = iov_iter_count(&iter);
> +        nr_segs = iter.nr_segs;
> +
> +        if (!iter_count || (iter_count >> 9) > queue_max_hw_sectors(q))
> +                return -EINVAL;
> +        if (nr_segs > queue_max_segments(q))
> +                return -EINVAL;
> +        /* no iovecs to alloc, as we already have a BVEC iterator */
> +        bio = bio_alloc(gfp_mask, 0);
> +        if (!bio)
> +                return -ENOMEM;
> +
> +        ret = bio_iov_iter_get_pages(bio, &iter);

I can't see how this works at all.  Block drivers have a lot more
requirements than just total size and number of segments.  Very typical
is a limit on the size of each segment, and for nvme we also have the
weird virtual boundary for the PRPs.  None of that is being checked here.
You really need to use bio_add_pc_page or open code the equivalent checks
for passthrough I/O.

> +        if (likely(nvme_is_fixedb_passthru(ioucmd)))
> +                ret = blk_rq_map_user_fixedb(q, req, ubuffer, bufflen,
> +                                GFP_KERNEL, ioucmd);

And I'm also really worried about only supporting fixed buffers.  Fixed
buffers are a really nice benchmarketing feature, but without supporting
arbitrary buffers this is rather useless in real life.
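A minimal sketch of the split suggested here: the driver imports the iterator
(for fixed buffers via io_uring_cmd_import_fixed()) and hands only the
iov_iter to a block-layer helper, so blk-map.c no longer needs
<linux/io_uring.h>. The helper name blk_rq_map_user_bvec() and its signature
are hypothetical; the body only reuses the calls already present in the patch
and deliberately leaves out the per-segment limit checks discussed next.

/* Hypothetical block-layer helper: the caller supplies an already-built
 * bvec iterator, so no io_uring knowledge is needed at this layer.
 */
int blk_rq_map_user_bvec(struct request_queue *q, struct request *rq,
                         struct iov_iter *iter, gfp_t gfp_mask)
{
        size_t nr_segs = iter->nr_segs;
        struct bio *bio;
        int ret;

        if (!iov_iter_count(iter) ||
            (iov_iter_count(iter) >> 9) > queue_max_hw_sectors(q))
                return -EINVAL;
        if (nr_segs > queue_max_segments(q))
                return -EINVAL;

        /* no inline iovecs needed, the iterator already carries bvecs */
        bio = bio_alloc(gfp_mask, 0);
        if (!bio)
                return -ENOMEM;

        ret = bio_iov_iter_get_pages(bio, iter);
        if (ret) {
                bio_release_pages(bio, false);
                bio_put(bio);
                return ret;
        }

        blk_rq_bio_prep(rq, bio, nr_segs);
        return 0;
}

The existing blk_rq_map_user_iov() already takes an iov_iter, so the
abstraction could conceivably fold into it, though in this tree that path
expects user-backed iovecs rather than bvec iterators.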
On Tue, Mar 08, 2022 at 08:50:56PM +0530, Kanchan Joshi wrote:
> From: Anuj Gupta <anuj20.g@samsung.com>
> 
> Add support to carry out passthrough command with pre-mapped buffers.
> 
> Signed-off-by: Anuj Gupta <anuj20.g@samsung.com>
> Signed-off-by: Kanchan Joshi <joshi.k@samsung.com>
> ---
>  block/blk-map.c           | 45 +++++++++++++++++++++++++++++++++++++++
>  drivers/nvme/host/ioctl.c | 27 ++++++++++++++---------
>  include/linux/blk-mq.h    |  2 ++
>  3 files changed, 64 insertions(+), 10 deletions(-)
> 
> diff --git a/block/blk-map.c b/block/blk-map.c
> index 4526adde0156..027e8216e313 100644
> --- a/block/blk-map.c
> +++ b/block/blk-map.c
> @@ -8,6 +8,7 @@
>  #include <linux/bio.h>
>  #include <linux/blkdev.h>
>  #include <linux/uio.h>
> +#include <linux/io_uring.h>
>  
>  #include "blk.h"
>  
> @@ -577,6 +578,50 @@ int blk_rq_map_user(struct request_queue *q, struct request *rq,
>  }
>  EXPORT_SYMBOL(blk_rq_map_user);
>  
> +/* Unlike blk_rq_map_user () this is only for fixed-buffer async passthrough. */
> +int blk_rq_map_user_fixedb(struct request_queue *q, struct request *rq,
> +                u64 ubuf, unsigned long len, gfp_t gfp_mask,
> +                struct io_uring_cmd *ioucmd)
> +{
> +        struct iov_iter iter;
> +        size_t iter_count, nr_segs;
> +        struct bio *bio;
> +        int ret;
> +
> +        /*
> +         * Talk to io_uring to obtain BVEC iterator for the buffer.
> +         * And use that iterator to form bio/request.
> +         */
> +        ret = io_uring_cmd_import_fixed(ubuf, len, rq_data_dir(rq), &iter,
> +                        ioucmd);
> +        if (unlikely(ret < 0))
> +                return ret;
> +        iter_count = iov_iter_count(&iter);
> +        nr_segs = iter.nr_segs;
> +
> +        if (!iter_count || (iter_count >> 9) > queue_max_hw_sectors(q))
> +                return -EINVAL;
> +        if (nr_segs > queue_max_segments(q))
> +                return -EINVAL;
> +        /* no iovecs to alloc, as we already have a BVEC iterator */
> +        bio = bio_alloc(gfp_mask, 0);
> +        if (!bio)
> +                return -ENOMEM;
> +
> +        ret = bio_iov_iter_get_pages(bio, &iter);

Here bio_iov_iter_get_pages() may not work as expected, since the code
needs to check the queue limits before adding pages to the bio, and we
don't run the split path for passthrough bios.
__bio_iov_append_get_pages() may be generalized to cover this case.

Thanks,
Ming
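To make the objection concrete, below is a rough sketch of the direction both
reviewers point at: inside a helper like blk_rq_map_user_fixedb(), after
io_uring_cmd_import_fixed() has filled 'iter', walk the bvecs and add them
with bio_add_pc_page(), which enforces the queue's size, segment and
virt-boundary (PRP) constraints, instead of relying on
bio_iov_iter_get_pages(). This is illustrative only, not the code that was
eventually proposed; it also ignores a possible offset into the first bvec
and a partially used last bvec.

        /*
         * 'iter' is the ITER_BVEC iterator produced by
         * io_uring_cmd_import_fixed(); the pages behind it were pinned
         * when the buffer was registered, so nothing is pinned here.
         */
        const struct bio_vec *bv = iter.bvec;
        struct bio *bio;
        size_t i;

        if (!iter.nr_segs || iter.nr_segs > BIO_MAX_VECS)
                return -EINVAL;

        bio = bio_alloc(gfp_mask, iter.nr_segs);
        if (!bio)
                return -ENOMEM;

        for (i = 0; i < iter.nr_segs; i++) {
                /*
                 * bio_add_pc_page() rejects additions that exceed the
                 * queue limits or violate the virt boundary (NVMe's PRP
                 * constraint), which bio_iov_iter_get_pages() does not.
                 */
                if (bio_add_pc_page(q, bio, bv[i].bv_page, bv[i].bv_len,
                                    bv[i].bv_offset) != bv[i].bv_len) {
                        bio_put(bio);
                        return -EINVAL;
                }
        }

        blk_rq_bio_prep(rq, bio, bio->bi_vcnt);
        return 0;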
On Fri, Mar 11, 2022 at 12:13 PM Christoph Hellwig <hch@lst.de> wrote:
>
> > +int blk_rq_map_user_fixedb(struct request_queue *q, struct request *rq,
> > +                u64 ubuf, unsigned long len, gfp_t gfp_mask,
> > +                struct io_uring_cmd *ioucmd)
>
> Looking at this a bit more, I don't think this is a good interface or
> works at all for that matter.
>
> > +{
> > +        struct iov_iter iter;
> > +        size_t iter_count, nr_segs;
> > +        struct bio *bio;
> > +        int ret;
> > +
> > +        /*
> > +         * Talk to io_uring to obtain BVEC iterator for the buffer.
> > +         * And use that iterator to form bio/request.
> > +         */
> > +        ret = io_uring_cmd_import_fixed(ubuf, len, rq_data_dir(rq), &iter,
> > +                        ioucmd);
>
> Instead of pulling the io-uring dependency into blk-map.c we could just
> pass the iter to a helper function and have that as the block layer
> abstraction if we really want one.  But:
>
> > +        if (unlikely(ret < 0))
> > +                return ret;
> > +        iter_count = iov_iter_count(&iter);
> > +        nr_segs = iter.nr_segs;
> > +
> > +        if (!iter_count || (iter_count >> 9) > queue_max_hw_sectors(q))
> > +                return -EINVAL;
> > +        if (nr_segs > queue_max_segments(q))
> > +                return -EINVAL;
> > +        /* no iovecs to alloc, as we already have a BVEC iterator */
> > +        bio = bio_alloc(gfp_mask, 0);
> > +        if (!bio)
> > +                return -ENOMEM;
> > +
> > +        ret = bio_iov_iter_get_pages(bio, &iter);
>
> I can't see how this works at all.  Block drivers have a lot more
> requirements than just total size and number of segments.  Very typical
> is a limit on the size of each segment, and for nvme we also have the
> weird virtual boundary for the PRPs.  None of that is being checked here.
> You really need to use bio_add_pc_page or open code the equivalent checks
> for passthrough I/O.

Indeed, I'm missing those checks. Will fix up.

> > +        if (likely(nvme_is_fixedb_passthru(ioucmd)))
> > +                ret = blk_rq_map_user_fixedb(q, req, ubuffer, bufflen,
> > +                                GFP_KERNEL, ioucmd);
>
> And I'm also really worried about only supporting fixed buffers.  Fixed
> buffers are a really nice benchmarketing feature, but without supporting
> arbitrary buffers this is rather useless in real life.

Sorry, I did not get your point on arbitrary buffers.
The goal has been to match/surpass io_uring's block-io peak perf, so
pre-mapped buffers had to be added.
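For completeness, the checks conceded as missing here are essentially the
open-coded variant of what bio_add_pc_page() enforces. Per bvec ('bv' being
the bvec array behind the imported iterator, as in the sketch above), that is
roughly the fragment below; the surrounding loop is hypothetical, while
queue_max_segment_size() and bvec_gap_to_prev() are the existing queue-limit
helpers in this era of the tree.

                /* segment must not exceed the queue's per-segment size */
                if (bv[i].bv_len > queue_max_segment_size(q))
                        return -EINVAL;
                /*
                 * NVMe PRP rule: no gap across the queue's virt boundary;
                 * cast because the imported bvec array is const.
                 */
                if (i && bvec_gap_to_prev(q, (struct bio_vec *)&bv[i - 1],
                                          bv[i].bv_offset))
                        return -EINVAL;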
On Mon, Mar 14, 2022 at 5:49 PM Ming Lei <ming.lei@redhat.com> wrote:
>
> On Tue, Mar 08, 2022 at 08:50:56PM +0530, Kanchan Joshi wrote:
> > From: Anuj Gupta <anuj20.g@samsung.com>
> >
> > Add support to carry out passthrough command with pre-mapped buffers.
> >
> > Signed-off-by: Anuj Gupta <anuj20.g@samsung.com>
> > Signed-off-by: Kanchan Joshi <joshi.k@samsung.com>
> > ---
> >  block/blk-map.c           | 45 +++++++++++++++++++++++++++++++++++++++
> >  drivers/nvme/host/ioctl.c | 27 ++++++++++++++---------
> >  include/linux/blk-mq.h    |  2 ++
> >  3 files changed, 64 insertions(+), 10 deletions(-)
> >
> > diff --git a/block/blk-map.c b/block/blk-map.c
> > index 4526adde0156..027e8216e313 100644
> > --- a/block/blk-map.c
> > +++ b/block/blk-map.c
> > @@ -8,6 +8,7 @@
> >  #include <linux/bio.h>
> >  #include <linux/blkdev.h>
> >  #include <linux/uio.h>
> > +#include <linux/io_uring.h>
> >
> >  #include "blk.h"
> >
> > @@ -577,6 +578,50 @@ int blk_rq_map_user(struct request_queue *q, struct request *rq,
> >  }
> >  EXPORT_SYMBOL(blk_rq_map_user);
> >
> > +/* Unlike blk_rq_map_user () this is only for fixed-buffer async passthrough. */
> > +int blk_rq_map_user_fixedb(struct request_queue *q, struct request *rq,
> > +                u64 ubuf, unsigned long len, gfp_t gfp_mask,
> > +                struct io_uring_cmd *ioucmd)
> > +{
> > +        struct iov_iter iter;
> > +        size_t iter_count, nr_segs;
> > +        struct bio *bio;
> > +        int ret;
> > +
> > +        /*
> > +         * Talk to io_uring to obtain BVEC iterator for the buffer.
> > +         * And use that iterator to form bio/request.
> > +         */
> > +        ret = io_uring_cmd_import_fixed(ubuf, len, rq_data_dir(rq), &iter,
> > +                        ioucmd);
> > +        if (unlikely(ret < 0))
> > +                return ret;
> > +        iter_count = iov_iter_count(&iter);
> > +        nr_segs = iter.nr_segs;
> > +
> > +        if (!iter_count || (iter_count >> 9) > queue_max_hw_sectors(q))
> > +                return -EINVAL;
> > +        if (nr_segs > queue_max_segments(q))
> > +                return -EINVAL;
> > +        /* no iovecs to alloc, as we already have a BVEC iterator */
> > +        bio = bio_alloc(gfp_mask, 0);
> > +        if (!bio)
> > +                return -ENOMEM;
> > +
> > +        ret = bio_iov_iter_get_pages(bio, &iter);
>
> Here bio_iov_iter_get_pages() may not work as expected, since the code
> needs to check the queue limits before adding pages to the bio, and we
> don't run the split path for passthrough bios.
> __bio_iov_append_get_pages() may be generalized to cover this case.

Yes. That may just be the right thing to do. Thanks for the suggestion.
On Mon, Mar 14, 2022 at 06:36:17PM +0530, Kanchan Joshi wrote:
> > And I'm also really worried about only supporting fixed buffers.  Fixed
> > buffers are a really nice benchmarketing feature, but without supporting
> > arbitrary buffers this is rather useless in real life.
>
> Sorry, I did not get your point on arbitrary buffers.
> The goal has been to match/surpass io_uring's block-io peak perf, so
> pre-mapped buffers had to be added.

I'm completely uninterested in adding code to the nvme driver that is
just intended for benchmarketing.  The fixed buffers are nice for
benchmarking and a very small number of real use cases (e.g. fixed size
database log), but for this feature to be generally useful for the real
world we'll need to support arbitrary user memory.
diff --git a/block/blk-map.c b/block/blk-map.c
index 4526adde0156..027e8216e313 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -8,6 +8,7 @@
 #include <linux/bio.h>
 #include <linux/blkdev.h>
 #include <linux/uio.h>
+#include <linux/io_uring.h>
 
 #include "blk.h"
 
@@ -577,6 +578,50 @@ int blk_rq_map_user(struct request_queue *q, struct request *rq,
 }
 EXPORT_SYMBOL(blk_rq_map_user);
 
+/* Unlike blk_rq_map_user () this is only for fixed-buffer async passthrough. */
+int blk_rq_map_user_fixedb(struct request_queue *q, struct request *rq,
+                u64 ubuf, unsigned long len, gfp_t gfp_mask,
+                struct io_uring_cmd *ioucmd)
+{
+        struct iov_iter iter;
+        size_t iter_count, nr_segs;
+        struct bio *bio;
+        int ret;
+
+        /*
+         * Talk to io_uring to obtain BVEC iterator for the buffer.
+         * And use that iterator to form bio/request.
+         */
+        ret = io_uring_cmd_import_fixed(ubuf, len, rq_data_dir(rq), &iter,
+                        ioucmd);
+        if (unlikely(ret < 0))
+                return ret;
+        iter_count = iov_iter_count(&iter);
+        nr_segs = iter.nr_segs;
+
+        if (!iter_count || (iter_count >> 9) > queue_max_hw_sectors(q))
+                return -EINVAL;
+        if (nr_segs > queue_max_segments(q))
+                return -EINVAL;
+        /* no iovecs to alloc, as we already have a BVEC iterator */
+        bio = bio_alloc(gfp_mask, 0);
+        if (!bio)
+                return -ENOMEM;
+
+        ret = bio_iov_iter_get_pages(bio, &iter);
+        if (ret)
+                goto out_free;
+
+        blk_rq_bio_prep(rq, bio, nr_segs);
+        return 0;
+
+out_free:
+        bio_release_pages(bio, false);
+        bio_put(bio);
+        return ret;
+}
+EXPORT_SYMBOL(blk_rq_map_user_fixedb);
+
 /**
  * blk_rq_unmap_user - unmap a request with user data
  * @bio:               start of bio list
diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c
index 1df270b47af5..91d893eedc82 100644
--- a/drivers/nvme/host/ioctl.c
+++ b/drivers/nvme/host/ioctl.c
@@ -123,8 +123,13 @@ static void *nvme_add_user_metadata(struct bio *bio, void __user *ubuf,
         return ERR_PTR(ret);
 }
 
+static inline bool nvme_is_fixedb_passthru(struct io_uring_cmd *ioucmd)
+{
+        return ((ioucmd) && (ioucmd->flags & IO_URING_F_UCMD_FIXEDBUFS));
+}
+
 static int nvme_submit_user_cmd(struct request_queue *q,
-                struct nvme_command *cmd, void __user *ubuffer,
+                struct nvme_command *cmd, u64 ubuffer,
                 unsigned bufflen, void __user *meta_buffer, unsigned meta_len,
                 u32 meta_seed, u64 *result, unsigned timeout,
                 struct io_uring_cmd *ioucmd)
@@ -152,8 +157,12 @@ static int nvme_submit_user_cmd(struct request_queue *q,
         nvme_req(req)->flags |= NVME_REQ_USERCMD;
 
         if (ubuffer && bufflen) {
-                ret = blk_rq_map_user(q, req, NULL, ubuffer, bufflen,
-                                GFP_KERNEL);
+                if (likely(nvme_is_fixedb_passthru(ioucmd)))
+                        ret = blk_rq_map_user_fixedb(q, req, ubuffer, bufflen,
+                                        GFP_KERNEL, ioucmd);
+                else
+                        ret = blk_rq_map_user(q, req, NULL, nvme_to_user_ptr(ubuffer),
+                                        bufflen, GFP_KERNEL);
                 if (ret)
                         goto out;
                 bio = req->bio;
@@ -260,9 +269,8 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
         c.rw.appmask = cpu_to_le16(io.appmask);
 
         return nvme_submit_user_cmd(ns->queue, &c,
-                        nvme_to_user_ptr(io.addr), length,
-                        metadata, meta_len, lower_32_bits(io.slba), NULL, 0,
-                        NULL);
+                        io.addr, length, metadata, meta_len,
+                        lower_32_bits(io.slba), NULL, 0, NULL);
 }
 
 static bool nvme_validate_passthru_nsid(struct nvme_ctrl *ctrl,
@@ -314,9 +322,8 @@ static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
                 timeout = msecs_to_jiffies(cmd.timeout_ms);
 
         status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c,
-                        nvme_to_user_ptr(cmd.addr), cmd.data_len,
-                        nvme_to_user_ptr(cmd.metadata), cmd.metadata_len,
-                        0, &result, timeout, NULL);
+                        cmd.addr, cmd.data_len, nvme_to_user_ptr(cmd.metadata),
+                        cmd.metadata_len, 0, &result, timeout, NULL);
 
         if (status >= 0) {
                 if (put_user(result, &ucmd->result))
@@ -368,7 +375,7 @@ static int nvme_user_cmd64(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
                 timeout = msecs_to_jiffies(cptr->timeout_ms);
 
         status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c,
-                        nvme_to_user_ptr(cptr->addr), cptr->data_len,
+                        cptr->addr, cptr->data_len,
                         nvme_to_user_ptr(cptr->metadata), cptr->metadata_len,
                         0, &cptr->result, timeout, ioucmd);
 
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index d319ffa59354..48bcfd194bdc 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -966,6 +966,8 @@ struct rq_map_data {
 
 int blk_rq_map_user(struct request_queue *, struct request *,
                 struct rq_map_data *, void __user *, unsigned long, gfp_t);
+int blk_rq_map_user_fixedb(struct request_queue *, struct request *,
+                u64 ubuf, unsigned long, gfp_t, struct io_uring_cmd *);
 int blk_rq_map_user_iov(struct request_queue *, struct request *,
                 struct rq_map_data *, const struct iov_iter *, gfp_t);
 int blk_rq_unmap_user(struct bio *);