Message ID | 20230322002350.4038048-3-kbusch@meta.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | nvme fabrics polling fixes | expand |
On 3/21/2023 5:23 PM, Keith Busch wrote: > From: Keith Busch <kbusch@kernel.org> > > This is for mostly for testing purposes. > > Signed-off-by: Keith Busch <kbusch@kernel.org> > --- LGTM, it will be nice to have a blktests for this, as I think this is really useful. Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com> -ck
On 3/22/23 02:23, Keith Busch wrote: > From: Keith Busch <kbusch@kernel.org> > > This is for mostly for testing purposes. > > Signed-off-by: Keith Busch <kbusch@kernel.org> > --- > drivers/nvme/target/loop.c | 63 +++++++++++++++++++++++++++++++++++--- > 1 file changed, 58 insertions(+), 5 deletions(-) > > diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c > index f2d24b2d992f8..0587ead60b09e 100644 > --- a/drivers/nvme/target/loop.c > +++ b/drivers/nvme/target/loop.c > @@ -22,6 +22,7 @@ struct nvme_loop_iod { > struct nvmet_req req; > struct nvme_loop_queue *queue; > struct work_struct work; > + struct work_struct poll; > struct sg_table sg_table; > struct scatterlist first_sgl[]; > }; > @@ -37,6 +38,7 @@ struct nvme_loop_ctrl { > struct nvme_ctrl ctrl; > > struct nvmet_port *port; > + u32 io_queues[HCTX_MAX_TYPES]; > }; > > static inline struct nvme_loop_ctrl *to_loop_ctrl(struct nvme_ctrl *ctrl) > @@ -76,7 +78,11 @@ static void nvme_loop_complete_rq(struct request *req) > struct nvme_loop_iod *iod = blk_mq_rq_to_pdu(req); > > sg_free_table_chained(&iod->sg_table, NVME_INLINE_SG_CNT); > - nvme_complete_rq(req); > + > + if (req->mq_hctx->type != HCTX_TYPE_POLL || !in_interrupt()) > + nvme_complete_rq(req); > + else > + queue_work(nvmet_wq, &iod->poll); > } > > static struct blk_mq_tags *nvme_loop_tagset(struct nvme_loop_queue *queue) > @@ -120,6 +126,15 @@ static void nvme_loop_queue_response(struct nvmet_req *req) > } > } > > +static void nvme_loop_poll_work(struct work_struct *work) > +{ > + struct nvme_loop_iod *iod = > + container_of(work, struct nvme_loop_iod, poll); > + struct request *req = blk_mq_rq_from_pdu(iod); > + > + nvme_complete_rq(req); > +} > + > static void nvme_loop_execute_work(struct work_struct *work) > { > struct nvme_loop_iod *iod = > @@ -170,6 +185,30 @@ static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx, > return BLK_STS_OK; > } > > +static bool nvme_loop_poll_iter(struct sbitmap *bitmap, unsigned int 
bitnr, void *data) > +{ > + struct blk_mq_hw_ctx *hctx = data; > + struct nvme_loop_iod *iod; > + struct request *rq; > + > + rq = blk_mq_tag_to_rq(hctx->tags, bitnr); > + if (!rq) > + return true; > + > + iod = blk_mq_rq_to_pdu(rq); > + flush_work(&iod->poll); If we want to go down this route, I would think that maybe it'd be better to add .poll to nvmet_req like .execute, that can actually be wired to bio_poll ? for file it can be wired to fop.iopoll
On Tue, Mar 21, 2023 at 05:23:49PM -0700, Keith Busch wrote: > From: Keith Busch <kbusch@kernel.org> > > This is for mostly for testing purposes. I have to admit I'd rather not merge this upstream. Any good reason why we'd absolutely would want to have it?
On Wed, Mar 22, 2023 at 09:23:10AM +0100, Christoph Hellwig wrote: > On Tue, Mar 21, 2023 at 05:23:49PM -0700, Keith Busch wrote: > > From: Keith Busch <kbusch@kernel.org> > > > > This is for mostly for testing purposes. > > I have to admit I'd rather not merge this upstream. Any good reason > why we'd absolutely would want to have it? The blktest I have written for this problem fails for loop without something like this. We can certainly teach blktests not to run a specific test for loop but currently, the _require_nvme_trtype_is_fabrics check is including loop.
On Wed, Mar 22, 2023 at 09:46:51AM +0100, Daniel Wagner wrote: > The blktest I have written for this problem fails for loop without something > like this. We can certaintanly teach blktests not run a specific test for loop > but currently, the _require_nvme_trtype_is_fabrics check is including loop. Who says that we could support polling on all current and future fabrics transports?
On Wed, Mar 22, 2023 at 02:52:00PM +0100, Christoph Hellwig wrote: > Who says that we could support polling on all current and future > fabrics transports? I just assumed this is a generic feature supposed to be present in all transports. I'll update my new blktest test to run only tcp or rdma.
On Wed, Mar 22, 2023 at 03:06:19PM +0100, Daniel Wagner wrote: > On Wed, Mar 22, 2023 at 02:52:00PM +0100, Christoph Hellwig wrote: > > Who says that we could support polling on all current and future > > fabrics transports? > > I just assumed this is a generic feature supposed to present in all transports. > I'll update my new blktest test to run only tcp or rdma. The best idea would be to do a trial and error, that is do a _notrun if trying to create a connection with the options fails.
On Wed, Mar 22, 2023 at 09:23:10AM +0100, Christoph Hellwig wrote: > On Tue, Mar 21, 2023 at 05:23:49PM -0700, Keith Busch wrote: > > From: Keith Busch <kbusch@kernel.org> > > > > This is for mostly for testing purposes. > > I have to admit I'd rather not merge this upstream. Any good reason > why we'd absolutely would want to have it? The only value is that it's the easiest fabric to exercise some of these generic code paths, and it's how I validated the fix in patch 3. Otherwise this has no other practical use case, so I don't mind dropping it. Let's just go with patch 3 only from this series. I'll rework patch 1 atop Sagi's rdma affinity removal since it's a nice cleanup.
diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index f2d24b2d992f8..0587ead60b09e 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -22,6 +22,7 @@ struct nvme_loop_iod { struct nvmet_req req; struct nvme_loop_queue *queue; struct work_struct work; + struct work_struct poll; struct sg_table sg_table; struct scatterlist first_sgl[]; }; @@ -37,6 +38,7 @@ struct nvme_loop_ctrl { struct nvme_ctrl ctrl; struct nvmet_port *port; + u32 io_queues[HCTX_MAX_TYPES]; }; static inline struct nvme_loop_ctrl *to_loop_ctrl(struct nvme_ctrl *ctrl) @@ -76,7 +78,11 @@ static void nvme_loop_complete_rq(struct request *req) struct nvme_loop_iod *iod = blk_mq_rq_to_pdu(req); sg_free_table_chained(&iod->sg_table, NVME_INLINE_SG_CNT); - nvme_complete_rq(req); + + if (req->mq_hctx->type != HCTX_TYPE_POLL || !in_interrupt()) + nvme_complete_rq(req); + else + queue_work(nvmet_wq, &iod->poll); } static struct blk_mq_tags *nvme_loop_tagset(struct nvme_loop_queue *queue) @@ -120,6 +126,15 @@ static void nvme_loop_queue_response(struct nvmet_req *req) } } +static void nvme_loop_poll_work(struct work_struct *work) +{ + struct nvme_loop_iod *iod = + container_of(work, struct nvme_loop_iod, poll); + struct request *req = blk_mq_rq_from_pdu(iod); + + nvme_complete_rq(req); +} + static void nvme_loop_execute_work(struct work_struct *work) { struct nvme_loop_iod *iod = @@ -170,6 +185,30 @@ static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx, return BLK_STS_OK; } +static bool nvme_loop_poll_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data) +{ + struct blk_mq_hw_ctx *hctx = data; + struct nvme_loop_iod *iod; + struct request *rq; + + rq = blk_mq_tag_to_rq(hctx->tags, bitnr); + if (!rq) + return true; + + iod = blk_mq_rq_to_pdu(rq); + flush_work(&iod->poll); + return true; +} + +static int nvme_loop_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob) +{ + struct blk_mq_tags *tags = hctx->tags; + struct sbitmap_queue *btags = 
&tags->bitmap_tags; + + sbitmap_for_each_set(&btags->sb, nvme_loop_poll_iter, hctx); + return 1; +} + static void nvme_loop_submit_async_event(struct nvme_ctrl *arg) { struct nvme_loop_ctrl *ctrl = to_loop_ctrl(arg); @@ -197,6 +236,7 @@ static int nvme_loop_init_iod(struct nvme_loop_ctrl *ctrl, iod->req.cqe = &iod->cqe; iod->queue = &ctrl->queues[queue_idx]; INIT_WORK(&iod->work, nvme_loop_execute_work); + INIT_WORK(&iod->poll, nvme_loop_poll_work); return 0; } @@ -247,11 +287,20 @@ static int nvme_loop_init_admin_hctx(struct blk_mq_hw_ctx *hctx, void *data, return 0; } +static void nvme_loop_map_queues(struct blk_mq_tag_set *set) +{ + struct nvme_loop_ctrl *ctrl = to_loop_ctrl(set->driver_data); + + nvme_map_queues(set, &ctrl->ctrl, NULL, ctrl->io_queues); +} + static const struct blk_mq_ops nvme_loop_mq_ops = { .queue_rq = nvme_loop_queue_rq, .complete = nvme_loop_complete_rq, .init_request = nvme_loop_init_request, .init_hctx = nvme_loop_init_hctx, + .map_queues = nvme_loop_map_queues, + .poll = nvme_loop_poll, }; static const struct blk_mq_ops nvme_loop_admin_mq_ops = { @@ -305,7 +354,7 @@ static int nvme_loop_init_io_queues(struct nvme_loop_ctrl *ctrl) unsigned int nr_io_queues; int ret, i; - nr_io_queues = min(opts->nr_io_queues, num_online_cpus()); + nr_io_queues = nvme_nr_io_queues(opts); ret = nvme_set_queue_count(&ctrl->ctrl, &nr_io_queues); if (ret || !nr_io_queues) return ret; @@ -321,6 +370,7 @@ static int nvme_loop_init_io_queues(struct nvme_loop_ctrl *ctrl) ctrl->ctrl.queue_count++; } + nvme_set_io_queues(opts, nr_io_queues, ctrl->io_queues); return 0; out_destroy_queues: @@ -494,7 +544,7 @@ static int nvme_loop_create_io_queues(struct nvme_loop_ctrl *ctrl) return ret; ret = nvme_alloc_io_tag_set(&ctrl->ctrl, &ctrl->tag_set, - &nvme_loop_mq_ops, 1, + &nvme_loop_mq_ops, ctrl->ctrl.opts->nr_poll_queues ? 
3 : 2, sizeof(struct nvme_loop_iod) + NVME_INLINE_SG_CNT * sizeof(struct scatterlist)); if (ret) @@ -534,6 +584,7 @@ static struct nvme_ctrl *nvme_loop_create_ctrl(struct device *dev, struct nvmf_ctrl_options *opts) { struct nvme_loop_ctrl *ctrl; + unsigned int nr_io_queues; int ret; ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL); @@ -559,7 +610,8 @@ static struct nvme_ctrl *nvme_loop_create_ctrl(struct device *dev, ctrl->ctrl.kato = opts->kato; ctrl->port = nvme_loop_find_port(&ctrl->ctrl); - ctrl->queues = kcalloc(opts->nr_io_queues + 1, sizeof(*ctrl->queues), + nr_io_queues = nvme_nr_io_queues(ctrl->ctrl.opts);; + ctrl->queues = kcalloc(nr_io_queues + 1, sizeof(*ctrl->queues), GFP_KERNEL); if (!ctrl->queues) goto out_uninit_ctrl; @@ -648,7 +700,8 @@ static struct nvmf_transport_ops nvme_loop_transport = { .name = "loop", .module = THIS_MODULE, .create_ctrl = nvme_loop_create_ctrl, - .allowed_opts = NVMF_OPT_TRADDR, + .allowed_opts = NVMF_OPT_TRADDR | NVMF_OPT_NR_WRITE_QUEUES | + NVMF_OPT_NR_POLL_QUEUES, }; static int __init nvme_loop_init_module(void)