[2/3] nvme: add polling options for loop target

Message ID 20230322002350.4038048-3-kbusch@meta.com (mailing list archive)
State New, archived
Series nvme fabrics polling fixes

Commit Message

Keith Busch March 22, 2023, 12:23 a.m. UTC
From: Keith Busch <kbusch@kernel.org>

This is mostly for testing purposes.

Signed-off-by: Keith Busch <kbusch@kernel.org>
---
 drivers/nvme/target/loop.c | 63 +++++++++++++++++++++++++++++++++++---
 1 file changed, 58 insertions(+), 5 deletions(-)

Comments

Chaitanya Kulkarni March 22, 2023, 1:47 a.m. UTC | #1
On 3/21/2023 5:23 PM, Keith Busch wrote:
> From: Keith Busch <kbusch@kernel.org>
> 
> This is mostly for testing purposes.
> 
> Signed-off-by: Keith Busch <kbusch@kernel.org>
> ---

LGTM, it would be nice to have a blktests test for this,
as I think this is really useful.

Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>

-ck
Sagi Grimberg March 22, 2023, 7:44 a.m. UTC | #2
On 3/22/23 02:23, Keith Busch wrote:
> From: Keith Busch <kbusch@kernel.org>
> 
> This is mostly for testing purposes.
> 
> Signed-off-by: Keith Busch <kbusch@kernel.org>
> ---
>   drivers/nvme/target/loop.c | 63 +++++++++++++++++++++++++++++++++++---
>   1 file changed, 58 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
> index f2d24b2d992f8..0587ead60b09e 100644
> --- a/drivers/nvme/target/loop.c
> +++ b/drivers/nvme/target/loop.c
> @@ -22,6 +22,7 @@ struct nvme_loop_iod {
>   	struct nvmet_req	req;
>   	struct nvme_loop_queue	*queue;
>   	struct work_struct	work;
> +	struct work_struct	poll;
>   	struct sg_table		sg_table;
>   	struct scatterlist	first_sgl[];
>   };
> @@ -37,6 +38,7 @@ struct nvme_loop_ctrl {
>   	struct nvme_ctrl	ctrl;
>   
>   	struct nvmet_port	*port;
> +	u32			io_queues[HCTX_MAX_TYPES];
>   };
>   
>   static inline struct nvme_loop_ctrl *to_loop_ctrl(struct nvme_ctrl *ctrl)
> @@ -76,7 +78,11 @@ static void nvme_loop_complete_rq(struct request *req)
>   	struct nvme_loop_iod *iod = blk_mq_rq_to_pdu(req);
>   
>   	sg_free_table_chained(&iod->sg_table, NVME_INLINE_SG_CNT);
> -	nvme_complete_rq(req);
> +
> +	if (req->mq_hctx->type != HCTX_TYPE_POLL || !in_interrupt())
> +		nvme_complete_rq(req);
> +	else
> +		queue_work(nvmet_wq, &iod->poll);
>   }
>   
>   static struct blk_mq_tags *nvme_loop_tagset(struct nvme_loop_queue *queue)
> @@ -120,6 +126,15 @@ static void nvme_loop_queue_response(struct nvmet_req *req)
>   	}
>   }
>   
> +static void nvme_loop_poll_work(struct work_struct *work)
> +{
> +	struct nvme_loop_iod *iod =
> +		container_of(work, struct nvme_loop_iod, poll);
> +	struct request *req = blk_mq_rq_from_pdu(iod);
> +
> +	nvme_complete_rq(req);
> +}
> +
>   static void nvme_loop_execute_work(struct work_struct *work)
>   {
>   	struct nvme_loop_iod *iod =
> @@ -170,6 +185,30 @@ static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx,
>   	return BLK_STS_OK;
>   }
>   
> +static bool nvme_loop_poll_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
> +{
> +	struct blk_mq_hw_ctx *hctx = data;
> +	struct nvme_loop_iod *iod;
> +	struct request *rq;
> +
> +	rq = blk_mq_tag_to_rq(hctx->tags, bitnr);
> +	if (!rq)
> +		return true;
> +
> +	iod = blk_mq_rq_to_pdu(rq);
> +	flush_work(&iod->poll);

If we want to go down this route, I would think that maybe
it'd be better to add a .poll to nvmet_req, like .execute, that can
actually be wired to bio_poll? For the file backend it can be wired to fop.iopoll.
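
For illustration, a rough sketch of what that suggestion might look like follows. This is not the posted patch: the ->poll member, the saved-bio field, nvmet_bdev_poll() and nvme_loop_poll_one() are hypothetical names made up here; only ->execute() and bio_poll() exist in the tree today.

/* Hypothetical sketch: an optional ->poll() hook on nvmet_req. */
struct nvmet_req {
	/* ... existing fields ... */
	void		(*execute)(struct nvmet_req *req);
	/* hypothetical: returns > 0 if a completion was reaped */
	int		(*poll)(struct nvmet_req *req);
	/* hypothetical: bio remembered by the bdev backend at submit time */
	struct bio	*poll_bio;
};

/* bdev backend: poll the bio it submitted for this request */
static int nvmet_bdev_poll(struct nvmet_req *req)
{
	return bio_poll(req->poll_bio, NULL, 0);
}

/* the loop driver's blk_mq ->poll() could then invoke the target hook */
static int nvme_loop_poll_one(struct nvme_loop_iod *iod)
{
	if (iod->req.poll)
		return iod->req.poll(&iod->req);
	return 0;
}

The file backend could fill the same ->poll() hook with a call into the backing file's ->iopoll() through the kiocb it already keeps in the request.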
Christoph Hellwig March 22, 2023, 8:23 a.m. UTC | #3
On Tue, Mar 21, 2023 at 05:23:49PM -0700, Keith Busch wrote:
> From: Keith Busch <kbusch@kernel.org>
> 
> This is mostly for testing purposes.

I have to admit I'd rather not merge this upstream.  Any good reason
why we'd absolutely want to have it?
Daniel Wagner March 22, 2023, 8:46 a.m. UTC | #4
On Wed, Mar 22, 2023 at 09:23:10AM +0100, Christoph Hellwig wrote:
> On Tue, Mar 21, 2023 at 05:23:49PM -0700, Keith Busch wrote:
> > From: Keith Busch <kbusch@kernel.org>
> > 
> > This is mostly for testing purposes.
> 
> I have to admit I'd rather not merge this upstream.  Any good reason
> why we'd absolutely want to have it?

The blktests test I have written for this problem fails for loop without something
like this. We can certainly teach blktests not to run a specific test for loop,
but currently the _require_nvme_trtype_is_fabrics check includes loop.
Christoph Hellwig March 22, 2023, 1:52 p.m. UTC | #5
On Wed, Mar 22, 2023 at 09:46:51AM +0100, Daniel Wagner wrote:
> The blktests test I have written for this problem fails for loop without something
> like this. We can certainly teach blktests not to run a specific test for loop,
> but currently the _require_nvme_trtype_is_fabrics check includes loop.

Who says that we could support polling on all current and future
fabrics transports?
Daniel Wagner March 22, 2023, 2:06 p.m. UTC | #6
On Wed, Mar 22, 2023 at 02:52:00PM +0100, Christoph Hellwig wrote:
> Who says that we could support polling on all current and future
> fabrics transports?

I just assumed this is a generic feature supposed to be present in all transports.
I'll update my new blktests test to run only on tcp or rdma.
Christoph Hellwig March 22, 2023, 2:20 p.m. UTC | #7
On Wed, Mar 22, 2023 at 03:06:19PM +0100, Daniel Wagner wrote:
> On Wed, Mar 22, 2023 at 02:52:00PM +0100, Christoph Hellwig wrote:
> > Who says that we could support polling on all current and future
> > fabrics transports?
> 
> I just assumed this is a generic feature supposed to be present in all transports.
> I'll update my new blktests test to run only on tcp or rdma.

The best idea would be to do trial and error, that is, do a _notrun
if trying to create a connection with the options fails.
Keith Busch March 22, 2023, 2:30 p.m. UTC | #8
On Wed, Mar 22, 2023 at 09:23:10AM +0100, Christoph Hellwig wrote:
> On Tue, Mar 21, 2023 at 05:23:49PM -0700, Keith Busch wrote:
> > From: Keith Busch <kbusch@kernel.org>
> > 
> > This is mostly for testing purposes.
> 
> I have to admit I'd rather not merge this upstream.  Any good reason
> why we'd absolutely want to have it?

The only value is that it's the easiest fabric to exercise some of these
generic code paths, and it's how I validated the fix in patch 3. Otherwise this
has no other practical use case, so I don't mind dropping it.

Let's just go with patch 3 only from this series. I'll rework patch 1 atop
Sagi's rdma affinity removal since it's a nice cleanup.

Patch

diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
index f2d24b2d992f8..0587ead60b09e 100644
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -22,6 +22,7 @@  struct nvme_loop_iod {
 	struct nvmet_req	req;
 	struct nvme_loop_queue	*queue;
 	struct work_struct	work;
+	struct work_struct	poll;
 	struct sg_table		sg_table;
 	struct scatterlist	first_sgl[];
 };
@@ -37,6 +38,7 @@  struct nvme_loop_ctrl {
 	struct nvme_ctrl	ctrl;
 
 	struct nvmet_port	*port;
+	u32			io_queues[HCTX_MAX_TYPES];
 };
 
 static inline struct nvme_loop_ctrl *to_loop_ctrl(struct nvme_ctrl *ctrl)
@@ -76,7 +78,11 @@  static void nvme_loop_complete_rq(struct request *req)
 	struct nvme_loop_iod *iod = blk_mq_rq_to_pdu(req);
 
 	sg_free_table_chained(&iod->sg_table, NVME_INLINE_SG_CNT);
-	nvme_complete_rq(req);
+
+	if (req->mq_hctx->type != HCTX_TYPE_POLL || !in_interrupt())
+		nvme_complete_rq(req);
+	else
+		queue_work(nvmet_wq, &iod->poll);
 }
 
 static struct blk_mq_tags *nvme_loop_tagset(struct nvme_loop_queue *queue)
@@ -120,6 +126,15 @@  static void nvme_loop_queue_response(struct nvmet_req *req)
 	}
 }
 
+static void nvme_loop_poll_work(struct work_struct *work)
+{
+	struct nvme_loop_iod *iod =
+		container_of(work, struct nvme_loop_iod, poll);
+	struct request *req = blk_mq_rq_from_pdu(iod);
+
+	nvme_complete_rq(req);
+}
+
 static void nvme_loop_execute_work(struct work_struct *work)
 {
 	struct nvme_loop_iod *iod =
@@ -170,6 +185,30 @@  static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx,
 	return BLK_STS_OK;
 }
 
+static bool nvme_loop_poll_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
+{
+	struct blk_mq_hw_ctx *hctx = data;
+	struct nvme_loop_iod *iod;
+	struct request *rq;
+
+	rq = blk_mq_tag_to_rq(hctx->tags, bitnr);
+	if (!rq)
+		return true;
+
+	iod = blk_mq_rq_to_pdu(rq);
+	flush_work(&iod->poll);
+	return true;
+}
+
+static int nvme_loop_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob)
+{
+	struct blk_mq_tags *tags = hctx->tags;
+	struct sbitmap_queue *btags = &tags->bitmap_tags;
+
+	sbitmap_for_each_set(&btags->sb, nvme_loop_poll_iter, hctx);
+	return 1;
+}
+
 static void nvme_loop_submit_async_event(struct nvme_ctrl *arg)
 {
 	struct nvme_loop_ctrl *ctrl = to_loop_ctrl(arg);
@@ -197,6 +236,7 @@  static int nvme_loop_init_iod(struct nvme_loop_ctrl *ctrl,
 	iod->req.cqe = &iod->cqe;
 	iod->queue = &ctrl->queues[queue_idx];
 	INIT_WORK(&iod->work, nvme_loop_execute_work);
+	INIT_WORK(&iod->poll, nvme_loop_poll_work);
 	return 0;
 }
 
@@ -247,11 +287,20 @@  static int nvme_loop_init_admin_hctx(struct blk_mq_hw_ctx *hctx, void *data,
 	return 0;
 }
 
+static void nvme_loop_map_queues(struct blk_mq_tag_set *set)
+{
+	struct nvme_loop_ctrl *ctrl = to_loop_ctrl(set->driver_data);
+
+	nvme_map_queues(set, &ctrl->ctrl, NULL, ctrl->io_queues);
+}
+
 static const struct blk_mq_ops nvme_loop_mq_ops = {
 	.queue_rq	= nvme_loop_queue_rq,
 	.complete	= nvme_loop_complete_rq,
 	.init_request	= nvme_loop_init_request,
 	.init_hctx	= nvme_loop_init_hctx,
+	.map_queues	= nvme_loop_map_queues,
+	.poll		= nvme_loop_poll,
 };
 
 static const struct blk_mq_ops nvme_loop_admin_mq_ops = {
@@ -305,7 +354,7 @@  static int nvme_loop_init_io_queues(struct nvme_loop_ctrl *ctrl)
 	unsigned int nr_io_queues;
 	int ret, i;
 
-	nr_io_queues = min(opts->nr_io_queues, num_online_cpus());
+	nr_io_queues = nvme_nr_io_queues(opts);
 	ret = nvme_set_queue_count(&ctrl->ctrl, &nr_io_queues);
 	if (ret || !nr_io_queues)
 		return ret;
@@ -321,6 +370,7 @@  static int nvme_loop_init_io_queues(struct nvme_loop_ctrl *ctrl)
 		ctrl->ctrl.queue_count++;
 	}
 
+	nvme_set_io_queues(opts, nr_io_queues, ctrl->io_queues);
 	return 0;
 
 out_destroy_queues:
@@ -494,7 +544,7 @@  static int nvme_loop_create_io_queues(struct nvme_loop_ctrl *ctrl)
 		return ret;
 
 	ret = nvme_alloc_io_tag_set(&ctrl->ctrl, &ctrl->tag_set,
-			&nvme_loop_mq_ops, 1,
+			&nvme_loop_mq_ops, ctrl->ctrl.opts->nr_poll_queues ? 3 : 2,
 			sizeof(struct nvme_loop_iod) +
 			NVME_INLINE_SG_CNT * sizeof(struct scatterlist));
 	if (ret)
@@ -534,6 +584,7 @@  static struct nvme_ctrl *nvme_loop_create_ctrl(struct device *dev,
 		struct nvmf_ctrl_options *opts)
 {
 	struct nvme_loop_ctrl *ctrl;
+	unsigned int nr_io_queues;
 	int ret;
 
 	ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL);
@@ -559,7 +610,8 @@  static struct nvme_ctrl *nvme_loop_create_ctrl(struct device *dev,
 	ctrl->ctrl.kato = opts->kato;
 	ctrl->port = nvme_loop_find_port(&ctrl->ctrl);
 
-	ctrl->queues = kcalloc(opts->nr_io_queues + 1, sizeof(*ctrl->queues),
+	nr_io_queues = nvme_nr_io_queues(ctrl->ctrl.opts);
+	ctrl->queues = kcalloc(nr_io_queues + 1, sizeof(*ctrl->queues),
 			GFP_KERNEL);
 	if (!ctrl->queues)
 		goto out_uninit_ctrl;
@@ -648,7 +700,8 @@  static struct nvmf_transport_ops nvme_loop_transport = {
 	.name		= "loop",
 	.module		= THIS_MODULE,
 	.create_ctrl	= nvme_loop_create_ctrl,
-	.allowed_opts	= NVMF_OPT_TRADDR,
+	.allowed_opts	= NVMF_OPT_TRADDR | NVMF_OPT_NR_WRITE_QUEUES |
+			  NVMF_OPT_NR_POLL_QUEUES,
 };
 
 static int __init nvme_loop_init_module(void)