
[9/9] nvme: wire up completion batching for the IRQ path

Message ID: 20211013165416.985696-10-axboe@kernel.dk (mailing list archive)
State: New, archived
Series: Batched completions

Commit Message

Jens Axboe Oct. 13, 2021, 4:54 p.m. UTC
Trivial to do now: we just need our own io_batch on the stack and pass that
in to the usual command completion handling.
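
For reference, here is roughly what nvme_process_cq looks like with this
applied (reconstructed from the diff at the bottom of this mail; the
io_batch helpers come from the earlier patches in this series):

static inline int nvme_process_cq(struct nvme_queue *nvmeq)
{
	/* batch lives on the stack and is filled as CQEs are reaped */
	DEFINE_IO_BATCH(iob);
	int found = 0;

	while (nvme_cqe_pending(nvmeq)) {
		found++;
		/*
		 * load-load control dependency between phase and the rest of
		 * the cqe requires a full read memory barrier
		 */
		dma_rmb();
		nvme_handle_cqe(nvmeq, &iob, nvmeq->cq_head);
		nvme_update_cq_head(nvmeq);
	}

	if (found) {
		/* complete the whole batch once, then ring the doorbell */
		if (iob.req_list)
			nvme_pci_complete_batch(&iob);
		nvme_ring_cq_doorbell(nvmeq);
	}
	return found;
}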

I pondered making this dependent on how many entries we had to process,
but even for a single entry there's no discernible difference in
performance or latency. Running a sync workload over io_uring:

t/io_uring -b512 -d1 -s1 -c1 -p0 -F1 -B1 -n2 /dev/nvme1n1 /dev/nvme2n1

yields the following performance before the patch:

IOPS=254820, BW=124MiB/s, IOS/call=1/1, inflight=(1 1)
IOPS=251174, BW=122MiB/s, IOS/call=1/1, inflight=(1 1)
IOPS=250806, BW=122MiB/s, IOS/call=1/1, inflight=(1 1)

and the following after:

IOPS=255972, BW=124MiB/s, IOS/call=1/1, inflight=(1 1)
IOPS=251920, BW=123MiB/s, IOS/call=1/1, inflight=(1 1)
IOPS=251794, BW=122MiB/s, IOS/call=1/1, inflight=(1 1)

which definitely isn't slower; it's about the same once you factor in a bit
of variance. For peak performance workloads, benchmarking shows a 2%
improvement.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/nvme/host/pci.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

Comments

Christoph Hellwig Oct. 14, 2021, 7:53 a.m. UTC | #1
Couldn't we do something like the incremental patch below to avoid
duplicating the CQ processing? With that this patch could probably go
away entirely and we could fold the change into the other nvme patch.

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 061f0b1cb0ec3..ce69e9666caac 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -1059,9 +1059,8 @@ static inline void nvme_update_cq_head(struct nvme_queue *nvmeq)
 	}
 }
 
-static inline int nvme_process_cq(struct nvme_queue *nvmeq)
+static inline int nvme_poll_cq(struct nvme_queue *nvmeq, struct io_batch *iob)
 {
-	DEFINE_IO_BATCH(iob);
 	int found = 0;
 
 	while (nvme_cqe_pending(nvmeq)) {
@@ -1071,15 +1070,23 @@ static inline int nvme_process_cq(struct nvme_queue *nvmeq)
 		 * the cqe requires a full read memory barrier
 		 */
 		dma_rmb();
-		nvme_handle_cqe(nvmeq, &iob, nvmeq->cq_head);
+		nvme_handle_cqe(nvmeq, iob, nvmeq->cq_head);
 		nvme_update_cq_head(nvmeq);
 	}
 
-	if (found) {
-		if (iob.req_list)
-			nvme_pci_complete_batch(&iob);
+	if (found)
 		nvme_ring_cq_doorbell(nvmeq);
-	}
+	return found;
+}
+
+static inline int nvme_process_cq(struct nvme_queue *nvmeq)
+{
+	DEFINE_IO_BATCH(iob);
+	int found;
+
+	found = nvme_poll_cq(nvmeq, &iob);
+	if (iob.req_list)
+		nvme_pci_complete_batch(&iob);
 	return found;
 }
 
@@ -1116,27 +1123,6 @@ static void nvme_poll_irqdisable(struct nvme_queue *nvmeq)
 	enable_irq(pci_irq_vector(pdev, nvmeq->cq_vector));
 }
 
-static inline int nvme_poll_cq(struct nvme_queue *nvmeq, struct io_batch *iob)
-{
-	int found = 0;
-
-	while (nvme_cqe_pending(nvmeq)) {
-		found++;
-		/*
-		 * load-load control dependency between phase and the rest of
-		 * the cqe requires a full read memory barrier
-		 */
-		dma_rmb();
-		nvme_handle_cqe(nvmeq, iob, nvmeq->cq_head);
-		nvme_update_cq_head(nvmeq);
-	}
-
-	if (found)
-		nvme_ring_cq_doorbell(nvmeq);
-	return found;
-}
-
-
 static int nvme_poll(struct blk_mq_hw_ctx *hctx, struct io_batch *iob)
 {
 	struct nvme_queue *nvmeq = hctx->driver_data;
Jens Axboe Oct. 14, 2021, 3:49 p.m. UTC | #2
On 10/14/21 1:53 AM, Christoph Hellwig wrote:
> Couldn't we do something like the incremental patch below to avoid
> duplicating the CQ processing? With that this patch could probably go
> away entirely and we could fold the change into the other nvme patch.

Yeah, this should probably go into the first patch in some shape or form;
I'll take a look.
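
For context, with the split Christoph suggests, the poll path simply hands
the caller-provided batch straight to the CQ reaping helper. A minimal
sketch, assuming nvme_poll keeps its existing cq_poll_lock handling:

static int nvme_poll(struct blk_mq_hw_ctx *hctx, struct io_batch *iob)
{
	struct nvme_queue *nvmeq = hctx->driver_data;
	bool found;

	if (!nvme_cqe_pending(nvmeq))
		return 0;

	spin_lock(&nvmeq->cq_poll_lock);
	/* reap CQEs into the caller-provided batch */
	found = nvme_poll_cq(nvmeq, iob);
	spin_unlock(&nvmeq->cq_poll_lock);

	return found;
}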

Patch

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index ae253f6f5c80..061f0b1cb0ec 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -1061,6 +1061,7 @@  static inline void nvme_update_cq_head(struct nvme_queue *nvmeq)
 
 static inline int nvme_process_cq(struct nvme_queue *nvmeq)
 {
+	DEFINE_IO_BATCH(iob);
 	int found = 0;
 
 	while (nvme_cqe_pending(nvmeq)) {
@@ -1070,12 +1071,15 @@  static inline int nvme_process_cq(struct nvme_queue *nvmeq)
 		 * the cqe requires a full read memory barrier
 		 */
 		dma_rmb();
-		nvme_handle_cqe(nvmeq, NULL, nvmeq->cq_head);
+		nvme_handle_cqe(nvmeq, &iob, nvmeq->cq_head);
 		nvme_update_cq_head(nvmeq);
 	}
 
-	if (found)
+	if (found) {
+		if (iob.req_list)
+			nvme_pci_complete_batch(&iob);
 		nvme_ring_cq_doorbell(nvmeq);
+	}
 	return found;
 }