Message ID | 20211013165416.985696-7-axboe@kernel.dk (mailing list archive)
---|---
State | New, archived
Series | Batched completions
On Wed, Oct 13, 2021 at 10:54:13AM -0600, Jens Axboe wrote:
> +void nvme_complete_batch_req(struct request *req)
> +{
> +	nvme_cleanup_cmd(req);
> +	nvme_end_req_zoned(req);
> +	req->status = BLK_STS_OK;
> +}
> +EXPORT_SYMBOL_GPL(nvme_complete_batch_req);
> +

I'd be tempted to just merge this helper into the only caller.
nvme_cleanup_cmd is exported anyway, so this would just add an export
for nvme_end_req_zoned.

> +static __always_inline void nvme_complete_batch(struct io_batch *iob,
> +		void (*fn)(struct request *rq))
> +{
> +	struct request *req;
> +
> +	req = rq_list_peek(&iob->req_list);
> +	while (req) {
> +		fn(req);
> +		nvme_complete_batch_req(req);
> +		req = rq_list_next(req);
> +	}
> +
> +	blk_mq_end_request_batch(iob);

Can we turn this into a normal for loop?

	for (req = rq_list_peek(&iob->req_list); req; req = rq_list_next(req)) {
		..
	}

> +	if (!nvme_try_complete_req(req, cqe->status, cqe->result)) {
> +		/*
> +		 * Do normal inline completion if we don't have a batch
> +		 * list, if we have an end_io handler, or if the status of
> +		 * the request isn't just normal success.
> +		 */
> +		if (!iob || req->end_io || nvme_req(req)->status)
> +			nvme_pci_complete_rq(req);
> +		else
> +			rq_list_add_tail(&iob->req_list, req);
> +	}

The check for the conditions where we can or cannot batch complete
really should go into a block layer helper. Something like the
incremental patch below:

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index ce69e9666caac..57bef8229bfab 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -1034,17 +1034,9 @@ static inline void nvme_handle_cqe(struct nvme_queue *nvmeq,
 	}
 
 	trace_nvme_sq(req, cqe->sq_head, nvmeq->sq_tail);
-	if (!nvme_try_complete_req(req, cqe->status, cqe->result)) {
-		/*
-		 * Do normal inline completion if we don't have a batch
-		 * list, if we have an end_io handler, or if the status of
-		 * the request isn't just normal success.
-		 */
-		if (!iob || req->end_io || nvme_req(req)->status)
-			nvme_pci_complete_rq(req);
-		else
-			rq_list_add_tail(&iob->req_list, req);
-	}
+	if (!nvme_try_complete_req(req, cqe->status, cqe->result) &&
+	    !blk_mq_add_to_batch(req, iob, nvme_req(req)->status))
+		nvme_pci_complete_rq(req);
 }
 
 static inline void nvme_update_cq_head(struct nvme_queue *nvmeq)
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index aea7d866a34c6..383d887e32f6d 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -773,6 +773,19 @@ void blk_mq_end_request(struct request *rq, blk_status_t error);
 void __blk_mq_end_request(struct request *rq, blk_status_t error);
 void blk_mq_end_request_batch(struct io_batch *ib);
 
+/*
+ * Batched completions only work when there is no I/O error and no special
+ * ->end_io handler.
+ */
+static inline bool blk_mq_add_to_batch(struct request *req,
+		struct io_batch *iob, bool ioerror)
+{
+	if (!iob || req->end_io || ioerror)
+		return false;
+	rq_list_add_tail(&iob->req_list, req);
+	return true;
+}
+
 void blk_mq_requeue_request(struct request *rq, bool kick_requeue_list);
 void blk_mq_kick_requeue_list(struct request_queue *q);
 void blk_mq_delay_kick_requeue_list(struct request_queue *q, unsigned long msecs);
On 10/14/21 1:43 AM, Christoph Hellwig wrote:
> On Wed, Oct 13, 2021 at 10:54:13AM -0600, Jens Axboe wrote:
>> +void nvme_complete_batch_req(struct request *req)
>> +{
>> +	nvme_cleanup_cmd(req);
>> +	nvme_end_req_zoned(req);
>> +	req->status = BLK_STS_OK;
>> +}
>> +EXPORT_SYMBOL_GPL(nvme_complete_batch_req);
>> +
> 
> I'd be tempted to just merge this helper into the only caller.
> nvme_cleanup_cmd is exported anyway, so this would just add an export
> for nvme_end_req_zoned.

Sure, I can do that.

>> +static __always_inline void nvme_complete_batch(struct io_batch *iob,
>> +		void (*fn)(struct request *rq))
>> +{
>> +	struct request *req;
>> +
>> +	req = rq_list_peek(&iob->req_list);
>> +	while (req) {
>> +		fn(req);
>> +		nvme_complete_batch_req(req);
>> +		req = rq_list_next(req);
>> +	}
>> +
>> +	blk_mq_end_request_batch(iob);
> 
> Can we turn this into a normal for loop?
> 
> 	for (req = rq_list_peek(&iob->req_list); req; req = rq_list_next(req)) {
> 		..
> 	}

If you prefer it that way for nvme, for me the while () setup is much
easier to read than a really long for line.

>> +	if (!nvme_try_complete_req(req, cqe->status, cqe->result)) {
>> +		/*
>> +		 * Do normal inline completion if we don't have a batch
>> +		 * list, if we have an end_io handler, or if the status of
>> +		 * the request isn't just normal success.
>> +		 */
>> +		if (!iob || req->end_io || nvme_req(req)->status)
>> +			nvme_pci_complete_rq(req);
>> +		else
>> +			rq_list_add_tail(&iob->req_list, req);
>> +	}
> 
> The check for the conditions where we can or cannot batch complete
> really should go into a block layer helper. Something like the
> incremental patch below:

That's a good idea, I'll add that.
On 10/14/21 9:30 AM, Jens Axboe wrote:
> On 10/14/21 1:43 AM, Christoph Hellwig wrote:
>> On Wed, Oct 13, 2021 at 10:54:13AM -0600, Jens Axboe wrote:
>>> +void nvme_complete_batch_req(struct request *req)
>>> +{
>>> +	nvme_cleanup_cmd(req);
>>> +	nvme_end_req_zoned(req);
>>> +	req->status = BLK_STS_OK;
>>> +}
>>> +EXPORT_SYMBOL_GPL(nvme_complete_batch_req);
>>> +
>>
>> I'd be tempted to just merge this helper into the only caller.
>> nvme_cleanup_cmd is exported anyway, so this would just add an export
>> for nvme_end_req_zoned.
> 
> Sure, I can do that.

That'll turn it into two calls from the batch completion though, so I
skipped this change.
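[For context, a minimal sketch of what the batch loop would look like with the helper folded into its only caller, built solely from the code quoted above. Since nvme_complete_batch() is inlined from nvme.h into pci.c, folding the helper away means two exported calls per request instead of one, which is the trade-off being referred to here; this is not code from the series itself.]

/*
 * Hypothetical variant of nvme_complete_batch() with nvme_complete_batch_req()
 * folded in. Each request now needs two cross-module calls from the inlined
 * loop rather than one call to the exported helper.
 */
static __always_inline void nvme_complete_batch(struct io_batch *iob,
		void (*fn)(struct request *rq))
{
	struct request *req;

	req = rq_list_peek(&iob->req_list);
	while (req) {
		fn(req);
		nvme_cleanup_cmd(req);		/* already exported */
		nvme_end_req_zoned(req);	/* would need a new export */
		req->status = BLK_STS_OK;
		req = rq_list_next(req);
	}

	blk_mq_end_request_batch(iob);
}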
On Thu, Oct 14, 2021 at 09:30:57AM -0600, Jens Axboe wrote:
> > Can we turn this into a normal for loop?
> > 
> > 	for (req = rq_list_peek(&iob->req_list); req; req = rq_list_next(req)) {
> > 		..
> > 	}
> 
> If you prefer it that way for nvme, for me the while () setup is much
> easier to read than a really long for line.

I prefer the loop over the while loop.  My real preference would be
a helper macro and do:

	for_each_rq(req, &iob->req_list) {

as suggested last round.
On 10/14/21 10:07 AM, Christoph Hellwig wrote:
> On Thu, Oct 14, 2021 at 09:30:57AM -0600, Jens Axboe wrote:
>>> Can we turn this into a normal for loop?
>>>
>>> 	for (req = rq_list_peek(&iob->req_list); req; req = rq_list_next(req)) {
>>> 		..
>>> 	}
>>
>> If you prefer it that way for nvme, for me the while () setup is much
>> easier to read than a really long for line.
> 
> I prefer the loop over the while loop.  My real preference would be
> a helper macro and do:
> 
> 	for_each_rq(req, &iob->req_list) {
> 
> as suggested last round.

Sure, I can turn it into a helper and use that.
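[For reference, a minimal sketch of the kind of helper being discussed, expressed only in terms of the rq_list_peek()/rq_list_next() accessors already used by the patch. The name for_each_rq follows the suggestion above and is not necessarily what ends up in the tree.]

/* Walk an io_batch request list; pos is the struct request * cursor. */
#define for_each_rq(pos, listptr)					\
	for (pos = rq_list_peek(listptr); pos; pos = rq_list_next(pos))

static __always_inline void nvme_complete_batch(struct io_batch *iob,
		void (*fn)(struct request *rq))
{
	struct request *req;

	/* One line per request instead of the open-coded while loop. */
	for_each_rq(req, &iob->req_list) {
		fn(req);
		nvme_complete_batch_req(req);
	}
	blk_mq_end_request_batch(iob);
}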
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index c2c2e8545292..4b14258a3bac 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -346,15 +346,19 @@ static inline enum nvme_disposition nvme_decide_disposition(struct request *req)
 	return RETRY;
 }
 
-static inline void nvme_end_req(struct request *req)
+static inline void nvme_end_req_zoned(struct request *req)
 {
-	blk_status_t status = nvme_error_status(nvme_req(req)->status);
-
 	if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) &&
 	    req_op(req) == REQ_OP_ZONE_APPEND)
 		req->__sector = nvme_lba_to_sect(req->q->queuedata,
 			le64_to_cpu(nvme_req(req)->result.u64));
+}
+
+static inline void nvme_end_req(struct request *req)
+{
+	blk_status_t status = nvme_error_status(nvme_req(req)->status);
 
+	nvme_end_req_zoned(req);
 	nvme_trace_bio_complete(req);
 	blk_mq_end_request(req, status);
 }
@@ -381,6 +385,14 @@ void nvme_complete_rq(struct request *req)
 }
 EXPORT_SYMBOL_GPL(nvme_complete_rq);
 
+void nvme_complete_batch_req(struct request *req)
+{
+	nvme_cleanup_cmd(req);
+	nvme_end_req_zoned(req);
+	req->status = BLK_STS_OK;
+}
+EXPORT_SYMBOL_GPL(nvme_complete_batch_req);
+
 /*
  * Called to unwind from ->queue_rq on a failed command submission so that the
  * multipathing code gets called to potentially failover to another path.
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index ed79a6c7e804..e0c079f704cf 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -638,6 +638,23 @@ static inline bool nvme_is_aen_req(u16 qid, __u16 command_id)
 }
 
 void nvme_complete_rq(struct request *req);
+void nvme_complete_batch_req(struct request *req);
+
+static __always_inline void nvme_complete_batch(struct io_batch *iob,
+		void (*fn)(struct request *rq))
+{
+	struct request *req;
+
+	req = rq_list_peek(&iob->req_list);
+	while (req) {
+		fn(req);
+		nvme_complete_batch_req(req);
+		req = rq_list_next(req);
+	}
+
+	blk_mq_end_request_batch(iob);
+}
+
 blk_status_t nvme_host_path_error(struct request *req);
 bool nvme_cancel_request(struct request *req, void *data, bool reserved);
 void nvme_cancel_tagset(struct nvme_ctrl *ctrl);
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 9db6e23f41ef..ae253f6f5c80 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -959,7 +959,7 @@ static blk_status_t nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
 	return ret;
 }
 
-static void nvme_pci_complete_rq(struct request *req)
+static __always_inline void nvme_pci_unmap_rq(struct request *req)
 {
 	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
 	struct nvme_dev *dev = iod->nvmeq->dev;
@@ -969,9 +969,19 @@ static void nvme_pci_complete_rq(struct request *req)
 			       rq_integrity_vec(req)->bv_len, rq_data_dir(req));
 	if (blk_rq_nr_phys_segments(req))
 		nvme_unmap_data(dev, req);
+}
+
+static void nvme_pci_complete_rq(struct request *req)
+{
+	nvme_pci_unmap_rq(req);
 	nvme_complete_rq(req);
 }
 
+static void nvme_pci_complete_batch(struct io_batch *iob)
+{
+	nvme_complete_batch(iob, nvme_pci_unmap_rq);
+}
+
 /* We read the CQE phase first to check if the rest of the entry is valid */
 static inline bool nvme_cqe_pending(struct nvme_queue *nvmeq)
 {
@@ -996,7 +1006,8 @@ static inline struct blk_mq_tags *nvme_queue_tagset(struct nvme_queue *nvmeq)
 	return nvmeq->dev->tagset.tags[nvmeq->qid - 1];
 }
 
-static inline void nvme_handle_cqe(struct nvme_queue *nvmeq, u16 idx)
+static inline void nvme_handle_cqe(struct nvme_queue *nvmeq,
+				   struct io_batch *iob, u16 idx)
 {
 	struct nvme_completion *cqe = &nvmeq->cqes[idx];
 	__u16 command_id = READ_ONCE(cqe->command_id);
@@ -1023,8 +1034,17 @@ static inline void nvme_handle_cqe(struct nvme_queue *nvmeq, u16 idx)
 	}
 
 	trace_nvme_sq(req, cqe->sq_head, nvmeq->sq_tail);
-	if (!nvme_try_complete_req(req, cqe->status, cqe->result))
-		nvme_pci_complete_rq(req);
+	if (!nvme_try_complete_req(req, cqe->status, cqe->result)) {
+		/*
+		 * Do normal inline completion if we don't have a batch
+		 * list, if we have an end_io handler, or if the status of
+		 * the request isn't just normal success.
+		 */
+		if (!iob || req->end_io || nvme_req(req)->status)
+			nvme_pci_complete_rq(req);
+		else
+			rq_list_add_tail(&iob->req_list, req);
+	}
 }
 
 static inline void nvme_update_cq_head(struct nvme_queue *nvmeq)
@@ -1050,7 +1070,7 @@ static inline int nvme_process_cq(struct nvme_queue *nvmeq)
 		 * the cqe requires a full read memory barrier
 		 */
 		dma_rmb();
-		nvme_handle_cqe(nvmeq, nvmeq->cq_head);
+		nvme_handle_cqe(nvmeq, NULL, nvmeq->cq_head);
 		nvme_update_cq_head(nvmeq);
 	}
 
@@ -1092,6 +1112,27 @@ static void nvme_poll_irqdisable(struct nvme_queue *nvmeq)
 	enable_irq(pci_irq_vector(pdev, nvmeq->cq_vector));
 }
 
+static inline int nvme_poll_cq(struct nvme_queue *nvmeq, struct io_batch *iob)
+{
+	int found = 0;
+
+	while (nvme_cqe_pending(nvmeq)) {
+		found++;
+		/*
+		 * load-load control dependency between phase and the rest of
+		 * the cqe requires a full read memory barrier
+		 */
+		dma_rmb();
+		nvme_handle_cqe(nvmeq, iob, nvmeq->cq_head);
+		nvme_update_cq_head(nvmeq);
+	}
+
+	if (found)
+		nvme_ring_cq_doorbell(nvmeq);
+	return found;
+}
+
+
 static int nvme_poll(struct blk_mq_hw_ctx *hctx, struct io_batch *iob)
 {
 	struct nvme_queue *nvmeq = hctx->driver_data;
@@ -1101,7 +1142,7 @@ static int nvme_poll(struct blk_mq_hw_ctx *hctx, struct io_batch *iob)
 		return 0;
 
 	spin_lock(&nvmeq->cq_poll_lock);
-	found = nvme_process_cq(nvmeq);
+	found = nvme_poll_cq(nvmeq, iob);
 	spin_unlock(&nvmeq->cq_poll_lock);
 
 	return found;
@@ -1639,6 +1680,7 @@ static const struct blk_mq_ops nvme_mq_admin_ops = {
 static const struct blk_mq_ops nvme_mq_ops = {
 	.queue_rq	= nvme_queue_rq,
 	.complete	= nvme_pci_complete_rq,
+	.complete_batch = nvme_pci_complete_batch,
 	.commit_rqs	= nvme_commit_rqs,
 	.init_hctx	= nvme_init_hctx,
 	.init_request	= nvme_init_request,
Take advantage of struct io_batch, if passed in to the nvme poll handler.
If it's set, rather than complete each request individually inline, store
them in the io_batch list. We only do so for requests that will complete
successfully; anything else will be completed inline as before.

Add an mq_ops->complete_batch() handler to do the post-processing of the
io_batch list once polling is complete.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/nvme/host/core.c | 18 +++++++++++---
 drivers/nvme/host/nvme.h | 17 +++++++++++++
 drivers/nvme/host/pci.c  | 54 +++++++++++++++++++++++++++++++++++-----
 3 files changed, 80 insertions(+), 9 deletions(-)
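[To illustrate the flow the commit message describes, here is a hypothetical sketch of a block-layer-side caller. The ->poll() and ->complete_batch() signatures and the io_batch req_list member are taken from the diff above; the wrapper function, its name, and when the block layer would actually invoke it are assumptions for the example, not code from the series.]

/*
 * Illustrative only: drive the driver's poll handler with an io_batch,
 * then hand everything it deferred to the batched completion hook.
 */
static int poll_and_complete_batch(struct blk_mq_hw_ctx *hctx)
{
	struct io_batch iob = { };
	int found;

	/* The driver adds successfully completed requests to iob.req_list. */
	found = hctx->queue->mq_ops->poll(hctx, &iob);

	/* Post-process whatever was deferred into the batch list. */
	if (iob.req_list)
		hctx->queue->mq_ops->complete_batch(&iob);

	return found;
}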