Message ID | 20240228125733.299384-8-aaptel@nvidia.com (mailing list archive) |
---|---|
State | Changes Requested |
Series | nvme-tcp receive offloads |
On 28/02/2024 14:57, Aurelien Aptel wrote:
> From: Yoray Zack <yorayz@nvidia.com>
>
> Enable rx side of DDGST offload when supported.
>
> At the end of the capsule, check if all the skb bits are on, and if not
> recalculate the DDGST in SW and check it.
>
> Signed-off-by: Yoray Zack <yorayz@nvidia.com>
> Signed-off-by: Boris Pismenny <borisp@nvidia.com>
> Signed-off-by: Ben Ben-Ishay <benishay@nvidia.com>
> Signed-off-by: Or Gerlitz <ogerlitz@nvidia.com>
> Signed-off-by: Shai Malin <smalin@nvidia.com>
> Signed-off-by: Aurelien Aptel <aaptel@nvidia.com>
> Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
> Reviewed-by: Max Gurtovoy <mgurtovoy@nvidia.com>
> ---
>  drivers/nvme/host/tcp.c | 96 ++++++++++++++++++++++++++++++++++++++---
>  1 file changed, 91 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
> index b7cfe14661d6..2eebd9d2aee5 100644
> --- a/drivers/nvme/host/tcp.c
> +++ b/drivers/nvme/host/tcp.c
> [...]
> @@ -1213,9 +1300,8 @@ static int nvme_tcp_recv_ddgst(struct nvme_tcp_queue *queue,
>  			le32_to_cpu(queue->exp_ddgst));
>  	}
>
> +out:

Nit: you can rename the label to ddgst_valid: or ddgst_ok:

Other than that,
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Sagi Grimberg <sagi@grimberg.me> writes:
> Nit: you can rename the label to ddgst_valid: or ddgst_ok:

Ok, we will do the rename.

> Other than that,
> Reviewed-by: Sagi Grimberg <sagi@grimberg.me>

Thanks!
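For reference, a minimal sketch of how the tail of nvme_tcp_recv_ddgst() might read after that rename; it assumes the ddgst_ok: spelling is picked (the thread leaves ddgst_valid: vs ddgst_ok: open) and keeps the control flow of this revision unchanged:

	if (test_bit(NVME_TCP_Q_OFF_DDGST_RX, &queue->flags)) {
		/* HW verified the digest on every skb of this capsule */
		if (nvme_tcp_ddp_ddgst_ok(queue))
			goto ddgst_ok;
		/* otherwise recalculate and verify it with the SW fallback */
		nvme_tcp_ddp_ddgst_recalc(queue->rcv_hash, rq,
					  &queue->exp_ddgst);
	}

	if (queue->recv_ddgst != queue->exp_ddgst) {
		/* existing digest-mismatch handling, unchanged by the rename */
	}

ddgst_ok:	/* renamed from "out:" per the review nit */
	if (pdu->hdr.flags & NVME_TCP_F_DATA_SUCCESS) {
		struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq);

		nvme_tcp_end_request(rq, le16_to_cpu(req->status));
	}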
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index b7cfe14661d6..2eebd9d2aee5 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -143,6 +143,7 @@ enum nvme_tcp_queue_flags {
 	NVME_TCP_Q_LIVE		= 1,
 	NVME_TCP_Q_POLLING	= 2,
 	NVME_TCP_Q_OFF_DDP	= 3,
+	NVME_TCP_Q_OFF_DDGST_RX = 4,
 };
 
 enum nvme_tcp_recv_state {
@@ -180,6 +181,7 @@ struct nvme_tcp_queue {
 	 * is pending (ULP_DDP_RESYNC_PENDING).
 	 */
 	atomic64_t		resync_tcp_seq;
+	bool			ddp_ddgst_valid;
 #endif
 
 	/* send state */
@@ -393,6 +395,46 @@ nvme_tcp_get_ddp_netdev_with_limits(struct nvme_tcp_ctrl *ctrl)
 	return NULL;
 }
 
+static inline bool nvme_tcp_ddp_ddgst_ok(struct nvme_tcp_queue *queue)
+{
+	return queue->ddp_ddgst_valid;
+}
+
+static inline void nvme_tcp_ddp_ddgst_update(struct nvme_tcp_queue *queue,
+					     struct sk_buff *skb)
+{
+	if (queue->ddp_ddgst_valid)
+		queue->ddp_ddgst_valid = skb_is_ulp_crc(skb);
+}
+
+static void nvme_tcp_ddp_ddgst_recalc(struct ahash_request *hash,
+				      struct request *rq,
+				      __le32 *ddgst)
+{
+	struct nvme_tcp_request *req;
+
+	req = blk_mq_rq_to_pdu(rq);
+	if (!req->offloaded) {
+		/* if we have DDGST_RX offload but DDP was skipped
+		 * because it's under the min IO threshold then the
+		 * request won't have an SGL, so we need to make it
+		 * here
+		 */
+		if (nvme_tcp_ddp_alloc_sgl(req, rq))
+			return;
+	}
+
+	ahash_request_set_crypt(hash, req->ddp.sg_table.sgl, (u8 *)ddgst,
+				req->data_len);
+	crypto_ahash_digest(hash);
+	if (!req->offloaded) {
+		/* without DDP, ddp_teardown() won't be called, so
+		 * free the table here
+		 */
+		sg_free_table_chained(&req->ddp.sg_table, SG_CHUNK_SIZE);
+	}
+}
+
 static bool nvme_tcp_resync_request(struct sock *sk, u32 seq, u32 flags);
 static void nvme_tcp_ddp_teardown_done(void *ddp_ctx);
 static const struct ulp_ddp_ulp_ops nvme_tcp_ddp_ulp_ops = {
@@ -478,6 +520,10 @@ static int nvme_tcp_offload_socket(struct nvme_tcp_queue *queue)
 		return ret;
 
 	set_bit(NVME_TCP_Q_OFF_DDP, &queue->flags);
+	if (queue->data_digest &&
+	    ulp_ddp_is_cap_active(queue->ctrl->ddp_netdev,
+				  ULP_DDP_CAP_NVME_TCP_DDGST_RX))
+		set_bit(NVME_TCP_Q_OFF_DDGST_RX, &queue->flags);
 
 	return 0;
 }
@@ -485,6 +531,7 @@ static int nvme_tcp_offload_socket(struct nvme_tcp_queue *queue)
 static void nvme_tcp_unoffload_socket(struct nvme_tcp_queue *queue)
 {
 	clear_bit(NVME_TCP_Q_OFF_DDP, &queue->flags);
+	clear_bit(NVME_TCP_Q_OFF_DDGST_RX, &queue->flags);
 	ulp_ddp_sk_del(queue->ctrl->ddp_netdev, queue->sock->sk);
 }
 
@@ -593,6 +640,20 @@ static void nvme_tcp_resync_response(struct nvme_tcp_queue *queue,
 				     struct sk_buff *skb, unsigned int offset)
 {}
 
+static inline bool nvme_tcp_ddp_ddgst_ok(struct nvme_tcp_queue *queue)
+{
+	return false;
+}
+
+static inline void nvme_tcp_ddp_ddgst_update(struct nvme_tcp_queue *queue,
+					     struct sk_buff *skb)
+{}
+
+static void nvme_tcp_ddp_ddgst_recalc(struct ahash_request *hash,
+				      struct request *rq,
+				      __le32 *ddgst)
+{}
+
 #endif
 
 static void nvme_tcp_init_iter(struct nvme_tcp_request *req,
@@ -853,6 +914,9 @@ static void nvme_tcp_init_recv_ctx(struct nvme_tcp_queue *queue)
 	queue->pdu_offset = 0;
 	queue->data_remaining = -1;
 	queue->ddgst_remaining = 0;
+#ifdef CONFIG_ULP_DDP
+	queue->ddp_ddgst_valid = true;
+#endif
 }
 
 static void nvme_tcp_error_recovery(struct nvme_ctrl *ctrl)
@@ -1118,6 +1182,9 @@ static int nvme_tcp_recv_data(struct nvme_tcp_queue *queue, struct sk_buff *skb,
 		nvme_cid_to_rq(nvme_tcp_tagset(queue), pdu->command_id);
 	struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq);
 
+	if (test_bit(NVME_TCP_Q_OFF_DDGST_RX, &queue->flags))
+		nvme_tcp_ddp_ddgst_update(queue, skb);
+
 	while (true) {
 		int recv_len, ret;
 
@@ -1146,7 +1213,8 @@ static int nvme_tcp_recv_data(struct nvme_tcp_queue *queue, struct sk_buff *skb,
 		recv_len = min_t(size_t, recv_len,
				 iov_iter_count(&req->iter));
 
-		if (queue->data_digest)
+		if (queue->data_digest &&
+		    !test_bit(NVME_TCP_Q_OFF_DDGST_RX, &queue->flags))
 			ret = skb_copy_and_hash_datagram_iter(skb, *offset,
 				&req->iter, recv_len, queue->rcv_hash);
 		else
@@ -1188,8 +1256,11 @@ static int nvme_tcp_recv_ddgst(struct nvme_tcp_queue *queue,
 	char *ddgst = (char *)&queue->recv_ddgst;
 	size_t recv_len = min_t(size_t, *len, queue->ddgst_remaining);
 	off_t off = NVME_TCP_DIGEST_LENGTH - queue->ddgst_remaining;
+	struct request *rq;
 	int ret;
 
+	if (test_bit(NVME_TCP_Q_OFF_DDGST_RX, &queue->flags))
+		nvme_tcp_ddp_ddgst_update(queue, skb);
 	ret = skb_copy_bits(skb, *offset, &ddgst[off], recv_len);
 	if (unlikely(ret))
 		return ret;
@@ -1200,9 +1271,25 @@ static int nvme_tcp_recv_ddgst(struct nvme_tcp_queue *queue,
 	if (queue->ddgst_remaining)
 		return 0;
 
+	rq = nvme_cid_to_rq(nvme_tcp_tagset(queue),
+			    pdu->command_id);
+
+	if (test_bit(NVME_TCP_Q_OFF_DDGST_RX, &queue->flags)) {
+		/*
+		 * If HW successfully offloaded the digest
+		 * verification, we can skip it
+		 */
+		if (nvme_tcp_ddp_ddgst_ok(queue))
+			goto out;
+		/*
+		 * Otherwise we have to recalculate and verify the
+		 * digest with the software-fallback
+		 */
+		nvme_tcp_ddp_ddgst_recalc(queue->rcv_hash, rq,
+					  &queue->exp_ddgst);
+	}
+
 	if (queue->recv_ddgst != queue->exp_ddgst) {
-		struct request *rq = nvme_cid_to_rq(nvme_tcp_tagset(queue),
-						    pdu->command_id);
 		struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq);
 
 		req->status = cpu_to_le16(NVME_SC_DATA_XFER_ERROR);
@@ -1213,9 +1300,8 @@ static int nvme_tcp_recv_ddgst(struct nvme_tcp_queue *queue,
 			le32_to_cpu(queue->exp_ddgst));
 	}
 
+out:
 	if (pdu->hdr.flags & NVME_TCP_F_DATA_SUCCESS) {
-		struct request *rq = nvme_cid_to_rq(nvme_tcp_tagset(queue),
-						    pdu->command_id);
 		struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq);
 
 		nvme_tcp_end_request(rq, le16_to_cpu(req->status));