Message ID | 20231214132623.119227-8-aaptel@nvidia.com (mailing list archive) |
---|---|
State | Superseded |
Series | nvme-tcp receive offloads |
On 14/12/2023 15:26, Aurelien Aptel wrote:
> From: Yoray Zack <yorayz@nvidia.com>
>
> Enable rx side of DDGST offload when supported.
>
> At the end of the capsule, check if all the skb bits are on, and if not
> recalculate the DDGST in SW and check it.
>
> Signed-off-by: Yoray Zack <yorayz@nvidia.com>
> Signed-off-by: Boris Pismenny <borisp@nvidia.com>
> Signed-off-by: Ben Ben-Ishay <benishay@nvidia.com>
> Signed-off-by: Or Gerlitz <ogerlitz@nvidia.com>
> Signed-off-by: Shai Malin <smalin@nvidia.com>
> Signed-off-by: Aurelien Aptel <aaptel@nvidia.com>
> Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
> ---
>  drivers/nvme/host/tcp.c | 81 ++++++++++++++++++++++++++++++++++++++---
>  1 file changed, 76 insertions(+), 5 deletions(-)
>
[...]
> @@ -1107,6 +1155,10 @@ static int nvme_tcp_recv_data(struct nvme_tcp_queue *queue, struct sk_buff *skb,
>  		nvme_cid_to_rq(nvme_tcp_tagset(queue), pdu->command_id);
>  	struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq);
>
> +	if (queue->data_digest &&
> +	    test_bit(NVME_TCP_Q_OFF_DDGST_RX, &queue->flags))
> +		nvme_tcp_ddp_ddgst_update(queue, skb);
> +

I think we can use only the test_bit(NVME_TCP_Q_OFF_DDGST_RX, &queue->flags)
check in the if condition. We don't set this bit unless queue->data_digest is
true, correct?

It is a micro-optimization, but it can still help.

Otherwise looks good,

Reviewed-by: Max Gurtovoy <mgurtovoy@nvidia.com>
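For reference, a minimal sketch of the simplification suggested in the reply
above, assuming (as the reply notes) that NVME_TCP_Q_OFF_DDGST_RX is only set
in nvme_tcp_offload_socket() when queue->data_digest is true; this is an
illustration of the reviewer's point, not part of the posted patch:

	/*
	 * NVME_TCP_Q_OFF_DDGST_RX implies queue->data_digest, so testing
	 * the flag alone would be sufficient here.
	 */
	if (test_bit(NVME_TCP_Q_OFF_DDGST_RX, &queue->flags))
		nvme_tcp_ddp_ddgst_update(queue, skb);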
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 09ffa8ba7e72..a7591eb90b96 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -143,6 +143,7 @@ enum nvme_tcp_queue_flags {
 	NVME_TCP_Q_LIVE		= 1,
 	NVME_TCP_Q_POLLING	= 2,
 	NVME_TCP_Q_OFF_DDP	= 3,
+	NVME_TCP_Q_OFF_DDGST_RX	= 4,
 };
 
 enum nvme_tcp_recv_state {
@@ -180,6 +181,7 @@ struct nvme_tcp_queue {
 	 * is pending (ULP_DDP_RESYNC_PENDING).
 	 */
 	atomic64_t		resync_tcp_seq;
+	bool			ddp_ddgst_valid;
 #endif
 
 	/* send state */
@@ -378,6 +380,30 @@ nvme_tcp_get_ddp_netdev_with_limits(struct nvme_tcp_ctrl *ctrl)
 	return NULL;
 }
 
+static inline bool nvme_tcp_ddp_ddgst_ok(struct nvme_tcp_queue *queue)
+{
+	return queue->ddp_ddgst_valid;
+}
+
+static inline void nvme_tcp_ddp_ddgst_update(struct nvme_tcp_queue *queue,
+					     struct sk_buff *skb)
+{
+	if (queue->ddp_ddgst_valid)
+		queue->ddp_ddgst_valid = skb_is_ulp_crc(skb);
+}
+
+static void nvme_tcp_ddp_ddgst_recalc(struct ahash_request *hash,
+				      struct request *rq,
+				      __le32 *ddgst)
+{
+	struct nvme_tcp_request *req;
+
+	req = blk_mq_rq_to_pdu(rq);
+	ahash_request_set_crypt(hash, req->ddp.sg_table.sgl, (u8 *)ddgst,
+				req->data_len);
+	crypto_ahash_digest(hash);
+}
+
 static bool nvme_tcp_resync_request(struct sock *sk, u32 seq, u32 flags);
 static void nvme_tcp_ddp_teardown_done(void *ddp_ctx);
 static const struct ulp_ddp_ulp_ops nvme_tcp_ddp_ulp_ops = {
@@ -467,6 +493,10 @@ static int nvme_tcp_offload_socket(struct nvme_tcp_queue *queue)
 		return ret;
 
 	set_bit(NVME_TCP_Q_OFF_DDP, &queue->flags);
+	if (queue->data_digest &&
+	    ulp_ddp_is_cap_active(queue->ctrl->ddp_netdev,
+				  ULP_DDP_CAP_NVME_TCP_DDGST_RX))
+		set_bit(NVME_TCP_Q_OFF_DDGST_RX, &queue->flags);
 
 	return 0;
 }
@@ -474,6 +504,7 @@ static int nvme_tcp_offload_socket(struct nvme_tcp_queue *queue)
 static void nvme_tcp_unoffload_socket(struct nvme_tcp_queue *queue)
 {
 	clear_bit(NVME_TCP_Q_OFF_DDP, &queue->flags);
+	clear_bit(NVME_TCP_Q_OFF_DDGST_RX, &queue->flags);
 	ulp_ddp_sk_del(queue->ctrl->ddp_netdev, queue->sock->sk);
 }
 
@@ -582,6 +613,20 @@ static void nvme_tcp_resync_response(struct nvme_tcp_queue *queue,
 				     struct sk_buff *skb, unsigned int offset)
 {}
 
+static inline bool nvme_tcp_ddp_ddgst_ok(struct nvme_tcp_queue *queue)
+{
+	return false;
+}
+
+static inline void nvme_tcp_ddp_ddgst_update(struct nvme_tcp_queue *queue,
+					     struct sk_buff *skb)
+{}
+
+static void nvme_tcp_ddp_ddgst_recalc(struct ahash_request *hash,
+				      struct request *rq,
+				      __le32 *ddgst)
+{}
+
 #endif
 
 static void nvme_tcp_init_iter(struct nvme_tcp_request *req,
@@ -842,6 +887,9 @@ static void nvme_tcp_init_recv_ctx(struct nvme_tcp_queue *queue)
 	queue->pdu_offset = 0;
 	queue->data_remaining = -1;
 	queue->ddgst_remaining = 0;
+#ifdef CONFIG_ULP_DDP
+	queue->ddp_ddgst_valid = true;
+#endif
 }
 
 static void nvme_tcp_error_recovery(struct nvme_ctrl *ctrl)
@@ -1107,6 +1155,10 @@ static int nvme_tcp_recv_data(struct nvme_tcp_queue *queue, struct sk_buff *skb,
 		nvme_cid_to_rq(nvme_tcp_tagset(queue), pdu->command_id);
 	struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq);
 
+	if (queue->data_digest &&
+	    test_bit(NVME_TCP_Q_OFF_DDGST_RX, &queue->flags))
+		nvme_tcp_ddp_ddgst_update(queue, skb);
+
 	while (true) {
 		int recv_len, ret;
 
@@ -1135,7 +1187,8 @@ static int nvme_tcp_recv_data(struct nvme_tcp_queue *queue, struct sk_buff *skb,
 		recv_len = min_t(size_t, recv_len,
 				 iov_iter_count(&req->iter));
 
-		if (queue->data_digest)
+		if (queue->data_digest &&
+		    !test_bit(NVME_TCP_Q_OFF_DDGST_RX, &queue->flags))
 			ret = skb_copy_and_hash_datagram_iter(skb, *offset,
 				&req->iter, recv_len, queue->rcv_hash);
 		else
@@ -1177,8 +1230,11 @@ static int nvme_tcp_recv_ddgst(struct nvme_tcp_queue *queue,
 	char *ddgst = (char *)&queue->recv_ddgst;
 	size_t recv_len = min_t(size_t, *len, queue->ddgst_remaining);
 	off_t off = NVME_TCP_DIGEST_LENGTH - queue->ddgst_remaining;
+	struct request *rq;
 	int ret;
 
+	if (test_bit(NVME_TCP_Q_OFF_DDGST_RX, &queue->flags))
+		nvme_tcp_ddp_ddgst_update(queue, skb);
 	ret = skb_copy_bits(skb, *offset, &ddgst[off], recv_len);
 	if (unlikely(ret))
 		return ret;
@@ -1189,9 +1245,25 @@ static int nvme_tcp_recv_ddgst(struct nvme_tcp_queue *queue,
 	if (queue->ddgst_remaining)
 		return 0;
 
+	rq = nvme_cid_to_rq(nvme_tcp_tagset(queue),
+			    pdu->command_id);
+
+	if (test_bit(NVME_TCP_Q_OFF_DDGST_RX, &queue->flags)) {
+		/*
+		 * If HW successfully offloaded the digest
+		 * verification, we can skip it
+		 */
+		if (nvme_tcp_ddp_ddgst_ok(queue))
+			goto out;
+		/*
+		 * Otherwise we have to recalculate and verify the
+		 * digest with the software-fallback
+		 */
+		nvme_tcp_ddp_ddgst_recalc(queue->rcv_hash, rq,
+					  &queue->exp_ddgst);
+	}
+
 	if (queue->recv_ddgst != queue->exp_ddgst) {
-		struct request *rq = nvme_cid_to_rq(nvme_tcp_tagset(queue),
-					pdu->command_id);
 		struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq);
 
 		req->status = cpu_to_le16(NVME_SC_DATA_XFER_ERROR);
@@ -1202,9 +1274,8 @@ static int nvme_tcp_recv_ddgst(struct nvme_tcp_queue *queue,
 			le32_to_cpu(queue->exp_ddgst));
 	}
 
+out:
 	if (pdu->hdr.flags & NVME_TCP_F_DATA_SUCCESS) {
-		struct request *rq = nvme_cid_to_rq(nvme_tcp_tagset(queue),
-					pdu->command_id);
 		struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq);
 
 		nvme_tcp_end_request(rq, le16_to_cpu(req->status));
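As a rough sketch of the rule described in the commit message (the hardware
DDGST verification only counts if every skb that carried part of the capsule
had its ULP CRC bit set; otherwise the digest is recomputed in software), here
is a simplified standalone model. The names capsule_state, update_on_skb() and
need_sw_recalc() are illustrative only and do not exist in the driver:

	#include <stdbool.h>
	#include <stdio.h>

	/*
	 * Illustrative model of the per-capsule validity tracking: the flag
	 * starts true (like ddp_ddgst_valid in nvme_tcp_init_recv_ctx()) and
	 * is cleared as soon as one skb arrives without the ULP CRC bit set.
	 */
	struct capsule_state {
		bool ddgst_valid;
	};

	static void capsule_init(struct capsule_state *st)
	{
		st->ddgst_valid = true;
	}

	static void update_on_skb(struct capsule_state *st, bool skb_crc_ok)
	{
		if (st->ddgst_valid)
			st->ddgst_valid = skb_crc_ok; /* one bad skb spoils the capsule */
	}

	static bool need_sw_recalc(const struct capsule_state *st)
	{
		return !st->ddgst_valid; /* fall back to software DDGST */
	}

	int main(void)
	{
		struct capsule_state st;
		bool skbs[] = { true, true, false, true }; /* third skb lacks the CRC bit */
		unsigned int i;

		capsule_init(&st);
		for (i = 0; i < sizeof(skbs) / sizeof(skbs[0]); i++)
			update_on_skb(&st, skbs[i]);

		printf("software recalculation needed: %s\n",
		       need_sw_recalc(&st) ? "yes" : "no");
		return 0;
	}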