Message ID | 20210201100509.27351-9-borisp@mellanox.com (mailing list archive) |
---|---|
State | Changes Requested |
Delegated to: | Netdev Maintainers |
Series | nvme-tcp receive offloads |
Context | Check | Description |
---|---|---|
netdev/cover_letter | success | Link |
netdev/fixes_present | success | Link |
netdev/patch_count | fail | Series longer than 15 patches |
netdev/tree_selection | success | Clearly marked for net-next |
netdev/subject_prefix | success | Link |
netdev/cc_maintainers | success | CCed 5 of 5 maintainers |
netdev/source_inline | fail | Was 0 now: 2 |
netdev/verify_signedoff | success | Link |
netdev/module_param | success | Was 0 now: 0 |
netdev/build_32bit | fail | Errors and warnings before: 1 this patch: 2 |
netdev/kdoc | success | Errors and warnings before: 0 this patch: 0 |
netdev/verify_fixes | success | Link |
netdev/checkpatch | fail | ERROR: trailing whitespace; WARNING: line length of 83 exceeds 80 columns; WARNING: line length of 84 exceeds 80 columns; WARNING: line length of 85 exceeds 80 columns
netdev/build_allmodconfig_warn | fail | Errors and warnings before: 1 this patch: 2 |
netdev/header_inline | success | Link |
netdev/stable | success | Stable not CCed |
On 2/1/21 2:04 AM, Boris Pismenny wrote: > From: Ben Ben-ishay <benishay@nvidia.com> > > crc offload of the nvme capsule. Check if all the skb bits > are on, and if not recalculate the crc in SW and check it. > > This patch reworks the receive-side crc calculation to always > run at the end, so as to keep a single flow for both offload > and non-offload. This change simplifies the code, but it may degrade > performance for non-offload crc calculation. > > Signed-off-by: Boris Pismenny <borisp@mellanox.com> > Signed-off-by: Ben Ben-Ishay <benishay@mellanox.com> > Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com> > Signed-off-by: Yoray Zack <yorayz@mellanox.com> > --- > drivers/nvme/host/tcp.c | 118 ++++++++++++++++++++++++++++++++-------- > 1 file changed, 95 insertions(+), 23 deletions(-) > > diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c > index 5cb46deb56e0..eb47cf6982d7 100644 > --- a/drivers/nvme/host/tcp.c > +++ b/drivers/nvme/host/tcp.c > @@ -69,6 +69,7 @@ enum nvme_tcp_queue_flags { > NVME_TCP_Q_LIVE = 1, > NVME_TCP_Q_POLLING = 2, > NVME_TCP_Q_OFF_DDP = 3, > + NVME_TCP_Q_OFF_DDGST_RX = 4, > }; > > enum nvme_tcp_recv_state { > @@ -96,6 +97,7 @@ struct nvme_tcp_queue { > size_t data_remaining; > size_t ddgst_remaining; > unsigned int nr_cqe; > + bool ddgst_valid; > > /* send state */ > struct nvme_tcp_request *request; > @@ -234,7 +236,56 @@ static inline size_t nvme_tcp_pdu_last_send(struct nvme_tcp_request *req, > return nvme_tcp_pdu_data_left(req) <= len; > } > > -#ifdef CONFIG_TCP_DDP > +static inline bool nvme_tcp_ddp_ddgst_ok(struct nvme_tcp_queue *queue) > +{ > + return queue->ddgst_valid; > +} > + > +static inline void nvme_tcp_ddp_ddgst_update(struct nvme_tcp_queue *queue, > + struct sk_buff *skb) > +{ > + if (queue->ddgst_valid) > +#ifdef CONFIG_TCP_DDP_CRC > + queue->ddgst_valid = skb->ddp_crc; > +#else > + queue->ddgst_valid = false; > +#endif > +} > + > +static int nvme_tcp_req_map_sg(struct nvme_tcp_request *req, struct request *rq) > +{ > + int ret; > + > + req->ddp.sg_table.sgl = req->ddp.first_sgl; > + ret = sg_alloc_table_chained(&req->ddp.sg_table, blk_rq_nr_phys_segments(rq), > + req->ddp.sg_table.sgl, SG_CHUNK_SIZE); > + if (ret) > + return -ENOMEM; > + req->ddp.nents = blk_rq_map_sg(rq->q, rq, req->ddp.sg_table.sgl); > + return 0; > +} > + > +static void nvme_tcp_ddp_ddgst_recalc(struct ahash_request *hash, > + struct request *rq) > +{ > + struct nvme_tcp_request *req; > + > + if (!rq) > + return; > + > + req = blk_mq_rq_to_pdu(rq); > + > + if (!req->offloaded && nvme_tcp_req_map_sg(req, rq)) > + return; > + > + crypto_ahash_init(hash); > + req->ddp.sg_table.sgl = req->ddp.first_sgl; > + ahash_request_set_crypt(hash, req->ddp.sg_table.sgl, NULL, > + le32_to_cpu(req->data_len)); > + crypto_ahash_update(hash); > +} > + > +#if defined(CONFIG_TCP_DDP) || defined(CONFIG_TCP_DDP_CRC) > > static bool nvme_tcp_resync_request(struct sock *sk, u32 seq, u32 flags); > static void nvme_tcp_ddp_teardown_done(void *ddp_ctx); > @@ -290,12 +341,9 @@ int nvme_tcp_setup_ddp(struct nvme_tcp_queue *queue, > } > > req->ddp.command_id = command_id; > - req->ddp.sg_table.sgl = req->ddp.first_sgl; > - ret = sg_alloc_table_chained(&req->ddp.sg_table, blk_rq_nr_phys_segments(rq), > - req->ddp.sg_table.sgl, SG_CHUNK_SIZE); > + ret = nvme_tcp_req_map_sg(req, rq); Why didn't you introduce nvme_tcp_req_map_sg in the first place? 
> if (ret) > return -ENOMEM; > - req->ddp.nents = blk_rq_map_sg(rq->q, rq, req->ddp.sg_table.sgl); > > ret = netdev->tcp_ddp_ops->tcp_ddp_setup(netdev, > queue->sock->sk, > @@ -317,7 +365,7 @@ int nvme_tcp_offload_socket(struct nvme_tcp_queue *queue) > return -ENODEV; > } > > - if (!(netdev->features & NETIF_F_HW_TCP_DDP)) { > + if (!(netdev->features & (NETIF_F_HW_TCP_DDP | NETIF_F_HW_TCP_DDP_CRC_RX))) { > dev_put(netdev); > return -EOPNOTSUPP; > } > @@ -345,6 +393,9 @@ int nvme_tcp_offload_socket(struct nvme_tcp_queue *queue) > if (netdev->features & NETIF_F_HW_TCP_DDP) > set_bit(NVME_TCP_Q_OFF_DDP, &queue->flags); > > + if (netdev->features & NETIF_F_HW_TCP_DDP_CRC_RX) > + set_bit(NVME_TCP_Q_OFF_DDGST_RX, &queue->flags); > + > return ret; > } > > @@ -376,7 +427,7 @@ int nvme_tcp_offload_limits(struct nvme_tcp_queue *queue) > return -ENODEV; > } > > - if (netdev->features & NETIF_F_HW_TCP_DDP && > + if ((netdev->features & (NETIF_F_HW_TCP_DDP | NETIF_F_HW_TCP_DDP_CRC_RX)) && > netdev->tcp_ddp_ops && > netdev->tcp_ddp_ops->tcp_ddp_limits) > ret = netdev->tcp_ddp_ops->tcp_ddp_limits(netdev, &limits); > @@ -739,6 +790,7 @@ static void nvme_tcp_init_recv_ctx(struct nvme_tcp_queue *queue) > queue->pdu_offset = 0; > queue->data_remaining = -1; > queue->ddgst_remaining = 0; > + queue->ddgst_valid = true; > } > > static void nvme_tcp_error_recovery(struct nvme_ctrl *ctrl) > @@ -919,7 +971,8 @@ static int nvme_tcp_recv_pdu(struct nvme_tcp_queue *queue, struct sk_buff *skb, > > u64 pdu_seq = TCP_SKB_CB(skb)->seq + *offset - queue->pdu_offset; > > - if (test_bit(NVME_TCP_Q_OFF_DDP, &queue->flags)) > + if (test_bit(NVME_TCP_Q_OFF_DDP, &queue->flags) || > + test_bit(NVME_TCP_Q_OFF_DDGST_RX, &queue->flags)) > nvme_tcp_resync_response(queue, pdu_seq); > > ret = skb_copy_bits(skb, *offset, > @@ -988,6 +1041,8 @@ static int nvme_tcp_recv_data(struct nvme_tcp_queue *queue, struct sk_buff *skb, > struct nvme_tcp_request *req; > struct request *rq; > > + if (queue->data_digest && test_bit(NVME_TCP_Q_OFF_DDGST_RX, &queue->flags)) > + nvme_tcp_ddp_ddgst_update(queue, skb); > rq = blk_mq_tag_to_rq(nvme_tcp_tagset(queue), pdu->command_id); > if (!rq) { > dev_err(queue->ctrl->ctrl.device, > @@ -1025,15 +1080,17 @@ static int nvme_tcp_recv_data(struct nvme_tcp_queue *queue, struct sk_buff *skb, > recv_len = min_t(size_t, recv_len, > iov_iter_count(&req->iter)); > > - if (test_bit(NVME_TCP_Q_OFF_DDP, &queue->flags)) { > - if (queue->data_digest) > + if (test_bit(NVME_TCP_Q_OFF_DDP, &queue->flags)) { > + if (queue->data_digest && > + !test_bit(NVME_TCP_Q_OFF_DDGST_RX, &queue->flags)) > ret = skb_ddp_copy_and_hash_datagram_iter(skb, *offset, > &req->iter, recv_len, queue->rcv_hash); > else > ret = skb_ddp_copy_datagram_iter(skb, *offset, > &req->iter, recv_len); > } else { > - if (queue->data_digest) > + if (queue->data_digest && > + !test_bit(NVME_TCP_Q_OFF_DDGST_RX, &queue->flags)) > ret = skb_copy_and_hash_datagram_iter(skb, *offset, > &req->iter, recv_len, queue->rcv_hash); > else > @@ -1055,7 +1112,6 @@ static int nvme_tcp_recv_data(struct nvme_tcp_queue *queue, struct sk_buff *skb, > > if (!queue->data_remaining) { > if (queue->data_digest) { > - nvme_tcp_ddgst_final(queue->rcv_hash, &queue->exp_ddgst); > queue->ddgst_remaining = NVME_TCP_DIGEST_LENGTH; > } else { > if (pdu->hdr.flags & NVME_TCP_F_DATA_SUCCESS) { > @@ -1076,8 +1132,12 @@ static int nvme_tcp_recv_ddgst(struct nvme_tcp_queue *queue, > char *ddgst = (char *)&queue->recv_ddgst; > size_t recv_len = min_t(size_t, *len, queue->ddgst_remaining); > 
off_t off = NVME_TCP_DIGEST_LENGTH - queue->ddgst_remaining; > + bool offload_fail, offload_en; > + struct request *rq = NULL; > int ret; > > + if (test_bit(NVME_TCP_Q_OFF_DDGST_RX, &queue->flags)) > + nvme_tcp_ddp_ddgst_update(queue, skb); > ret = skb_copy_bits(skb, *offset, &ddgst[off], recv_len); > if (unlikely(ret)) > return ret; > @@ -1088,17 +1148,29 @@ static int nvme_tcp_recv_ddgst(struct nvme_tcp_queue *queue, > if (queue->ddgst_remaining) > return 0; > > - if (queue->recv_ddgst != queue->exp_ddgst) { > - dev_err(queue->ctrl->ctrl.device, > - "data digest error: recv %#x expected %#x\n", > - le32_to_cpu(queue->recv_ddgst), > - le32_to_cpu(queue->exp_ddgst)); > - return -EIO; > + offload_fail = !nvme_tcp_ddp_ddgst_ok(queue); > + offload_en = test_bit(NVME_TCP_Q_OFF_DDGST_RX, &queue->flags); > + if (!offload_en || offload_fail) { > + if (offload_en && offload_fail) { // software-fallback > + rq = blk_mq_tag_to_rq(nvme_tcp_tagset(queue), > + pdu->command_id); > + nvme_tcp_ddp_ddgst_recalc(queue->rcv_hash, rq); > + } > + > + nvme_tcp_ddgst_final(queue->rcv_hash, &queue->exp_ddgst); > + if (queue->recv_ddgst != queue->exp_ddgst) { > + dev_err(queue->ctrl->ctrl.device, > + "data digest error: recv %#x expected %#x\n", > + le32_to_cpu(queue->recv_ddgst), > + le32_to_cpu(queue->exp_ddgst)); > + return -EIO; > + } I still dislike this hunk. Can you split the recalc login to its own ddp function at least? This is just a confusing piece of code. > } > > if (pdu->hdr.flags & NVME_TCP_F_DATA_SUCCESS) { > - struct request *rq = blk_mq_tag_to_rq(nvme_tcp_tagset(queue), > - pdu->command_id); > + if (!rq) > + rq = blk_mq_tag_to_rq(nvme_tcp_tagset(queue), > + pdu->command_id); Why is this change needed? Maybe just move this assignment up? > > nvme_tcp_end_request(rq, NVME_SC_SUCCESS); > queue->nr_cqe++; > @@ -1841,8 +1913,10 @@ static void __nvme_tcp_stop_queue(struct nvme_tcp_queue *queue) > nvme_tcp_restore_sock_calls(queue); > cancel_work_sync(&queue->io_work); > > - if (test_bit(NVME_TCP_Q_OFF_DDP, &queue->flags)) > + if (test_bit(NVME_TCP_Q_OFF_DDP, &queue->flags) || > + test_bit(NVME_TCP_Q_OFF_DDGST_RX, &queue->flags)) > nvme_tcp_unoffload_socket(queue); > + extra newline > } > > static void nvme_tcp_stop_queue(struct nvme_ctrl *nctrl, int qid) > @@ -1970,8 +2044,6 @@ static int nvme_tcp_alloc_admin_queue(struct nvme_ctrl *ctrl) > { > int ret; > > - to_tcp_ctrl(ctrl)->offloading_netdev = NULL; > - Unclear what is the intent here. > ret = nvme_tcp_alloc_queue(ctrl, 0, NVME_AQ_DEPTH); > if (ret) > return ret; >
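Sagi's main ask above is to pull the software-fallback and digest-compare logic out of nvme_tcp_recv_ddgst(). A minimal sketch of what such a split could look like follows; the helper name nvme_tcp_ddp_ddgst_verify() and its signature are illustrative assumptions, while the calls inside are the ones the posted patch already introduces or uses:

```c
/* Illustrative helper (name/signature are assumptions, not part of the
 * posted patch): verify the data digest, falling back to a software
 * recalculation when the offload could not validate the PDU data.
 */
static int nvme_tcp_ddp_ddgst_verify(struct nvme_tcp_queue *queue,
				     struct request *rq)
{
	bool offload_en = test_bit(NVME_TCP_Q_OFF_DDGST_RX, &queue->flags);

	/* Offload validated every byte of the PDU data: nothing to do. */
	if (offload_en && nvme_tcp_ddp_ddgst_ok(queue))
		return 0;

	/* Offload enabled but digest not validated: recalculate in SW. */
	if (offload_en)
		nvme_tcp_ddp_ddgst_recalc(queue->rcv_hash, rq);

	nvme_tcp_ddgst_final(queue->rcv_hash, &queue->exp_ddgst);
	if (queue->recv_ddgst != queue->exp_ddgst) {
		dev_err(queue->ctrl->ctrl.device,
			"data digest error: recv %#x expected %#x\n",
			le32_to_cpu(queue->recv_ddgst),
			le32_to_cpu(queue->exp_ddgst));
		return -EIO;
	}
	return 0;
}
```

Since the patch already defines the ddgst helpers outside the offload #ifdef region, the non-offload path could call this unconditionally: offload_en simply evaluates to false there and only the existing software compare runs.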
On Wed, Feb 3, 2021 at 11:12 AM Sagi Grimberg <sagi@grimberg.me> wrote:
> > @@ -1841,8 +1913,10 @@ static void __nvme_tcp_stop_queue(struct nvme_tcp_queue *queue)
> >  	nvme_tcp_restore_sock_calls(queue);
> >  	cancel_work_sync(&queue->io_work);
> >
> > -	if (test_bit(NVME_TCP_Q_OFF_DDP, &queue->flags))
> > +	if (test_bit(NVME_TCP_Q_OFF_DDP, &queue->flags) ||
> > +	    test_bit(NVME_TCP_Q_OFF_DDGST_RX, &queue->flags))
> >  		nvme_tcp_unoffload_socket(queue);
> > +
>
> extra newline

will remove

> >  }
> >
> >  static void nvme_tcp_stop_queue(struct nvme_ctrl *nctrl, int qid)
> > @@ -1970,8 +2044,6 @@ static int nvme_tcp_alloc_admin_queue(struct nvme_ctrl *ctrl)
> >  {
> >  	int ret;
> >
> > -	to_tcp_ctrl(ctrl)->offloading_netdev = NULL;
> > -
>
> Unclear what is the intent here.

yep, unclear indeed.. will look and probably remove

as for your other comment on this patch, will get back to you later on

> >  	ret = nvme_tcp_alloc_queue(ctrl, 0, NVME_AQ_DEPTH);
> >  	if (ret)
> >  		return ret;
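For reference, the __nvme_tcp_stop_queue() hunk with the stray blank line dropped would read as sketched below (the teardown above the if-statement is abridged; this is illustrative, not a new revision of the patch):

```c
static void __nvme_tcp_stop_queue(struct nvme_tcp_queue *queue)
{
	/* ... socket shutdown and teardown as in the existing driver ... */
	nvme_tcp_restore_sock_calls(queue);
	cancel_work_sync(&queue->io_work);

	if (test_bit(NVME_TCP_Q_OFF_DDP, &queue->flags) ||
	    test_bit(NVME_TCP_Q_OFF_DDGST_RX, &queue->flags))
		nvme_tcp_unoffload_socket(queue);
}
```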
On Wed, Feb 3, 2021 at 11:12 AM Sagi Grimberg <sagi@grimberg.me> wrote:
> On 2/1/21 2:04 AM, Boris Pismenny wrote:
> > @@ -290,12 +341,9 @@ int nvme_tcp_setup_ddp(struct nvme_tcp_queue *queue,
> >  	}
> >
> >  	req->ddp.command_id = command_id;
> > -	req->ddp.sg_table.sgl = req->ddp.first_sgl;
> > -	ret = sg_alloc_table_chained(&req->ddp.sg_table, blk_rq_nr_phys_segments(rq),
> > -		req->ddp.sg_table.sgl, SG_CHUNK_SIZE);
> > +	ret = nvme_tcp_req_map_sg(req, rq);
>
> Why didn't you introduce nvme_tcp_req_map_sg in the first place?

OK, will do

> >  	if (ret)
> >  		return -ENOMEM;
> > -	req->ddp.nents = blk_rq_map_sg(rq->q, rq, req->ddp.sg_table.sgl);
> >
> >  	ret = netdev->tcp_ddp_ops->tcp_ddp_setup(netdev,
> >  						 queue->sock->sk,
> > @@ -1088,17 +1148,29 @@ static int nvme_tcp_recv_ddgst(struct nvme_tcp_queue *queue,
> >  	if (queue->ddgst_remaining)
> >  		return 0;
> >
> > -	if (queue->recv_ddgst != queue->exp_ddgst) {
> > -		dev_err(queue->ctrl->ctrl.device,
> > -			"data digest error: recv %#x expected %#x\n",
> > -			le32_to_cpu(queue->recv_ddgst),
> > -			le32_to_cpu(queue->exp_ddgst));
> > -		return -EIO;
> > +	offload_fail = !nvme_tcp_ddp_ddgst_ok(queue);
> > +	offload_en = test_bit(NVME_TCP_Q_OFF_DDGST_RX, &queue->flags);
> > +	if (!offload_en || offload_fail) {
> > +		if (offload_en && offload_fail) { // software-fallback
> > +			rq = blk_mq_tag_to_rq(nvme_tcp_tagset(queue),
> > +					      pdu->command_id);
> > +			nvme_tcp_ddp_ddgst_recalc(queue->rcv_hash, rq);
> > +		}
> > +
> > +		nvme_tcp_ddgst_final(queue->rcv_hash, &queue->exp_ddgst);
> > +		if (queue->recv_ddgst != queue->exp_ddgst) {
> > +			dev_err(queue->ctrl->ctrl.device,
> > +				"data digest error: recv %#x expected %#x\n",
> > +				le32_to_cpu(queue->recv_ddgst),
> > +				le32_to_cpu(queue->exp_ddgst));
> > +			return -EIO;
> > +		}
>
> I still dislike this hunk. Can you split the recalc login to its
> own ddp function at least? This is just a confusing piece of code.

mmm, we added two boolean predicates (offload_en and offload_failed) plus a
comment (software-fallback) to clarify this piece... thought it can fly

> >  	if (pdu->hdr.flags & NVME_TCP_F_DATA_SUCCESS) {
> > -		struct request *rq = blk_mq_tag_to_rq(nvme_tcp_tagset(queue),
> > -						      pdu->command_id);
> > +		if (!rq)
> > +			rq = blk_mq_tag_to_rq(nvme_tcp_tagset(queue),
> > +					      pdu->command_id);
>
> Why is this change needed? Maybe just move this assignment up?

OK will move it up
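Folding both agreed changes together (the request lookup moved up and the fallback/verify logic split out), the tail of nvme_tcp_recv_ddgst() could shrink to roughly the sketch below; nvme_tcp_ddp_ddgst_verify() is the hypothetical helper sketched after Sagi's review above, not an existing function:

```c
	/* Hoisted lookup: both the software fallback and the
	 * NVME_TCP_F_DATA_SUCCESS completion path can reuse rq.
	 */
	rq = blk_mq_tag_to_rq(nvme_tcp_tagset(queue), pdu->command_id);

	ret = nvme_tcp_ddp_ddgst_verify(queue, rq);
	if (ret)
		return ret;

	if (pdu->hdr.flags & NVME_TCP_F_DATA_SUCCESS) {
		nvme_tcp_end_request(rq, NVME_SC_SUCCESS);
		queue->nr_cqe++;
	}
	/* ... remainder of the function unchanged ... */
```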
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 5cb46deb56e0..eb47cf6982d7 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -69,6 +69,7 @@ enum nvme_tcp_queue_flags { NVME_TCP_Q_LIVE = 1, NVME_TCP_Q_POLLING = 2, NVME_TCP_Q_OFF_DDP = 3, + NVME_TCP_Q_OFF_DDGST_RX = 4, }; enum nvme_tcp_recv_state { @@ -96,6 +97,7 @@ struct nvme_tcp_queue { size_t data_remaining; size_t ddgst_remaining; unsigned int nr_cqe; + bool ddgst_valid; /* send state */ struct nvme_tcp_request *request; @@ -234,7 +236,56 @@ static inline size_t nvme_tcp_pdu_last_send(struct nvme_tcp_request *req, return nvme_tcp_pdu_data_left(req) <= len; } -#ifdef CONFIG_TCP_DDP +static inline bool nvme_tcp_ddp_ddgst_ok(struct nvme_tcp_queue *queue) +{ + return queue->ddgst_valid; +} + +static inline void nvme_tcp_ddp_ddgst_update(struct nvme_tcp_queue *queue, + struct sk_buff *skb) +{ + if (queue->ddgst_valid) +#ifdef CONFIG_TCP_DDP_CRC + queue->ddgst_valid = skb->ddp_crc; +#else + queue->ddgst_valid = false; +#endif +} + +static int nvme_tcp_req_map_sg(struct nvme_tcp_request *req, struct request *rq) +{ + int ret; + + req->ddp.sg_table.sgl = req->ddp.first_sgl; + ret = sg_alloc_table_chained(&req->ddp.sg_table, blk_rq_nr_phys_segments(rq), + req->ddp.sg_table.sgl, SG_CHUNK_SIZE); + if (ret) + return -ENOMEM; + req->ddp.nents = blk_rq_map_sg(rq->q, rq, req->ddp.sg_table.sgl); + return 0; +} + +static void nvme_tcp_ddp_ddgst_recalc(struct ahash_request *hash, + struct request *rq) +{ + struct nvme_tcp_request *req; + + if (!rq) + return; + + req = blk_mq_rq_to_pdu(rq); + + if (!req->offloaded && nvme_tcp_req_map_sg(req, rq)) + return; + + crypto_ahash_init(hash); + req->ddp.sg_table.sgl = req->ddp.first_sgl; + ahash_request_set_crypt(hash, req->ddp.sg_table.sgl, NULL, + le32_to_cpu(req->data_len)); + crypto_ahash_update(hash); +} + +#if defined(CONFIG_TCP_DDP) || defined(CONFIG_TCP_DDP_CRC) static bool nvme_tcp_resync_request(struct sock *sk, u32 seq, u32 flags); static void nvme_tcp_ddp_teardown_done(void *ddp_ctx); @@ -290,12 +341,9 @@ int nvme_tcp_setup_ddp(struct nvme_tcp_queue *queue, } req->ddp.command_id = command_id; - req->ddp.sg_table.sgl = req->ddp.first_sgl; - ret = sg_alloc_table_chained(&req->ddp.sg_table, blk_rq_nr_phys_segments(rq), - req->ddp.sg_table.sgl, SG_CHUNK_SIZE); + ret = nvme_tcp_req_map_sg(req, rq); if (ret) return -ENOMEM; - req->ddp.nents = blk_rq_map_sg(rq->q, rq, req->ddp.sg_table.sgl); ret = netdev->tcp_ddp_ops->tcp_ddp_setup(netdev, queue->sock->sk, @@ -317,7 +365,7 @@ int nvme_tcp_offload_socket(struct nvme_tcp_queue *queue) return -ENODEV; } - if (!(netdev->features & NETIF_F_HW_TCP_DDP)) { + if (!(netdev->features & (NETIF_F_HW_TCP_DDP | NETIF_F_HW_TCP_DDP_CRC_RX))) { dev_put(netdev); return -EOPNOTSUPP; } @@ -345,6 +393,9 @@ int nvme_tcp_offload_socket(struct nvme_tcp_queue *queue) if (netdev->features & NETIF_F_HW_TCP_DDP) set_bit(NVME_TCP_Q_OFF_DDP, &queue->flags); + if (netdev->features & NETIF_F_HW_TCP_DDP_CRC_RX) + set_bit(NVME_TCP_Q_OFF_DDGST_RX, &queue->flags); + return ret; } @@ -376,7 +427,7 @@ int nvme_tcp_offload_limits(struct nvme_tcp_queue *queue) return -ENODEV; } - if (netdev->features & NETIF_F_HW_TCP_DDP && + if ((netdev->features & (NETIF_F_HW_TCP_DDP | NETIF_F_HW_TCP_DDP_CRC_RX)) && netdev->tcp_ddp_ops && netdev->tcp_ddp_ops->tcp_ddp_limits) ret = netdev->tcp_ddp_ops->tcp_ddp_limits(netdev, &limits); @@ -739,6 +790,7 @@ static void nvme_tcp_init_recv_ctx(struct nvme_tcp_queue *queue) queue->pdu_offset = 0; queue->data_remaining = -1; 
queue->ddgst_remaining = 0; + queue->ddgst_valid = true; } static void nvme_tcp_error_recovery(struct nvme_ctrl *ctrl) @@ -919,7 +971,8 @@ static int nvme_tcp_recv_pdu(struct nvme_tcp_queue *queue, struct sk_buff *skb, u64 pdu_seq = TCP_SKB_CB(skb)->seq + *offset - queue->pdu_offset; - if (test_bit(NVME_TCP_Q_OFF_DDP, &queue->flags)) + if (test_bit(NVME_TCP_Q_OFF_DDP, &queue->flags) || + test_bit(NVME_TCP_Q_OFF_DDGST_RX, &queue->flags)) nvme_tcp_resync_response(queue, pdu_seq); ret = skb_copy_bits(skb, *offset, @@ -988,6 +1041,8 @@ static int nvme_tcp_recv_data(struct nvme_tcp_queue *queue, struct sk_buff *skb, struct nvme_tcp_request *req; struct request *rq; + if (queue->data_digest && test_bit(NVME_TCP_Q_OFF_DDGST_RX, &queue->flags)) + nvme_tcp_ddp_ddgst_update(queue, skb); rq = blk_mq_tag_to_rq(nvme_tcp_tagset(queue), pdu->command_id); if (!rq) { dev_err(queue->ctrl->ctrl.device, @@ -1025,15 +1080,17 @@ static int nvme_tcp_recv_data(struct nvme_tcp_queue *queue, struct sk_buff *skb, recv_len = min_t(size_t, recv_len, iov_iter_count(&req->iter)); - if (test_bit(NVME_TCP_Q_OFF_DDP, &queue->flags)) { - if (queue->data_digest) + if (test_bit(NVME_TCP_Q_OFF_DDP, &queue->flags)) { + if (queue->data_digest && + !test_bit(NVME_TCP_Q_OFF_DDGST_RX, &queue->flags)) ret = skb_ddp_copy_and_hash_datagram_iter(skb, *offset, &req->iter, recv_len, queue->rcv_hash); else ret = skb_ddp_copy_datagram_iter(skb, *offset, &req->iter, recv_len); } else { - if (queue->data_digest) + if (queue->data_digest && + !test_bit(NVME_TCP_Q_OFF_DDGST_RX, &queue->flags)) ret = skb_copy_and_hash_datagram_iter(skb, *offset, &req->iter, recv_len, queue->rcv_hash); else @@ -1055,7 +1112,6 @@ static int nvme_tcp_recv_data(struct nvme_tcp_queue *queue, struct sk_buff *skb, if (!queue->data_remaining) { if (queue->data_digest) { - nvme_tcp_ddgst_final(queue->rcv_hash, &queue->exp_ddgst); queue->ddgst_remaining = NVME_TCP_DIGEST_LENGTH; } else { if (pdu->hdr.flags & NVME_TCP_F_DATA_SUCCESS) { @@ -1076,8 +1132,12 @@ static int nvme_tcp_recv_ddgst(struct nvme_tcp_queue *queue, char *ddgst = (char *)&queue->recv_ddgst; size_t recv_len = min_t(size_t, *len, queue->ddgst_remaining); off_t off = NVME_TCP_DIGEST_LENGTH - queue->ddgst_remaining; + bool offload_fail, offload_en; + struct request *rq = NULL; int ret; + if (test_bit(NVME_TCP_Q_OFF_DDGST_RX, &queue->flags)) + nvme_tcp_ddp_ddgst_update(queue, skb); ret = skb_copy_bits(skb, *offset, &ddgst[off], recv_len); if (unlikely(ret)) return ret; @@ -1088,17 +1148,29 @@ static int nvme_tcp_recv_ddgst(struct nvme_tcp_queue *queue, if (queue->ddgst_remaining) return 0; - if (queue->recv_ddgst != queue->exp_ddgst) { - dev_err(queue->ctrl->ctrl.device, - "data digest error: recv %#x expected %#x\n", - le32_to_cpu(queue->recv_ddgst), - le32_to_cpu(queue->exp_ddgst)); - return -EIO; + offload_fail = !nvme_tcp_ddp_ddgst_ok(queue); + offload_en = test_bit(NVME_TCP_Q_OFF_DDGST_RX, &queue->flags); + if (!offload_en || offload_fail) { + if (offload_en && offload_fail) { // software-fallback + rq = blk_mq_tag_to_rq(nvme_tcp_tagset(queue), + pdu->command_id); + nvme_tcp_ddp_ddgst_recalc(queue->rcv_hash, rq); + } + + nvme_tcp_ddgst_final(queue->rcv_hash, &queue->exp_ddgst); + if (queue->recv_ddgst != queue->exp_ddgst) { + dev_err(queue->ctrl->ctrl.device, + "data digest error: recv %#x expected %#x\n", + le32_to_cpu(queue->recv_ddgst), + le32_to_cpu(queue->exp_ddgst)); + return -EIO; + } } if (pdu->hdr.flags & NVME_TCP_F_DATA_SUCCESS) { - struct request *rq = blk_mq_tag_to_rq(nvme_tcp_tagset(queue), 
- pdu->command_id); + if (!rq) + rq = blk_mq_tag_to_rq(nvme_tcp_tagset(queue), + pdu->command_id); nvme_tcp_end_request(rq, NVME_SC_SUCCESS); queue->nr_cqe++; @@ -1841,8 +1913,10 @@ static void __nvme_tcp_stop_queue(struct nvme_tcp_queue *queue) nvme_tcp_restore_sock_calls(queue); cancel_work_sync(&queue->io_work); - if (test_bit(NVME_TCP_Q_OFF_DDP, &queue->flags)) + if (test_bit(NVME_TCP_Q_OFF_DDP, &queue->flags) || + test_bit(NVME_TCP_Q_OFF_DDGST_RX, &queue->flags)) nvme_tcp_unoffload_socket(queue); + } static void nvme_tcp_stop_queue(struct nvme_ctrl *nctrl, int qid) @@ -1970,8 +2044,6 @@ static int nvme_tcp_alloc_admin_queue(struct nvme_ctrl *ctrl) { int ret; - to_tcp_ctrl(ctrl)->offloading_netdev = NULL; - ret = nvme_tcp_alloc_queue(ctrl, 0, NVME_AQ_DEPTH); if (ret) return ret;
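As background to what the NIC offload verifies here: the NVMe/TCP data digest is a CRC32C over the PDU data, which the driver otherwise computes through a "crc32c" ahash fed with the request's scatterlist. Below is a minimal contiguous-buffer illustration using the kernel's crc32c() library helper; the function name is hypothetical, and the ~0 seed plus final bit inversion are assumptions mirroring the crc32c transform's convention:

```c
#include <linux/kernel.h>
#include <linux/crc32c.h>
#include <linux/types.h>

/* Illustrative only: compute an NVMe/TCP-style data digest over a
 * contiguous buffer. The driver itself runs a "crc32c" ahash over the
 * request's scatterlist; the seed and inversion here follow that
 * transform's convention.
 */
static __le32 nvme_tcp_example_data_digest(const void *data, unsigned int len)
{
	return cpu_to_le32(~crc32c(~0, data, len));
}
```

In the offloaded path this per-byte computation is exactly what skb->ddp_crc lets the driver skip: when every skb covering the PDU carries the flag, the received digest is taken as already validated, and only the flagged case falls back to the software recalculation above.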