
[v3,net-next,08/21] nvme-tcp: Recalculate crc at the end of the capsule

Message ID 20210201100509.27351-9-borisp@mellanox.com (mailing list archive)
State Changes Requested
Delegated to: Netdev Maintainers
Series nvme-tcp receive offloads

Checks

Context Check Description
netdev/cover_letter success Link
netdev/fixes_present success Link
netdev/patch_count fail Series longer than 15 patches
netdev/tree_selection success Clearly marked for net-next
netdev/subject_prefix success Link
netdev/cc_maintainers success CCed 5 of 5 maintainers
netdev/source_inline fail Was 0 now: 2
netdev/verify_signedoff success Link
netdev/module_param success Was 0 now: 0
netdev/build_32bit fail Errors and warnings before: 1 this patch: 2
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/verify_fixes success Link
netdev/checkpatch fail ERROR: trailing whitespace; WARNING: line length of 83 exceeds 80 columns; WARNING: line length of 84 exceeds 80 columns; WARNING: line length of 85 exceeds 80 columns
netdev/build_allmodconfig_warn fail Errors and warnings before: 1 this patch: 2
netdev/header_inline success Link
netdev/stable success Stable not CCed

Commit Message

Boris Pismenny Feb. 1, 2021, 10:04 a.m. UTC
From: Ben Ben-ishay <benishay@nvidia.com>

Add crc offload of the nvme capsule: check that the crc-verified bit is
set on all of the capsule's skbs, and if not, recalculate the crc in SW
and check it.

This patch reworks the receive-side crc calculation to always
run at the end, so as to keep a single flow for both offload
and non-offload. This change simplifies the code, but it may degrade
performance for non-offload crc calculation.
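
For clarity, the resulting end-of-capsule flow is roughly as follows (an
illustrative sketch of the nvme_tcp_recv_ddgst() logic below, not the
literal diff; rq is the request resolved from pdu->command_id):

	/* queue->ddgst_valid stays true only while every skb of the capsule
	 * carried the hardware-verified crc bit.
	 */
	offload_en   = test_bit(NVME_TCP_Q_OFF_DDGST_RX, &queue->flags);
	offload_fail = !nvme_tcp_ddp_ddgst_ok(queue);

	if (!offload_en || offload_fail) {
		if (offload_en && offload_fail)	/* HW could not verify: redo in SW */
			nvme_tcp_ddp_ddgst_recalc(queue->rcv_hash, rq);
		nvme_tcp_ddgst_final(queue->rcv_hash, &queue->exp_ddgst);
		if (queue->recv_ddgst != queue->exp_ddgst)
			return -EIO;	/* data digest mismatch */
	}
	/* otherwise the NIC already verified the digest; nothing to recompute */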

Signed-off-by: Boris Pismenny <borisp@mellanox.com>
Signed-off-by: Ben Ben-Ishay <benishay@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Yoray Zack <yorayz@mellanox.com>
---
 drivers/nvme/host/tcp.c | 118 ++++++++++++++++++++++++++++++++--------
 1 file changed, 95 insertions(+), 23 deletions(-)

Comments

Sagi Grimberg Feb. 3, 2021, 9:06 a.m. UTC | #1
On 2/1/21 2:04 AM, Boris Pismenny wrote:
> From: Ben Ben-ishay <benishay@nvidia.com>
> 
> crc offload of the nvme capsule. Check if all the skb bits
> are on, and if not recalculate the crc in SW and check it.
> 
> This patch reworks the receive-side crc calculation to always
> run at the end, so as to keep a single flow for both offload
> and non-offload. This change simplifies the code, but it may degrade
> performance for non-offload crc calculation.
> 
> Signed-off-by: Boris Pismenny <borisp@mellanox.com>
> Signed-off-by: Ben Ben-Ishay <benishay@mellanox.com>
> Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
> Signed-off-by: Yoray Zack <yorayz@mellanox.com>
> ---
>   drivers/nvme/host/tcp.c | 118 ++++++++++++++++++++++++++++++++--------
>   1 file changed, 95 insertions(+), 23 deletions(-)
> 
> diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
> index 5cb46deb56e0..eb47cf6982d7 100644
> --- a/drivers/nvme/host/tcp.c
> +++ b/drivers/nvme/host/tcp.c
> @@ -69,6 +69,7 @@ enum nvme_tcp_queue_flags {
>   	NVME_TCP_Q_LIVE		= 1,
>   	NVME_TCP_Q_POLLING	= 2,
>   	NVME_TCP_Q_OFF_DDP	= 3,
> +	NVME_TCP_Q_OFF_DDGST_RX = 4,
>   };
>   
>   enum nvme_tcp_recv_state {
> @@ -96,6 +97,7 @@ struct nvme_tcp_queue {
>   	size_t			data_remaining;
>   	size_t			ddgst_remaining;
>   	unsigned int		nr_cqe;
> +	bool			ddgst_valid;
>   
>   	/* send state */
>   	struct nvme_tcp_request *request;
> @@ -234,7 +236,56 @@ static inline size_t nvme_tcp_pdu_last_send(struct nvme_tcp_request *req,
>   	return nvme_tcp_pdu_data_left(req) <= len;
>   }
>   
> -#ifdef CONFIG_TCP_DDP
> +static inline bool nvme_tcp_ddp_ddgst_ok(struct nvme_tcp_queue *queue)
> +{
> +	return queue->ddgst_valid;
> +}
> +
> +static inline void nvme_tcp_ddp_ddgst_update(struct nvme_tcp_queue *queue,
> +					     struct sk_buff *skb)
> +{
> +	if (queue->ddgst_valid)
> +#ifdef CONFIG_TCP_DDP_CRC
> +		queue->ddgst_valid = skb->ddp_crc;
> +#else
> +		queue->ddgst_valid = false;
> +#endif
> +}
> +
> +static int nvme_tcp_req_map_sg(struct nvme_tcp_request *req, struct request *rq)
> +{
> +	int ret;
> +
> +	req->ddp.sg_table.sgl = req->ddp.first_sgl;
> +	ret = sg_alloc_table_chained(&req->ddp.sg_table, blk_rq_nr_phys_segments(rq),
> +				     req->ddp.sg_table.sgl, SG_CHUNK_SIZE);
> +	if (ret)
> +		return -ENOMEM;
> +	req->ddp.nents = blk_rq_map_sg(rq->q, rq, req->ddp.sg_table.sgl);
> +	return 0;
> +}
> +
> +static void nvme_tcp_ddp_ddgst_recalc(struct ahash_request *hash,
> +				      struct request *rq)
> +{
> +	struct nvme_tcp_request *req;
> +
> +	if (!rq)
> +		return;
> +
> +	req = blk_mq_rq_to_pdu(rq);
> +
> +	if (!req->offloaded && nvme_tcp_req_map_sg(req, rq))
> +		return;
> +
> +	crypto_ahash_init(hash);
> +	req->ddp.sg_table.sgl = req->ddp.first_sgl;
> +	ahash_request_set_crypt(hash, req->ddp.sg_table.sgl, NULL,
> +				le32_to_cpu(req->data_len));
> +	crypto_ahash_update(hash);
> +}
> +
> +#if defined(CONFIG_TCP_DDP) || defined(CONFIG_TCP_DDP_CRC)
>   
>   static bool nvme_tcp_resync_request(struct sock *sk, u32 seq, u32 flags);
>   static void nvme_tcp_ddp_teardown_done(void *ddp_ctx);
> @@ -290,12 +341,9 @@ int nvme_tcp_setup_ddp(struct nvme_tcp_queue *queue,
>   	}
>   
>   	req->ddp.command_id = command_id;
> -	req->ddp.sg_table.sgl = req->ddp.first_sgl;
> -	ret = sg_alloc_table_chained(&req->ddp.sg_table, blk_rq_nr_phys_segments(rq),
> -				     req->ddp.sg_table.sgl, SG_CHUNK_SIZE);
> +	ret = nvme_tcp_req_map_sg(req, rq);

Why didn't you introduce nvme_tcp_req_map_sg in the first place?

>   	if (ret)
>   		return -ENOMEM;
> -	req->ddp.nents = blk_rq_map_sg(rq->q, rq, req->ddp.sg_table.sgl);
>   
>   	ret = netdev->tcp_ddp_ops->tcp_ddp_setup(netdev,
>   						 queue->sock->sk,
> @@ -317,7 +365,7 @@ int nvme_tcp_offload_socket(struct nvme_tcp_queue *queue)
>   		return -ENODEV;
>   	}
>   
> -	if (!(netdev->features & NETIF_F_HW_TCP_DDP)) {
> +	if (!(netdev->features & (NETIF_F_HW_TCP_DDP | NETIF_F_HW_TCP_DDP_CRC_RX))) {
>   		dev_put(netdev);
>   		return -EOPNOTSUPP;
>   	}
> @@ -345,6 +393,9 @@ int nvme_tcp_offload_socket(struct nvme_tcp_queue *queue)
>   	if (netdev->features & NETIF_F_HW_TCP_DDP)
>   		set_bit(NVME_TCP_Q_OFF_DDP, &queue->flags);
>   
> +	if (netdev->features & NETIF_F_HW_TCP_DDP_CRC_RX)
> +		set_bit(NVME_TCP_Q_OFF_DDGST_RX, &queue->flags);
> +
>   	return ret;
>   }
>   
> @@ -376,7 +427,7 @@ int nvme_tcp_offload_limits(struct nvme_tcp_queue *queue)
>   		return -ENODEV;
>   	}
>   
> -	if (netdev->features & NETIF_F_HW_TCP_DDP &&
> +	if ((netdev->features & (NETIF_F_HW_TCP_DDP | NETIF_F_HW_TCP_DDP_CRC_RX)) &&
>   	    netdev->tcp_ddp_ops &&
>   	    netdev->tcp_ddp_ops->tcp_ddp_limits)
>   		ret = netdev->tcp_ddp_ops->tcp_ddp_limits(netdev, &limits);
> @@ -739,6 +790,7 @@ static void nvme_tcp_init_recv_ctx(struct nvme_tcp_queue *queue)
>   	queue->pdu_offset = 0;
>   	queue->data_remaining = -1;
>   	queue->ddgst_remaining = 0;
> +	queue->ddgst_valid = true;
>   }
>   
>   static void nvme_tcp_error_recovery(struct nvme_ctrl *ctrl)
> @@ -919,7 +971,8 @@ static int nvme_tcp_recv_pdu(struct nvme_tcp_queue *queue, struct sk_buff *skb,
>   
>   	u64 pdu_seq = TCP_SKB_CB(skb)->seq + *offset - queue->pdu_offset;
>   
> -	if (test_bit(NVME_TCP_Q_OFF_DDP, &queue->flags))
> +	if (test_bit(NVME_TCP_Q_OFF_DDP, &queue->flags) ||
> +	    test_bit(NVME_TCP_Q_OFF_DDGST_RX, &queue->flags))
>   		nvme_tcp_resync_response(queue, pdu_seq);
>   
>   	ret = skb_copy_bits(skb, *offset,
> @@ -988,6 +1041,8 @@ static int nvme_tcp_recv_data(struct nvme_tcp_queue *queue, struct sk_buff *skb,
>   	struct nvme_tcp_request *req;
>   	struct request *rq;
>   
> +	if (queue->data_digest && test_bit(NVME_TCP_Q_OFF_DDGST_RX, &queue->flags))
> +		nvme_tcp_ddp_ddgst_update(queue, skb);
>   	rq = blk_mq_tag_to_rq(nvme_tcp_tagset(queue), pdu->command_id);
>   	if (!rq) {
>   		dev_err(queue->ctrl->ctrl.device,
> @@ -1025,15 +1080,17 @@ static int nvme_tcp_recv_data(struct nvme_tcp_queue *queue, struct sk_buff *skb,
>   		recv_len = min_t(size_t, recv_len,
>   				iov_iter_count(&req->iter));
>   
> -		if (test_bit(NVME_TCP_Q_OFF_DDP, &queue->flags)) {
> -			if (queue->data_digest)
> +		if (test_bit(NVME_TCP_Q_OFF_DDP, &queue->flags)) {
> +			if (queue->data_digest &&
> +			    !test_bit(NVME_TCP_Q_OFF_DDGST_RX, &queue->flags))
>   				ret = skb_ddp_copy_and_hash_datagram_iter(skb, *offset,
>   						&req->iter, recv_len, queue->rcv_hash);
>   			else
>   				ret = skb_ddp_copy_datagram_iter(skb, *offset,
>   						&req->iter, recv_len);
>   		} else {
> -			if (queue->data_digest)
> +			if (queue->data_digest &&
> +			    !test_bit(NVME_TCP_Q_OFF_DDGST_RX, &queue->flags))
>   				ret = skb_copy_and_hash_datagram_iter(skb, *offset,
>   						&req->iter, recv_len, queue->rcv_hash);
>   			else
> @@ -1055,7 +1112,6 @@ static int nvme_tcp_recv_data(struct nvme_tcp_queue *queue, struct sk_buff *skb,
>   
>   	if (!queue->data_remaining) {
>   		if (queue->data_digest) {
> -			nvme_tcp_ddgst_final(queue->rcv_hash, &queue->exp_ddgst);
>   			queue->ddgst_remaining = NVME_TCP_DIGEST_LENGTH;
>   		} else {
>   			if (pdu->hdr.flags & NVME_TCP_F_DATA_SUCCESS) {
> @@ -1076,8 +1132,12 @@ static int nvme_tcp_recv_ddgst(struct nvme_tcp_queue *queue,
>   	char *ddgst = (char *)&queue->recv_ddgst;
>   	size_t recv_len = min_t(size_t, *len, queue->ddgst_remaining);
>   	off_t off = NVME_TCP_DIGEST_LENGTH - queue->ddgst_remaining;
> +	bool offload_fail, offload_en;
> +	struct request *rq = NULL;
>   	int ret;
>   
> +	if (test_bit(NVME_TCP_Q_OFF_DDGST_RX, &queue->flags))
> +		nvme_tcp_ddp_ddgst_update(queue, skb);
>   	ret = skb_copy_bits(skb, *offset, &ddgst[off], recv_len);
>   	if (unlikely(ret))
>   		return ret;
> @@ -1088,17 +1148,29 @@ static int nvme_tcp_recv_ddgst(struct nvme_tcp_queue *queue,
>   	if (queue->ddgst_remaining)
>   		return 0;
>   
> -	if (queue->recv_ddgst != queue->exp_ddgst) {
> -		dev_err(queue->ctrl->ctrl.device,
> -			"data digest error: recv %#x expected %#x\n",
> -			le32_to_cpu(queue->recv_ddgst),
> -			le32_to_cpu(queue->exp_ddgst));
> -		return -EIO;
> +	offload_fail = !nvme_tcp_ddp_ddgst_ok(queue);
> +	offload_en = test_bit(NVME_TCP_Q_OFF_DDGST_RX, &queue->flags);
> +	if (!offload_en || offload_fail) {
> +		if (offload_en && offload_fail) { // software-fallback
> +			rq = blk_mq_tag_to_rq(nvme_tcp_tagset(queue),
> +					      pdu->command_id);
> +			nvme_tcp_ddp_ddgst_recalc(queue->rcv_hash, rq);
> +		}
> +
> +		nvme_tcp_ddgst_final(queue->rcv_hash, &queue->exp_ddgst);
> +		if (queue->recv_ddgst != queue->exp_ddgst) {
> +			dev_err(queue->ctrl->ctrl.device,
> +				"data digest error: recv %#x expected %#x\n",
> +				le32_to_cpu(queue->recv_ddgst),
> +				le32_to_cpu(queue->exp_ddgst));
> +			return -EIO;
> +		}

I still dislike this hunk. Can you split the recalc logic into its
own ddp function at least? This is just a confusing piece of code.
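
For illustration, one possible shape for such a split (a rough sketch
against the hunk above, with a hypothetical helper name; not code from
this series):

	/* Hypothetical helper: perform the software fallback when the NIC
	 * could not verify the digest, and return the request it resolved.
	 */
	static struct request *
	nvme_tcp_ddp_ddgst_fallback(struct nvme_tcp_queue *queue, u16 command_id)
	{
		struct request *rq;

		rq = blk_mq_tag_to_rq(nvme_tcp_tagset(queue), command_id);
		nvme_tcp_ddp_ddgst_recalc(queue->rcv_hash, rq);
		return rq;
	}

so that nvme_tcp_recv_ddgst() is left with only the comparison:

	if (!offload_en || offload_fail) {
		if (offload_en && offload_fail)	/* software fallback */
			rq = nvme_tcp_ddp_ddgst_fallback(queue, pdu->command_id);
		nvme_tcp_ddgst_final(queue->rcv_hash, &queue->exp_ddgst);
		if (queue->recv_ddgst != queue->exp_ddgst) {
			...
			return -EIO;
		}
	}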

>   	}
>   
>   	if (pdu->hdr.flags & NVME_TCP_F_DATA_SUCCESS) {
> -		struct request *rq = blk_mq_tag_to_rq(nvme_tcp_tagset(queue),
> -						pdu->command_id);
> +		if (!rq)
> +			rq = blk_mq_tag_to_rq(nvme_tcp_tagset(queue),
> +					      pdu->command_id);

Why is this change needed? Maybe just move this assignment up?

>   
>   		nvme_tcp_end_request(rq, NVME_SC_SUCCESS);
>   		queue->nr_cqe++;
> @@ -1841,8 +1913,10 @@ static void __nvme_tcp_stop_queue(struct nvme_tcp_queue *queue)
>   	nvme_tcp_restore_sock_calls(queue);
>   	cancel_work_sync(&queue->io_work);
>   
> -	if (test_bit(NVME_TCP_Q_OFF_DDP, &queue->flags))
> +	if (test_bit(NVME_TCP_Q_OFF_DDP, &queue->flags) ||
> +	    test_bit(NVME_TCP_Q_OFF_DDGST_RX, &queue->flags))
>   		nvme_tcp_unoffload_socket(queue);
> +

extra newline

>   }
>   
>   static void nvme_tcp_stop_queue(struct nvme_ctrl *nctrl, int qid)
> @@ -1970,8 +2044,6 @@ static int nvme_tcp_alloc_admin_queue(struct nvme_ctrl *ctrl)
>   {
>   	int ret;
>   
> -	to_tcp_ctrl(ctrl)->offloading_netdev = NULL;
> -

Unclear what is the intent here.

>   	ret = nvme_tcp_alloc_queue(ctrl, 0, NVME_AQ_DEPTH);
>   	if (ret)
>   		return ret;
>
Or Gerlitz Feb. 4, 2021, 6:36 p.m. UTC | #2
On Wed, Feb 3, 2021 at 11:12 AM Sagi Grimberg <sagi@grimberg.me> wrote:

> > @@ -1841,8 +1913,10 @@ static void __nvme_tcp_stop_queue(struct nvme_tcp_queue *queue)
> >       nvme_tcp_restore_sock_calls(queue);
> >       cancel_work_sync(&queue->io_work);
> >
> > -     if (test_bit(NVME_TCP_Q_OFF_DDP, &queue->flags))
> > +     if (test_bit(NVME_TCP_Q_OFF_DDP, &queue->flags) ||
> > +         test_bit(NVME_TCP_Q_OFF_DDGST_RX, &queue->flags))
> >               nvme_tcp_unoffload_socket(queue);
> > +
>
> extra newline

will remove

> >   }
> >
> >   static void nvme_tcp_stop_queue(struct nvme_ctrl *nctrl, int qid)
> > @@ -1970,8 +2044,6 @@ static int nvme_tcp_alloc_admin_queue(struct nvme_ctrl *ctrl)
> >   {
> >       int ret;
> >
> > -     to_tcp_ctrl(ctrl)->offloading_netdev = NULL;
> > -
>
> Unclear what is the intent here.

yep, unclear indeed.. will look and probably remove

as for your other comment on this patch, will get back to you later on

> >       ret = nvme_tcp_alloc_queue(ctrl, 0, NVME_AQ_DEPTH);
> >       if (ret)
> >               return ret;
Or Gerlitz Feb. 7, 2021, 4:40 p.m. UTC | #3
On Wed, Feb 3, 2021 at 11:12 AM Sagi Grimberg <sagi@grimberg.me> wrote:
> On 2/1/21 2:04 AM, Boris Pismenny wrote:

> > @@ -290,12 +341,9 @@ int nvme_tcp_setup_ddp(struct nvme_tcp_queue *queue,
> >       }
> >
> >       req->ddp.command_id = command_id;
> > -     req->ddp.sg_table.sgl = req->ddp.first_sgl;
> > -     ret = sg_alloc_table_chained(&req->ddp.sg_table, blk_rq_nr_phys_segments(rq),
> > -                                  req->ddp.sg_table.sgl, SG_CHUNK_SIZE);
> > +     ret = nvme_tcp_req_map_sg(req, rq);
>
> Why didn't you introduce nvme_tcp_req_map_sg in the first place?

OK, will do

> >       if (ret)
> >               return -ENOMEM;
> > -     req->ddp.nents = blk_rq_map_sg(rq->q, rq, req->ddp.sg_table.sgl);
> >
> >       ret = netdev->tcp_ddp_ops->tcp_ddp_setup(netdev,
> >                                                queue->sock->sk,

> > @@ -1088,17 +1148,29 @@ static int nvme_tcp_recv_ddgst(struct nvme_tcp_queue *queue,
> >       if (queue->ddgst_remaining)
> >               return 0;
> >
> > -     if (queue->recv_ddgst != queue->exp_ddgst) {
> > -             dev_err(queue->ctrl->ctrl.device,
> > -                     "data digest error: recv %#x expected %#x\n",
> > -                     le32_to_cpu(queue->recv_ddgst),
> > -                     le32_to_cpu(queue->exp_ddgst));
> > -             return -EIO;
> > +     offload_fail = !nvme_tcp_ddp_ddgst_ok(queue);
> > +     offload_en = test_bit(NVME_TCP_Q_OFF_DDGST_RX, &queue->flags);
> > +     if (!offload_en || offload_fail) {
> > +             if (offload_en && offload_fail) { // software-fallback
> > +                     rq = blk_mq_tag_to_rq(nvme_tcp_tagset(queue),
> > +                                           pdu->command_id);
> > +                     nvme_tcp_ddp_ddgst_recalc(queue->rcv_hash, rq);
> > +             }
> > +
> > +             nvme_tcp_ddgst_final(queue->rcv_hash, &queue->exp_ddgst);
> > +             if (queue->recv_ddgst != queue->exp_ddgst) {
> > +                     dev_err(queue->ctrl->ctrl.device,
> > +                             "data digest error: recv %#x expected %#x\n",
> > +                             le32_to_cpu(queue->recv_ddgst),
> > +                             le32_to_cpu(queue->exp_ddgst));
> > +                     return -EIO;
> > +             }
>
> I still dislike this hunk. Can you split the recalc logic into its
> own ddp function at least? This is just a confusing piece of code.

mmm, we added two boolean predicates (offload_en and
offload_fail) plus a comment (software-fallback)
to clarify this piece... thought it could fly

> >       if (pdu->hdr.flags & NVME_TCP_F_DATA_SUCCESS) {
> > -             struct request *rq = blk_mq_tag_to_rq(nvme_tcp_tagset(queue),
> > -                                             pdu->command_id);
> > +             if (!rq)
> > +                     rq = blk_mq_tag_to_rq(nvme_tcp_tagset(queue),
> > +                                           pdu->command_id);
>
> Why is this change needed? Maybe just move this assignment up?

OK will move it up
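
For example, roughly (a sketch of that rearrangement, not the posted code):

	rq = blk_mq_tag_to_rq(nvme_tcp_tagset(queue), pdu->command_id);

	offload_fail = !nvme_tcp_ddp_ddgst_ok(queue);
	offload_en = test_bit(NVME_TCP_Q_OFF_DDGST_RX, &queue->flags);
	if (!offload_en || offload_fail) {
		if (offload_en && offload_fail)	/* software fallback */
			nvme_tcp_ddp_ddgst_recalc(queue->rcv_hash, rq);
		nvme_tcp_ddgst_final(queue->rcv_hash, &queue->exp_ddgst);
		if (queue->recv_ddgst != queue->exp_ddgst) {
			/* dev_err(...) as in the current code */
			return -EIO;
		}
	}

	if (pdu->hdr.flags & NVME_TCP_F_DATA_SUCCESS) {
		nvme_tcp_end_request(rq, NVME_SC_SUCCESS);
		queue->nr_cqe++;
	}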

Patch

diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 5cb46deb56e0..eb47cf6982d7 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -69,6 +69,7 @@  enum nvme_tcp_queue_flags {
 	NVME_TCP_Q_LIVE		= 1,
 	NVME_TCP_Q_POLLING	= 2,
 	NVME_TCP_Q_OFF_DDP	= 3,
+	NVME_TCP_Q_OFF_DDGST_RX = 4,
 };
 
 enum nvme_tcp_recv_state {
@@ -96,6 +97,7 @@  struct nvme_tcp_queue {
 	size_t			data_remaining;
 	size_t			ddgst_remaining;
 	unsigned int		nr_cqe;
+	bool			ddgst_valid;
 
 	/* send state */
 	struct nvme_tcp_request *request;
@@ -234,7 +236,56 @@  static inline size_t nvme_tcp_pdu_last_send(struct nvme_tcp_request *req,
 	return nvme_tcp_pdu_data_left(req) <= len;
 }
 
-#ifdef CONFIG_TCP_DDP
+static inline bool nvme_tcp_ddp_ddgst_ok(struct nvme_tcp_queue *queue)
+{
+	return queue->ddgst_valid;
+}
+
+static inline void nvme_tcp_ddp_ddgst_update(struct nvme_tcp_queue *queue,
+					     struct sk_buff *skb)
+{
+	if (queue->ddgst_valid)
+#ifdef CONFIG_TCP_DDP_CRC
+		queue->ddgst_valid = skb->ddp_crc;
+#else
+		queue->ddgst_valid = false;
+#endif
+}
+
+static int nvme_tcp_req_map_sg(struct nvme_tcp_request *req, struct request *rq)
+{
+	int ret;
+
+	req->ddp.sg_table.sgl = req->ddp.first_sgl;
+	ret = sg_alloc_table_chained(&req->ddp.sg_table, blk_rq_nr_phys_segments(rq),
+				     req->ddp.sg_table.sgl, SG_CHUNK_SIZE);
+	if (ret)
+		return -ENOMEM;
+	req->ddp.nents = blk_rq_map_sg(rq->q, rq, req->ddp.sg_table.sgl);
+	return 0;
+}
+
+static void nvme_tcp_ddp_ddgst_recalc(struct ahash_request *hash,
+				      struct request *rq)
+{
+	struct nvme_tcp_request *req;
+
+	if (!rq)
+		return;
+
+	req = blk_mq_rq_to_pdu(rq);
+
+	if (!req->offloaded && nvme_tcp_req_map_sg(req, rq))
+		return;
+
+	crypto_ahash_init(hash);
+	req->ddp.sg_table.sgl = req->ddp.first_sgl;
+	ahash_request_set_crypt(hash, req->ddp.sg_table.sgl, NULL,
+				le32_to_cpu(req->data_len));
+	crypto_ahash_update(hash);
+}
+
+#if defined(CONFIG_TCP_DDP) || defined(CONFIG_TCP_DDP_CRC)
 
 static bool nvme_tcp_resync_request(struct sock *sk, u32 seq, u32 flags);
 static void nvme_tcp_ddp_teardown_done(void *ddp_ctx);
@@ -290,12 +341,9 @@  int nvme_tcp_setup_ddp(struct nvme_tcp_queue *queue,
 	}
 
 	req->ddp.command_id = command_id;
-	req->ddp.sg_table.sgl = req->ddp.first_sgl;
-	ret = sg_alloc_table_chained(&req->ddp.sg_table, blk_rq_nr_phys_segments(rq),
-				     req->ddp.sg_table.sgl, SG_CHUNK_SIZE);
+	ret = nvme_tcp_req_map_sg(req, rq);
 	if (ret)
 		return -ENOMEM;
-	req->ddp.nents = blk_rq_map_sg(rq->q, rq, req->ddp.sg_table.sgl);
 
 	ret = netdev->tcp_ddp_ops->tcp_ddp_setup(netdev,
 						 queue->sock->sk,
@@ -317,7 +365,7 @@  int nvme_tcp_offload_socket(struct nvme_tcp_queue *queue)
 		return -ENODEV;
 	}
 
-	if (!(netdev->features & NETIF_F_HW_TCP_DDP)) {
+	if (!(netdev->features & (NETIF_F_HW_TCP_DDP | NETIF_F_HW_TCP_DDP_CRC_RX))) {
 		dev_put(netdev);
 		return -EOPNOTSUPP;
 	}
@@ -345,6 +393,9 @@  int nvme_tcp_offload_socket(struct nvme_tcp_queue *queue)
 	if (netdev->features & NETIF_F_HW_TCP_DDP)
 		set_bit(NVME_TCP_Q_OFF_DDP, &queue->flags);
 
+	if (netdev->features & NETIF_F_HW_TCP_DDP_CRC_RX)
+		set_bit(NVME_TCP_Q_OFF_DDGST_RX, &queue->flags);
+
 	return ret;
 }
 
@@ -376,7 +427,7 @@  int nvme_tcp_offload_limits(struct nvme_tcp_queue *queue)
 		return -ENODEV;
 	}
 
-	if (netdev->features & NETIF_F_HW_TCP_DDP &&
+	if ((netdev->features & (NETIF_F_HW_TCP_DDP | NETIF_F_HW_TCP_DDP_CRC_RX)) &&
 	    netdev->tcp_ddp_ops &&
 	    netdev->tcp_ddp_ops->tcp_ddp_limits)
 		ret = netdev->tcp_ddp_ops->tcp_ddp_limits(netdev, &limits);
@@ -739,6 +790,7 @@  static void nvme_tcp_init_recv_ctx(struct nvme_tcp_queue *queue)
 	queue->pdu_offset = 0;
 	queue->data_remaining = -1;
 	queue->ddgst_remaining = 0;
+	queue->ddgst_valid = true;
 }
 
 static void nvme_tcp_error_recovery(struct nvme_ctrl *ctrl)
@@ -919,7 +971,8 @@  static int nvme_tcp_recv_pdu(struct nvme_tcp_queue *queue, struct sk_buff *skb,
 
 	u64 pdu_seq = TCP_SKB_CB(skb)->seq + *offset - queue->pdu_offset;
 
-	if (test_bit(NVME_TCP_Q_OFF_DDP, &queue->flags))
+	if (test_bit(NVME_TCP_Q_OFF_DDP, &queue->flags) ||
+	    test_bit(NVME_TCP_Q_OFF_DDGST_RX, &queue->flags))
 		nvme_tcp_resync_response(queue, pdu_seq);
 
 	ret = skb_copy_bits(skb, *offset,
@@ -988,6 +1041,8 @@  static int nvme_tcp_recv_data(struct nvme_tcp_queue *queue, struct sk_buff *skb,
 	struct nvme_tcp_request *req;
 	struct request *rq;
 
+	if (queue->data_digest && test_bit(NVME_TCP_Q_OFF_DDGST_RX, &queue->flags))
+		nvme_tcp_ddp_ddgst_update(queue, skb);
 	rq = blk_mq_tag_to_rq(nvme_tcp_tagset(queue), pdu->command_id);
 	if (!rq) {
 		dev_err(queue->ctrl->ctrl.device,
@@ -1025,15 +1080,17 @@  static int nvme_tcp_recv_data(struct nvme_tcp_queue *queue, struct sk_buff *skb,
 		recv_len = min_t(size_t, recv_len,
 				iov_iter_count(&req->iter));
 
-		if (test_bit(NVME_TCP_Q_OFF_DDP, &queue->flags)) {
-			if (queue->data_digest)
+		if (test_bit(NVME_TCP_Q_OFF_DDP, &queue->flags)) { 
+			if (queue->data_digest &&
+			    !test_bit(NVME_TCP_Q_OFF_DDGST_RX, &queue->flags))
 				ret = skb_ddp_copy_and_hash_datagram_iter(skb, *offset,
 						&req->iter, recv_len, queue->rcv_hash);
 			else
 				ret = skb_ddp_copy_datagram_iter(skb, *offset,
 						&req->iter, recv_len);
 		} else {
-			if (queue->data_digest)
+			if (queue->data_digest &&
+			    !test_bit(NVME_TCP_Q_OFF_DDGST_RX, &queue->flags))
 				ret = skb_copy_and_hash_datagram_iter(skb, *offset,
 						&req->iter, recv_len, queue->rcv_hash);
 			else
@@ -1055,7 +1112,6 @@  static int nvme_tcp_recv_data(struct nvme_tcp_queue *queue, struct sk_buff *skb,
 
 	if (!queue->data_remaining) {
 		if (queue->data_digest) {
-			nvme_tcp_ddgst_final(queue->rcv_hash, &queue->exp_ddgst);
 			queue->ddgst_remaining = NVME_TCP_DIGEST_LENGTH;
 		} else {
 			if (pdu->hdr.flags & NVME_TCP_F_DATA_SUCCESS) {
@@ -1076,8 +1132,12 @@  static int nvme_tcp_recv_ddgst(struct nvme_tcp_queue *queue,
 	char *ddgst = (char *)&queue->recv_ddgst;
 	size_t recv_len = min_t(size_t, *len, queue->ddgst_remaining);
 	off_t off = NVME_TCP_DIGEST_LENGTH - queue->ddgst_remaining;
+	bool offload_fail, offload_en;
+	struct request *rq = NULL;
 	int ret;
 
+	if (test_bit(NVME_TCP_Q_OFF_DDGST_RX, &queue->flags))
+		nvme_tcp_ddp_ddgst_update(queue, skb);
 	ret = skb_copy_bits(skb, *offset, &ddgst[off], recv_len);
 	if (unlikely(ret))
 		return ret;
@@ -1088,17 +1148,29 @@  static int nvme_tcp_recv_ddgst(struct nvme_tcp_queue *queue,
 	if (queue->ddgst_remaining)
 		return 0;
 
-	if (queue->recv_ddgst != queue->exp_ddgst) {
-		dev_err(queue->ctrl->ctrl.device,
-			"data digest error: recv %#x expected %#x\n",
-			le32_to_cpu(queue->recv_ddgst),
-			le32_to_cpu(queue->exp_ddgst));
-		return -EIO;
+	offload_fail = !nvme_tcp_ddp_ddgst_ok(queue);
+	offload_en = test_bit(NVME_TCP_Q_OFF_DDGST_RX, &queue->flags);
+	if (!offload_en || offload_fail) {
+		if (offload_en && offload_fail) { // software-fallback
+			rq = blk_mq_tag_to_rq(nvme_tcp_tagset(queue),
+					      pdu->command_id);
+			nvme_tcp_ddp_ddgst_recalc(queue->rcv_hash, rq);
+		}
+
+		nvme_tcp_ddgst_final(queue->rcv_hash, &queue->exp_ddgst);
+		if (queue->recv_ddgst != queue->exp_ddgst) {
+			dev_err(queue->ctrl->ctrl.device,
+				"data digest error: recv %#x expected %#x\n",
+				le32_to_cpu(queue->recv_ddgst),
+				le32_to_cpu(queue->exp_ddgst));
+			return -EIO;
+		}
 	}
 
 	if (pdu->hdr.flags & NVME_TCP_F_DATA_SUCCESS) {
-		struct request *rq = blk_mq_tag_to_rq(nvme_tcp_tagset(queue),
-						pdu->command_id);
+		if (!rq)
+			rq = blk_mq_tag_to_rq(nvme_tcp_tagset(queue),
+					      pdu->command_id);
 
 		nvme_tcp_end_request(rq, NVME_SC_SUCCESS);
 		queue->nr_cqe++;
@@ -1841,8 +1913,10 @@  static void __nvme_tcp_stop_queue(struct nvme_tcp_queue *queue)
 	nvme_tcp_restore_sock_calls(queue);
 	cancel_work_sync(&queue->io_work);
 
-	if (test_bit(NVME_TCP_Q_OFF_DDP, &queue->flags))
+	if (test_bit(NVME_TCP_Q_OFF_DDP, &queue->flags) ||
+	    test_bit(NVME_TCP_Q_OFF_DDGST_RX, &queue->flags))
 		nvme_tcp_unoffload_socket(queue);
+
 }
 
 static void nvme_tcp_stop_queue(struct nvme_ctrl *nctrl, int qid)
@@ -1970,8 +2044,6 @@  static int nvme_tcp_alloc_admin_queue(struct nvme_ctrl *ctrl)
 {
 	int ret;
 
-	to_tcp_ctrl(ctrl)->offloading_netdev = NULL;
-
 	ret = nvme_tcp_alloc_queue(ctrl, 0, NVME_AQ_DEPTH);
 	if (ret)
 		return ret;