diff mbox series

[v2,net] octeontx2-pf: Disable HW TSO for seg size < 16B

Message ID 20240313063306.32571-1-gakula@marvell.com (mailing list archive)
State Changes Requested
Delegated to: Netdev Maintainers
Headers show
Series [v2,net] octeontx2-pf: Disable HW TSO for seg size < 16B | expand

Checks

Context Check Description
netdev/series_format success Single patches do not need cover letters
netdev/tree_selection success Clearly marked for net
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag present in non-next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 8 this patch: 8
netdev/build_tools success No tools touched, skip
netdev/cc_maintainers success CCed 8 of 8 maintainers
netdev/build_clang success Errors and warnings before: 956 this patch: 956
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success Fixes tag looks correct
netdev/build_allmodconfig_warn success Errors and warnings before: 969 this patch: 969
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 64 lines checked
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
netdev/contest fail net-next-2024-03-13--09-00 (tests: 908)

Commit Message

Geetha sowjanya March 13, 2024, 6:33 a.m. UTC
Current NIX hardware do not support TSO for the
segment size less 16 bytes. This patch disable hw
TSO for such packets.

Fixes: 86d7476078b8 ("octeontx2-pf: TCP segmentation offload support").
Signed-off-by: Geetha sowjanya <gakula@marvell.com>
---

v1-v2:
 - As suggested by Eric Dumazet used ndo_features_check().
 - Moved the max fargments support check to ndo_features_check.

 .../marvell/octeontx2/nic/otx2_common.c        | 18 ++++++++++++++++++
 .../marvell/octeontx2/nic/otx2_common.h        |  3 +++
 .../ethernet/marvell/octeontx2/nic/otx2_pf.c   |  1 +
 .../ethernet/marvell/octeontx2/nic/otx2_txrx.c | 11 -----------
 .../ethernet/marvell/octeontx2/nic/otx2_vf.c   |  1 +
 5 files changed, 23 insertions(+), 11 deletions(-)

Comments

Eric Dumazet March 13, 2024, 7:57 a.m. UTC | #1
On Wed, Mar 13, 2024 at 7:33 AM Geetha sowjanya <gakula@marvell.com> wrote:
>
> Current NIX hardware do not support TSO for the
> segment size less 16 bytes. This patch disable hw
> TSO for such packets.
>
> Fixes: 86d7476078b8 ("octeontx2-pf: TCP segmentation offload support").
> Signed-off-by: Geetha sowjanya <gakula@marvell.com>
> ---
>
> v1-v2:
>  - As suggested by Eric Dumazet used ndo_features_check().
>  - Moved the max fargments support check to ndo_features_check.
>
>  .../marvell/octeontx2/nic/otx2_common.c        | 18 ++++++++++++++++++
>  .../marvell/octeontx2/nic/otx2_common.h        |  3 +++
>  .../ethernet/marvell/octeontx2/nic/otx2_pf.c   |  1 +
>  .../ethernet/marvell/octeontx2/nic/otx2_txrx.c | 11 -----------
>  .../ethernet/marvell/octeontx2/nic/otx2_vf.c   |  1 +
>  5 files changed, 23 insertions(+), 11 deletions(-)
>
> diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
> index 02d0b707aea5..de61c69370be 100644
> --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
> +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
> @@ -221,6 +221,24 @@ int otx2_set_mac_address(struct net_device *netdev, void *p)
>  }
>  EXPORT_SYMBOL(otx2_set_mac_address);
>
> +netdev_features_t
> +otx2_features_check(struct sk_buff *skb, struct net_device *dev,
> +                   netdev_features_t features)
> +{
> +       /* Due to hw issue segment size less than 16 bytes
> +        * are not supported. Hence do not offload such TSO
> +        * segments.
> +        */
> +       if (skb_is_gso(skb) && skb_shinfo(skb)->gso_size < 16)
> +               features &= ~NETIF_F_GSO_MASK;
> +
> +       if (skb_shinfo(skb)->nr_frags + 1 > OTX2_MAX_FRAGS_IN_SQE)
> +               features &= ~NETIF_F_SG;
> +

This is a bit extreme. I would attempt to remove NETIF_F_GSO_MASK instead.

if (skb_is_gso(skb)) {
     if (skb_shinfo(skb)->gso_size < 16 ||
         skb_shinfo(skb)->nr_frags + 1 > OTX2_MAX_FRAGS_IN_SQE))
           features &= ~NETIF_F_GSO_MASK;
}

This would very often end up with 1-MSS packets with fewer fragments.

-> No copy involved, and no high order page allocations.

> +       return features;
> +}
> +EXPORT_SYMBOL(otx2_features_check);
> +
>  int otx2_hw_set_mtu(struct otx2_nic *pfvf, int mtu)
>  {
>         struct nix_frs_cfg *req;
> diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
> index 06910307085e..6a4bf43bc77e 100644
> --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
> +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
> @@ -961,6 +961,9 @@ void otx2_get_mac_from_af(struct net_device *netdev);
>  void otx2_config_irq_coalescing(struct otx2_nic *pfvf, int qidx);
>  int otx2_config_pause_frm(struct otx2_nic *pfvf);
>  void otx2_setup_segmentation(struct otx2_nic *pfvf);
> +netdev_features_t otx2_features_check(struct sk_buff *skb,
> +                                     struct net_device *dev,
> +                                     netdev_features_t features);
>
>  /* RVU block related APIs */
>  int otx2_attach_npa_nix(struct otx2_nic *pfvf);
> diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
> index e5fe67e73865..2364eb8d6732 100644
> --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
> +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
> @@ -2737,6 +2737,7 @@ static const struct net_device_ops otx2_netdev_ops = {
>         .ndo_xdp_xmit           = otx2_xdp_xmit,
>         .ndo_setup_tc           = otx2_setup_tc,
>         .ndo_set_vf_trust       = otx2_ndo_set_vf_trust,
> +       .ndo_features_check     = otx2_features_check,
>  };
>
>  static int otx2_wq_init(struct otx2_nic *pf)
> diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c
> index f828d32737af..9b89aff42eb0 100644
> --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c
> +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c
> @@ -1158,17 +1158,6 @@ bool otx2_sq_append_skb(struct net_device *netdev, struct otx2_snd_queue *sq,
>
>         num_segs = skb_shinfo(skb)->nr_frags + 1;
>
> -       /* If SKB doesn't fit in a single SQE, linearize it.
> -        * TODO: Consider adding JUMP descriptor instead.
> -        */
> -       if (unlikely(num_segs > OTX2_MAX_FRAGS_IN_SQE)) {
> -               if (__skb_linearize(skb)) {
> -                       dev_kfree_skb_any(skb);
> -                       return true;
> -               }
> -               num_segs = skb_shinfo(skb)->nr_frags + 1;
> -       }

Then you need to keep this check for  non GSO packets.

One way to trigger this is to run netperf with tiny fragments.
TCP is unable to cook GSO packets, because we hit MAX_SKB_FRAGS before
even filling a single MSS.

netperf -H $remote -t TCP_SENDFILE -- -m 10



> -
>         if (skb_shinfo(skb)->gso_size && !is_hw_tso_supported(pfvf, skb)) {
>                 /* Insert vlan tag before giving pkt to tso */
>                 if (skb_vlan_tag_present(skb))
> diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c
> index 35e06048356f..04aab04e4ba2 100644
> --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c
> +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c
> @@ -483,6 +483,7 @@ static const struct net_device_ops otx2vf_netdev_ops = {
>         .ndo_tx_timeout = otx2_tx_timeout,
>         .ndo_eth_ioctl  = otx2_ioctl,
>         .ndo_setup_tc = otx2_setup_tc,
> +       .ndo_features_check     = otx2_features_check,
>  };
>
>  static int otx2_wq_init(struct otx2_nic *vf)
> --
> 2.25.1
>
Geetha sowjanya March 18, 2024, 12:46 a.m. UTC | #2
> -----Original Message-----
> From: Eric Dumazet <edumazet@google.com>
> Sent: Wednesday, March 13, 2024 1:28 PM
> To: Geethasowjanya Akula <gakula@marvell.com>
> Cc: netdev@vger.kernel.org; linux-kernel@vger.kernel.org; kuba@kernel.org;
> davem@davemloft.net; pabeni@redhat.com; Sunil Kovvuri Goutham
> <sgoutham@marvell.com>; Subbaraya Sundeep Bhatta
> <sbhatta@marvell.com>; Hariprasad Kelam <hkelam@marvell.com>
> Subject: [EXTERNAL] Re: [v2 net PATCH] octeontx2-pf: Disable HW TSO for seg
> size < 16B
> On Wed, Mar 13, 2024 at 7:33 AM Geetha sowjanya <gakula@marvell.com>
> wrote:
> >
> > Current NIX hardware do not support TSO for the segment size less 16
> > bytes. This patch disable hw TSO for such packets.
> >
> > Fixes: 86d7476078b8 ("octeontx2-pf: TCP segmentation offload support").
> > Signed-off-by: Geetha sowjanya <gakula@marvell.com>
> > ---
> >
> > v1-v2:
> >  - As suggested by Eric Dumazet used ndo_features_check().
> >  - Moved the max fargments support check to ndo_features_check.
> >
> >  .../marvell/octeontx2/nic/otx2_common.c        | 18 ++++++++++++++++++
> >  .../marvell/octeontx2/nic/otx2_common.h        |  3 +++
> >  .../ethernet/marvell/octeontx2/nic/otx2_pf.c   |  1 +
> >  .../ethernet/marvell/octeontx2/nic/otx2_txrx.c | 11 -----------
> >  .../ethernet/marvell/octeontx2/nic/otx2_vf.c   |  1 +
> >  5 files changed, 23 insertions(+), 11 deletions(-)
> >
> > diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
> > b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
> > index 02d0b707aea5..de61c69370be 100644
> > --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
> > +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
> > @@ -221,6 +221,24 @@ int otx2_set_mac_address(struct net_device
> > *netdev, void *p)  }  EXPORT_SYMBOL(otx2_set_mac_address);
> >
> > +netdev_features_t
> > +otx2_features_check(struct sk_buff *skb, struct net_device *dev,
> > +                   netdev_features_t features) {
> > +       /* Due to hw issue segment size less than 16 bytes
> > +        * are not supported. Hence do not offload such TSO
> > +        * segments.
> > +        */
> > +       if (skb_is_gso(skb) && skb_shinfo(skb)->gso_size < 16)
> > +               features &= ~NETIF_F_GSO_MASK;
> > +
> > +       if (skb_shinfo(skb)->nr_frags + 1 > OTX2_MAX_FRAGS_IN_SQE)
> > +               features &= ~NETIF_F_SG;
> > +
> 
> This is a bit extreme. I would attempt to remove NETIF_F_GSO_MASK instead.
> 
> if (skb_is_gso(skb)) {
>      if (skb_shinfo(skb)->gso_size < 16 ||
>          skb_shinfo(skb)->nr_frags + 1 > OTX2_MAX_FRAGS_IN_SQE))
>            features &= ~NETIF_F_GSO_MASK; }
> 
> This would very often end up with 1-MSS packets with fewer fragments.
> 
> -> No copy involved, and no high order page allocations.
> 
> > +       return features;
> > +}
> > +EXPORT_SYMBOL(otx2_features_check);
> > +
> >  int otx2_hw_set_mtu(struct otx2_nic *pfvf, int mtu)  {
> >         struct nix_frs_cfg *req;
> > diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
> > b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
> > index 06910307085e..6a4bf43bc77e 100644
> > --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
> > +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
> > @@ -961,6 +961,9 @@ void otx2_get_mac_from_af(struct net_device
> > *netdev);  void otx2_config_irq_coalescing(struct otx2_nic *pfvf, int
> > qidx);  int otx2_config_pause_frm(struct otx2_nic *pfvf);  void
> > otx2_setup_segmentation(struct otx2_nic *pfvf);
> > +netdev_features_t otx2_features_check(struct sk_buff *skb,
> > +                                     struct net_device *dev,
> > +                                     netdev_features_t features);
> >
> >  /* RVU block related APIs */
> >  int otx2_attach_npa_nix(struct otx2_nic *pfvf); diff --git
> > a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
> > b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
> > index e5fe67e73865..2364eb8d6732 100644
> > --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
> > +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
> > @@ -2737,6 +2737,7 @@ static const struct net_device_ops
> otx2_netdev_ops = {
> >         .ndo_xdp_xmit           = otx2_xdp_xmit,
> >         .ndo_setup_tc           = otx2_setup_tc,
> >         .ndo_set_vf_trust       = otx2_ndo_set_vf_trust,
> > +       .ndo_features_check     = otx2_features_check,
> >  };
> >
> >  static int otx2_wq_init(struct otx2_nic *pf) diff --git
> > a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c
> > b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c
> > index f828d32737af..9b89aff42eb0 100644
> > --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c
> > +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c
> > @@ -1158,17 +1158,6 @@ bool otx2_sq_append_skb(struct net_device
> > *netdev, struct otx2_snd_queue *sq,
> >
> >         num_segs = skb_shinfo(skb)->nr_frags + 1;
> >
> > -       /* If SKB doesn't fit in a single SQE, linearize it.
> > -        * TODO: Consider adding JUMP descriptor instead.
> > -        */
> > -       if (unlikely(num_segs > OTX2_MAX_FRAGS_IN_SQE)) {
> > -               if (__skb_linearize(skb)) {
> > -                       dev_kfree_skb_any(skb);
> > -                       return true;
> > -               }
> > -               num_segs = skb_shinfo(skb)->nr_frags + 1;
> > -       }
> 
> Then you need to keep this check for  non GSO packets.
> 
> One way to trigger this is to run netperf with tiny fragments.
> TCP is unable to cook GSO packets, because we hit MAX_SKB_FRAGS before
> even filling a single MSS.
> 
> netperf -H $remote -t TCP_SENDFILE -- -m 10
> 
Will repost the patch with suggested changes and testing.
> 
> 
> > -
> >         if (skb_shinfo(skb)->gso_size && !is_hw_tso_supported(pfvf, skb)) {
> >                 /* Insert vlan tag before giving pkt to tso */
> >                 if (skb_vlan_tag_present(skb)) diff --git
> > a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c
> > b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c
> > index 35e06048356f..04aab04e4ba2 100644
> > --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c
> > +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c
> > @@ -483,6 +483,7 @@ static const struct net_device_ops
> otx2vf_netdev_ops = {
> >         .ndo_tx_timeout = otx2_tx_timeout,
> >         .ndo_eth_ioctl  = otx2_ioctl,
> >         .ndo_setup_tc = otx2_setup_tc,
> > +       .ndo_features_check     = otx2_features_check,
> >  };
> >
> >  static int otx2_wq_init(struct otx2_nic *vf)
> > --
> > 2.25.1
> >
diff mbox series

Patch

diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
index 02d0b707aea5..de61c69370be 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
@@ -221,6 +221,24 @@  int otx2_set_mac_address(struct net_device *netdev, void *p)
 }
 EXPORT_SYMBOL(otx2_set_mac_address);
 
+netdev_features_t
+otx2_features_check(struct sk_buff *skb, struct net_device *dev,
+		    netdev_features_t features)
+{
+	/* Due to hw issue segment size less than 16 bytes
+	 * are not supported. Hence do not offload such TSO
+	 * segments.
+	 */
+	if (skb_is_gso(skb) && skb_shinfo(skb)->gso_size < 16)
+		features &= ~NETIF_F_GSO_MASK;
+
+	if (skb_shinfo(skb)->nr_frags + 1 > OTX2_MAX_FRAGS_IN_SQE)
+		features &= ~NETIF_F_SG;
+
+	return features;
+}
+EXPORT_SYMBOL(otx2_features_check);
+
 int otx2_hw_set_mtu(struct otx2_nic *pfvf, int mtu)
 {
 	struct nix_frs_cfg *req;
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
index 06910307085e..6a4bf43bc77e 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
@@ -961,6 +961,9 @@  void otx2_get_mac_from_af(struct net_device *netdev);
 void otx2_config_irq_coalescing(struct otx2_nic *pfvf, int qidx);
 int otx2_config_pause_frm(struct otx2_nic *pfvf);
 void otx2_setup_segmentation(struct otx2_nic *pfvf);
+netdev_features_t otx2_features_check(struct sk_buff *skb,
+				      struct net_device *dev,
+				      netdev_features_t features);
 
 /* RVU block related APIs */
 int otx2_attach_npa_nix(struct otx2_nic *pfvf);
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
index e5fe67e73865..2364eb8d6732 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
@@ -2737,6 +2737,7 @@  static const struct net_device_ops otx2_netdev_ops = {
 	.ndo_xdp_xmit           = otx2_xdp_xmit,
 	.ndo_setup_tc		= otx2_setup_tc,
 	.ndo_set_vf_trust	= otx2_ndo_set_vf_trust,
+	.ndo_features_check	= otx2_features_check,
 };
 
 static int otx2_wq_init(struct otx2_nic *pf)
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c
index f828d32737af..9b89aff42eb0 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c
@@ -1158,17 +1158,6 @@  bool otx2_sq_append_skb(struct net_device *netdev, struct otx2_snd_queue *sq,
 
 	num_segs = skb_shinfo(skb)->nr_frags + 1;
 
-	/* If SKB doesn't fit in a single SQE, linearize it.
-	 * TODO: Consider adding JUMP descriptor instead.
-	 */
-	if (unlikely(num_segs > OTX2_MAX_FRAGS_IN_SQE)) {
-		if (__skb_linearize(skb)) {
-			dev_kfree_skb_any(skb);
-			return true;
-		}
-		num_segs = skb_shinfo(skb)->nr_frags + 1;
-	}
-
 	if (skb_shinfo(skb)->gso_size && !is_hw_tso_supported(pfvf, skb)) {
 		/* Insert vlan tag before giving pkt to tso */
 		if (skb_vlan_tag_present(skb))
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c
index 35e06048356f..04aab04e4ba2 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c
@@ -483,6 +483,7 @@  static const struct net_device_ops otx2vf_netdev_ops = {
 	.ndo_tx_timeout = otx2_tx_timeout,
 	.ndo_eth_ioctl	= otx2_ioctl,
 	.ndo_setup_tc = otx2_setup_tc,
+	.ndo_features_check	= otx2_features_check,
 };
 
 static int otx2_wq_init(struct otx2_nic *vf)