diff mbox series

[RFC,net-next,v5,2/2] bnxt: Use generic HBH removal helper in tx path

Message ID 20221207225435.1273226-2-lixiaoyan@google.com (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Headers show
Series [net-next,v5,1/2] IPv6/GRO: generic helper to remove temporary HBH/jumbo header in driver | expand

Checks

Context Check Description
netdev/tree_selection success Clearly marked for net-next
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix success Link
netdev/cover_letter success Single patches do not need cover letters
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 1 this patch: 1
netdev/cc_maintainers success CCed 6 of 6 maintainers
netdev/build_clang success Errors and warnings before: 0 this patch: 0
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 1 this patch: 1
netdev/checkpatch warning WARNING: line length of 84 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Coco Li Dec. 7, 2022, 10:54 p.m. UTC
Eric Dumazet implemented Big TCP that allowed bigger TSO/GRO packet sizes
for IPv6 traffic. See patch series:
'commit 89527be8d8d6 ("net: add IFLA_TSO_{MAX_SIZE|SEGS} attributes")'

This reduces the number of packets traversing the networking stack and
should usually improve performance. However, it also inserts a
temporary Hop-by-hop IPv6 extension header.

Using the HBH header removal method in the previous patch, the extra header
can be removed in the bnxt driver to allow it to send big TCP packets (bigger
TSO packets) as well.

Tested:
Compiled locally

To further test functional correctness, update the GSO/GRO limit on the
physical NIC:

ip link set eth0 gso_max_size 181000
ip link set eth0 gro_max_size 181000

Note that if there are bonding or ipvlan devices on top of the physical
NIC, their GSO sizes need to be updated as well.

Then, IPv6/TCP packets with sizes larger than 64k can be observed.

Big TCP functionality has been tested by Michael; the feature checks have not been tested yet.

Tested by Michael:
I've confirmed with our hardware team that this is supported by our
chips, and I've tested it up to gso_max_size of 524280.  Thanks.

Tested-by: Michael Chan <michael.chan@broadcom.com>
Reviewed-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: Coco Li <lixiaoyan@google.com>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c | 26 ++++++++++++++++++++++-
 1 file changed, 25 insertions(+), 1 deletion(-)

Comments

Saeed Mahameed Dec. 8, 2022, 12:32 a.m. UTC | #1
On 07 Dec 14:54, Coco Li wrote:
>Eric Dumazet implemented Big TCP that allowed bigger TSO/GRO packet sizes
>for IPv6 traffic. See patch series:
>'commit 89527be8d8d6 ("net: add IFLA_TSO_{MAX_SIZE|SEGS} attributes")'
>
>This reduces the number of packets traversing the networking stack and
>should usually improves performance. However, it also inserts a
>temporary Hop-by-hop IPv6 extension header.
>
>Using the HBH header removal method in the previous path, the extra header
                                                      ^ patch
>be removed in bnxt drivers to allow it to send big TCP packets (bigger
>TSO packets) as well.
>

I think Eric didn't expose this function because it isn't efficient for
drivers who are already processing the headers separately from payload for
LSO packets .. the trick is to have an optimized copy method depending on
your driver xmit function, usually you would just memcpy the TCP header over
the HBH exactly at the point you copy/process those headers into the HW
descriptor.

>Tested:
>Compiled locally
>
>To further test functional correctness, update the GSO/GRO limit on the
>physical NIC:
>
>ip link set eth0 gso_max_size 181000
>ip link set eth0 gro_max_size 181000
>
>Note that if there are bonding or ipvan devices on top of the physical
>NIC, their GSO sizes need to be updated as well.
>
>Then, IPv6/TCP packets with sizes larger than 64k can be observed.
>
>Big TCP functionality is tested by Michael, feature checks not yet.
>
>Tested by Michael:
>I've confirmed with our hardware team that this is supported by our
>chips, and I've tested it up to gso_max_size of 524280.  Thanks.
>
>Tested-by: Michael Chan <michael.chan@broadcom.com>
>Reviewed-by: Michael Chan <michael.chan@broadcom.com>
>Signed-off-by: Coco Li <lixiaoyan@google.com>
>---
> drivers/net/ethernet/broadcom/bnxt/bnxt.c | 26 ++++++++++++++++++++++-
> 1 file changed, 25 insertions(+), 1 deletion(-)
>
>diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
>index 0fe164b42c5d..6ba1cd342a80 100644
>--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
>+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
>@@ -389,6 +389,9 @@ static netdev_tx_t bnxt_start_xmit(struct sk_buff *skb, struct net_device *dev)
> 			return NETDEV_TX_BUSY;
> 	}
>
>+	if (unlikely(ipv6_hopopt_jumbo_remove(skb)))
>+		goto tx_free;
>+
> 	length = skb->len;
> 	len = skb_headlen(skb);
> 	last_frag = skb_shinfo(skb)->nr_frags;
>@@ -11315,6 +11318,7 @@ static bool bnxt_exthdr_check(struct bnxt *bp, struct sk_buff *skb, int nw_off,
> 			      u8 **nextp)
> {
> 	struct ipv6hdr *ip6h = (struct ipv6hdr *)(skb->data + nw_off);
>+	struct hop_jumbo_hdr *jhdr;
> 	int hdr_count = 0;
> 	u8 *nexthdr;
> 	int start;
>@@ -11342,9 +11346,27 @@ static bool bnxt_exthdr_check(struct bnxt *bp, struct sk_buff *skb, int nw_off,
>
> 		if (hdrlen > 64)
> 			return false;
>+
>+		/* The ext header may be a hop-by-hop header inserted for
>+		 * big TCP purposes. This will be removed before sending
>+		 * from NIC, so do not count it.
>+		 */
>+		if (*nexthdr == NEXTHDR_HOP) {
>+			if (likely(skb->len <= GRO_LEGACY_MAX_SIZE))
>+				goto increment_hdr;
>+
>+			jhdr = (struct hop_jumbo_hdr *)nexthdr;
>+			if (jhdr->tlv_type != IPV6_TLV_JUMBO || jhdr->hdrlen != 0 ||
>+			    jhdr->nexthdr != IPPROTO_TCP)
>+				goto increment_hdr;
>+
>+			goto next_hdr;
>+		}
>+increment_hdr:
>+		hdr_count++;
>+next_hdr:
> 		nexthdr = &hp->nexthdr;
> 		start += hdrlen;
>-		hdr_count++;
> 	}
> 	if (nextp) {
> 		/* Caller will check inner protocol */
>@@ -13657,6 +13679,8 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
> 		dev->features &= ~NETIF_F_LRO;
> 	dev->priv_flags |= IFF_UNICAST_FLT;
>
>+	netif_set_tso_max_size(dev, GSO_MAX_SIZE);
>+
> #ifdef CONFIG_BNXT_SRIOV
> 	init_waitqueue_head(&bp->sriov_cfg_wait);
> #endif
>-- 
>2.39.0.rc0.267.gcb52ba06e7-goog
>
Michael Chan Dec. 8, 2022, 7:54 p.m. UTC | #2
On Wed, Dec 7, 2022 at 2:54 PM Coco Li <lixiaoyan@google.com> wrote:
>
> Eric Dumazet implemented Big TCP that allowed bigger TSO/GRO packet sizes
> for IPv6 traffic. See patch series:
> 'commit 89527be8d8d6 ("net: add IFLA_TSO_{MAX_SIZE|SEGS} attributes")'
>
> This reduces the number of packets traversing the networking stack and
> should usually improves performance. However, it also inserts a
> temporary Hop-by-hop IPv6 extension header.
>
> Using the HBH header removal method in the previous path, the extra header
> be removed in bnxt drivers to allow it to send big TCP packets (bigger
> TSO packets) as well.
>
> Tested:
> Compiled locally
>
> To further test functional correctness, update the GSO/GRO limit on the
> physical NIC:
>
> ip link set eth0 gso_max_size 181000
> ip link set eth0 gro_max_size 181000
>
> Note that if there are bonding or ipvan devices on top of the physical
> NIC, their GSO sizes need to be updated as well.
>
> Then, IPv6/TCP packets with sizes larger than 64k can be observed.
>
> Big TCP functionality is tested by Michael, feature checks not yet.
>
> Tested by Michael:
> I've confirmed with our hardware team that this is supported by our
> chips, and I've tested it up to gso_max_size of 524280.  Thanks.
>
> Tested-by: Michael Chan <michael.chan@broadcom.com>
> Reviewed-by: Michael Chan <michael.chan@broadcom.com>

If you have made changes since the last version, please drop these
tags.  Reviewers will provide new tags after reviewing the new
version.

> Signed-off-by: Coco Li <lixiaoyan@google.com>
> ---
>  drivers/net/ethernet/broadcom/bnxt/bnxt.c | 26 ++++++++++++++++++++++-
>  1 file changed, 25 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
> index 0fe164b42c5d..6ba1cd342a80 100644
> --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
> +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
> @@ -389,6 +389,9 @@ static netdev_tx_t bnxt_start_xmit(struct sk_buff *skb, struct net_device *dev)
>                         return NETDEV_TX_BUSY;
>         }
>
> +       if (unlikely(ipv6_hopopt_jumbo_remove(skb)))
> +               goto tx_free;
> +
>         length = skb->len;
>         len = skb_headlen(skb);
>         last_frag = skb_shinfo(skb)->nr_frags;
> @@ -11315,6 +11318,7 @@ static bool bnxt_exthdr_check(struct bnxt *bp, struct sk_buff *skb, int nw_off,
>                               u8 **nextp)
>  {
>         struct ipv6hdr *ip6h = (struct ipv6hdr *)(skb->data + nw_off);
> +       struct hop_jumbo_hdr *jhdr;
>         int hdr_count = 0;
>         u8 *nexthdr;
>         int start;
> @@ -11342,9 +11346,27 @@ static bool bnxt_exthdr_check(struct bnxt *bp, struct sk_buff *skb, int nw_off,
>
>                 if (hdrlen > 64)
>                         return false;
> +
> +               /* The ext header may be a hop-by-hop header inserted for
> +                * big TCP purposes. This will be removed before sending
> +                * from NIC, so do not count it.
> +                */
> +               if (*nexthdr == NEXTHDR_HOP) {
> +                       if (likely(skb->len <= GRO_LEGACY_MAX_SIZE))
> +                               goto increment_hdr;
> +
> +                       jhdr = (struct hop_jumbo_hdr *)nexthdr;

I already explained when reviewing your last version that nexthdr
initially points to the next header field within the ipv6 header so
this won't work.  If you cast it to jhdr, jhdr will be at offset 6 of
the ipv6 header.  It won't be pointing to the extension header.  You
need to do:

jhdr = (struct hop_jumbo_hdr *)hp

hp is pointing to the extension header.

Thanks.

> +                       if (jhdr->tlv_type != IPV6_TLV_JUMBO || jhdr->hdrlen != 0 ||
> +                           jhdr->nexthdr != IPPROTO_TCP)
> +                               goto increment_hdr;
> +
> +                       goto next_hdr;
> +               }
> +increment_hdr:
> +               hdr_count++;
> +next_hdr:
>                 nexthdr = &hp->nexthdr;
>                 start += hdrlen;
> -               hdr_count++;
>         }
>         if (nextp) {
>                 /* Caller will check inner protocol */
Coco Li Dec. 10, 2022, 3:53 a.m. UTC | #3
I agree that this function isn't efficient for drivers who already
copy headers, which can just copy over the needed parts of the header
as you mentioned. However, for drivers that need the HBH header removed in
place, it would be a nice function to have (and it reduces code
duplication; see how the function is reused for the GSO path).

On Wed, Dec 7, 2022 at 4:33 PM Saeed Mahameed <saeed@kernel.org> wrote:
>
>
> On 07 Dec 14:54, Coco Li wrote:
> >Eric Dumazet implemented Big TCP that allowed bigger TSO/GRO packet sizes
> >for IPv6 traffic. See patch series:
> >'commit 89527be8d8d6 ("net: add IFLA_TSO_{MAX_SIZE|SEGS} attributes")'
> >
> >This reduces the number of packets traversing the networking stack and
> >should usually improves performance. However, it also inserts a
> >temporary Hop-by-hop IPv6 extension header.
> >
> >Using the HBH header removal method in the previous path, the extra header
>                                                       ^ patch
> >be removed in bnxt drivers to allow it to send big TCP packets (bigger
> >TSO packets) as well.
> >
>
> I think Eric didn't expose this function because it isn't efficient for
> drivers who are already processing the headers separately from payload for
> LSO packets .. the trick is to have an optimized copy method depending on
> your driver xmit function, usually you would just memcpy the TCP header over
> the HBH exactly at the point you copy/process those headers into the HW
> descriptor.
>
> >Tested:
> >Compiled locally
> >
> >To further test functional correctness, update the GSO/GRO limit on the
> >physical NIC:
> >
> >ip link set eth0 gso_max_size 181000
> >ip link set eth0 gro_max_size 181000
> >
> >Note that if there are bonding or ipvan devices on top of the physical
> >NIC, their GSO sizes need to be updated as well.
> >
> >Then, IPv6/TCP packets with sizes larger than 64k can be observed.
> >
> >Big TCP functionality is tested by Michael, feature checks not yet.
> >
> >Tested by Michael:
> >I've confirmed with our hardware team that this is supported by our
> >chips, and I've tested it up to gso_max_size of 524280.  Thanks.
> >
> >Tested-by: Michael Chan <michael.chan@broadcom.com>
> >Reviewed-by: Michael Chan <michael.chan@broadcom.com>
> >Signed-off-by: Coco Li <lixiaoyan@google.com>
> >---
> > drivers/net/ethernet/broadcom/bnxt/bnxt.c | 26 ++++++++++++++++++++++-
> > 1 file changed, 25 insertions(+), 1 deletion(-)
> >
> >diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
> >index 0fe164b42c5d..6ba1cd342a80 100644
> >--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
> >+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
> >@@ -389,6 +389,9 @@ static netdev_tx_t bnxt_start_xmit(struct sk_buff *skb, struct net_device *dev)
> >                       return NETDEV_TX_BUSY;
> >       }
> >
> >+      if (unlikely(ipv6_hopopt_jumbo_remove(skb)))
> >+              goto tx_free;
> >+
> >       length = skb->len;
> >       len = skb_headlen(skb);
> >       last_frag = skb_shinfo(skb)->nr_frags;
> >@@ -11315,6 +11318,7 @@ static bool bnxt_exthdr_check(struct bnxt *bp, struct sk_buff *skb, int nw_off,
> >                             u8 **nextp)
> > {
> >       struct ipv6hdr *ip6h = (struct ipv6hdr *)(skb->data + nw_off);
> >+      struct hop_jumbo_hdr *jhdr;
> >       int hdr_count = 0;
> >       u8 *nexthdr;
> >       int start;
> >@@ -11342,9 +11346,27 @@ static bool bnxt_exthdr_check(struct bnxt *bp, struct sk_buff *skb, int nw_off,
> >
> >               if (hdrlen > 64)
> >                       return false;
> >+
> >+              /* The ext header may be a hop-by-hop header inserted for
> >+               * big TCP purposes. This will be removed before sending
> >+               * from NIC, so do not count it.
> >+               */
> >+              if (*nexthdr == NEXTHDR_HOP) {
> >+                      if (likely(skb->len <= GRO_LEGACY_MAX_SIZE))
> >+                              goto increment_hdr;
> >+
> >+                      jhdr = (struct hop_jumbo_hdr *)nexthdr;
> >+                      if (jhdr->tlv_type != IPV6_TLV_JUMBO || jhdr->hdrlen != 0 ||
> >+                          jhdr->nexthdr != IPPROTO_TCP)
> >+                              goto increment_hdr;
> >+
> >+                      goto next_hdr;
> >+              }
> >+increment_hdr:
> >+              hdr_count++;
> >+next_hdr:
> >               nexthdr = &hp->nexthdr;
> >               start += hdrlen;
> >-              hdr_count++;
> >       }
> >       if (nextp) {
> >               /* Caller will check inner protocol */
> >@@ -13657,6 +13679,8 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
> >               dev->features &= ~NETIF_F_LRO;
> >       dev->priv_flags |= IFF_UNICAST_FLT;
> >
> >+      netif_set_tso_max_size(dev, GSO_MAX_SIZE);
> >+
> > #ifdef CONFIG_BNXT_SRIOV
> >       init_waitqueue_head(&bp->sriov_cfg_wait);
> > #endif
> >--
> >2.39.0.rc0.267.gcb52ba06e7-goog
> >
diff mbox series

Patch

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 0fe164b42c5d..6ba1cd342a80 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -389,6 +389,9 @@  static netdev_tx_t bnxt_start_xmit(struct sk_buff *skb, struct net_device *dev)
 			return NETDEV_TX_BUSY;
 	}
 
+	if (unlikely(ipv6_hopopt_jumbo_remove(skb)))
+		goto tx_free;
+
 	length = skb->len;
 	len = skb_headlen(skb);
 	last_frag = skb_shinfo(skb)->nr_frags;
@@ -11315,6 +11318,7 @@  static bool bnxt_exthdr_check(struct bnxt *bp, struct sk_buff *skb, int nw_off,
 			      u8 **nextp)
 {
 	struct ipv6hdr *ip6h = (struct ipv6hdr *)(skb->data + nw_off);
+	struct hop_jumbo_hdr *jhdr;
 	int hdr_count = 0;
 	u8 *nexthdr;
 	int start;
@@ -11342,9 +11346,27 @@  static bool bnxt_exthdr_check(struct bnxt *bp, struct sk_buff *skb, int nw_off,
 
 		if (hdrlen > 64)
 			return false;
+
+		/* The ext header may be a hop-by-hop header inserted for
+		 * big TCP purposes. This will be removed before sending
+		 * from NIC, so do not count it.
+		 */
+		if (*nexthdr == NEXTHDR_HOP) {
+			if (likely(skb->len <= GRO_LEGACY_MAX_SIZE))
+				goto increment_hdr;
+
+			jhdr = (struct hop_jumbo_hdr *)nexthdr;
+			if (jhdr->tlv_type != IPV6_TLV_JUMBO || jhdr->hdrlen != 0 ||
+			    jhdr->nexthdr != IPPROTO_TCP)
+				goto increment_hdr;
+
+			goto next_hdr;
+		}
+increment_hdr:
+		hdr_count++;
+next_hdr:
 		nexthdr = &hp->nexthdr;
 		start += hdrlen;
-		hdr_count++;
 	}
 	if (nextp) {
 		/* Caller will check inner protocol */
@@ -13657,6 +13679,8 @@  static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 		dev->features &= ~NETIF_F_LRO;
 	dev->priv_flags |= IFF_UNICAST_FLT;
 
+	netif_set_tso_max_size(dev, GSO_MAX_SIZE);
+
 #ifdef CONFIG_BNXT_SRIOV
 	init_waitqueue_head(&bp->sriov_cfg_wait);
 #endif