diff mbox series

[v4,net-next,07/12] ipv6: add IFLA_GRO_IPV6_MAX_SIZE

Message ID 20220506153048.3695721-8-eric.dumazet@gmail.com (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Headers show
Series tcp: BIG TCP implementation | expand

Checks

Context Check Description
netdev/tree_selection success Clearly marked for net-next, async
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix success Link
netdev/cover_letter success Series has a cover letter
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 4696 this patch: 4696
netdev/cc_maintainers warning 2 maintainers not CCed: petrm@nvidia.com liuhangbin@gmail.com
netdev/build_clang success Errors and warnings before: 1059 this patch: 1059
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 4847 this patch: 4847
netdev/checkpatch warning CHECK: Alignment should match open parenthesis WARNING: line length of 82 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Eric Dumazet May 6, 2022, 3:30 p.m. UTC
From: Coco Li <lixiaoyan@google.com>

Enable GRO to have IPv6 specific limit for max packet size.

This patch introduces a new dev->gro_ipv6_max_size field
that is modifiable through ip link.

ip link set dev eth0 gro_ipv6_max_size 185000

Note that this value is only considered if it is bigger than
gro_max_size, and only for non-encapsulated TCP/IPv6 packets.

Signed-off-by: Coco Li <lixiaoyan@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
---
 include/linux/netdevice.h          |  3 +++
 include/uapi/linux/if_link.h       |  1 +
 net/core/dev.c                     |  1 +
 net/core/gro.c                     | 20 ++++++++++++++++++--
 net/core/rtnetlink.c               | 22 ++++++++++++++++++++++
 tools/include/uapi/linux/if_link.h |  1 +
 6 files changed, 46 insertions(+), 2 deletions(-)

Comments

Alexander Duyck May 6, 2022, 9:06 p.m. UTC | #1
On Fri, 2022-05-06 at 08:30 -0700, Eric Dumazet wrote:
> From: Coco Li <lixiaoyan@google.com>
> 
> Enable GRO to have IPv6 specific limit for max packet size.
> 
> This patch introduces new dev->gro_ipv6_max_size
> that is modifiable through ip link.
> 
> ip link set dev eth0 gro_ipv6_max_size 185000
> 
> Note that this value is only considered if bigger than
> gro_max_size, and for non encapsulated TCP/ipv6 packets.
> 
> Signed-off-by: Coco Li <lixiaoyan@google.com>
> Signed-off-by: Eric Dumazet <edumazet@google.com>

This is another spot where it doesn't make much sense to me to add yet
another control. Instead it would make much more sense to simply remove
the cap from the existing control and simply add a check that caps the
non-IPv6 protocols at GRO_MAX_SIZE.

> ---
>  include/linux/netdevice.h          |  3 +++
>  include/uapi/linux/if_link.h       |  1 +
>  net/core/dev.c                     |  1 +
>  net/core/gro.c                     | 20 ++++++++++++++++++--
>  net/core/rtnetlink.c               | 22 ++++++++++++++++++++++
>  tools/include/uapi/linux/if_link.h |  1 +
>  6 files changed, 46 insertions(+), 2 deletions(-)
> 
> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
> index 47f413dac12e901700045f4b73d47ecdca0f4f3c..df12c9843d94cb847e0ce5ba1b3b36bde7d476ed 100644
> --- a/include/linux/netdevice.h
> +++ b/include/linux/netdevice.h
> @@ -1962,6 +1962,8 @@ enum netdev_ml_priv_type {
>   *			keep a list of interfaces to be deleted.
>   *	@gro_max_size:	Maximum size of aggregated packet in generic
>   *			receive offload (GRO)
> + *	@gro_ipv6_max_size:	Maximum size of aggregated packet in generic
> + *				receive offload (GRO), for IPv6
>   *
>   *	@dev_addr_shadow:	Copy of @dev_addr to catch direct writes.
>   *	@linkwatch_dev_tracker:	refcount tracker used by linkwatch.
> @@ -2154,6 +2156,7 @@ struct net_device {
>  	int			napi_defer_hard_irqs;
>  #define GRO_MAX_SIZE		65536
>  	unsigned int		gro_max_size;
> +	unsigned int		gro_ipv6_max_size;
>  	rx_handler_func_t __rcu	*rx_handler;
>  	void __rcu		*rx_handler_data;
>  
> diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
> index aa05fc9cc23f4ccf92f4cbba57f43472749cd42a..9ece3a391105c171057cc491c1458ee8a45e07e0 100644
> --- a/include/uapi/linux/if_link.h
> +++ b/include/uapi/linux/if_link.h
> @@ -371,6 +371,7 @@ enum {
>  	IFLA_TSO_MAX_SIZE,
>  	IFLA_TSO_MAX_SEGS,
>  	IFLA_GSO_IPV6_MAX_SIZE,
> +	IFLA_GRO_IPV6_MAX_SIZE,
>  
>  	__IFLA_MAX
>  };
> diff --git a/net/core/dev.c b/net/core/dev.c
> index aa8757215b2a9f14683f95086732668eb99a875b..582b7fe052a6fb06437f95bd6a451b79e188cc57 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -10608,6 +10608,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
>  	dev->tso_max_size = TSO_LEGACY_MAX_SIZE;
>  	dev->tso_max_segs = TSO_MAX_SEGS;
>  	dev->gso_ipv6_max_size = GSO_MAX_SIZE;
> +	dev->gro_ipv6_max_size = GRO_MAX_SIZE;
>  
>  	dev->upper_level = 1;
>  	dev->lower_level = 1;
> diff --git a/net/core/gro.c b/net/core/gro.c
> index 78110edf5d4b36d2fa6f8a2676096efe0112aa0e..8b35403dd7e909a8d7df591d952a4600c13f360b 100644
> --- a/net/core/gro.c
> +++ b/net/core/gro.c
> @@ -161,11 +161,27 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb)
>  	unsigned int new_truesize;
>  	struct sk_buff *lp;
>  
> +	if (unlikely(NAPI_GRO_CB(skb)->flush))
> +		return -E2BIG;
> +
>  	/* pairs with WRITE_ONCE() in netif_set_gro_max_size() */
>  	gro_max_size = READ_ONCE(p->dev->gro_max_size);
>  
> -	if (unlikely(p->len + len >= gro_max_size || NAPI_GRO_CB(skb)->flush))
> -		return -E2BIG;

So if we just overwrite the existing gro_max_size we could skip the
changes above and all the extra netlink overhead.

> +	if (unlikely(p->len + len >= gro_max_size)) {
> +		/* pairs with WRITE_ONCE() in netif_set_gro_ipv6_max_size() */
> +		unsigned int gro6_max_size = READ_ONCE(p->dev->gro_ipv6_max_size);
> +
> +		if (gro6_max_size > gro_max_size &&
> +		    p->protocol == htons(ETH_P_IPV6) &&
> +		    skb_headroom(p) >= sizeof(struct hop_jumbo_hdr) &&
> +		    ipv6_hdr(p)->nexthdr == IPPROTO_TCP &&
> +		    !p->encapsulation)
> +			gro_max_size = gro6_max_size;
> +
> +		if (p->len + len >= gro_max_size)
> +			return -E2BIG;
> +	}
> +

Instead all we would need to do is add an extra section here along the
lines of:
	if (p->len + len > GRO_MAX_SIZE &&
	    (p->protocol != htons(ETH_P_IPV6) ||
	     skb_headroom(p) < sizeof(struct hop_jumbo_hdr) ||
	     ipv6_hdr(p)->nexthdr != IPPROTO_TCP ||
	     p->encapsulation))
		return -E2BIG;
Eric Dumazet May 6, 2022, 9:22 p.m. UTC | #2
On Fri, May 6, 2022 at 2:06 PM Alexander H Duyck
<alexander.duyck@gmail.com> wrote:
>
> On Fri, 2022-05-06 at 08:30 -0700, Eric Dumazet wrote:
> > From: Coco Li <lixiaoyan@google.com>
> >
> > Enable GRO to have IPv6 specific limit for max packet size.
> >
> > This patch introduces new dev->gro_ipv6_max_size
> > that is modifiable through ip link.
> >
> > ip link set dev eth0 gro_ipv6_max_size 185000
> >
> > Note that this value is only considered if bigger than
> > gro_max_size, and for non encapsulated TCP/ipv6 packets.
> >
> > Signed-off-by: Coco Li <lixiaoyan@google.com>
> > Signed-off-by: Eric Dumazet <edumazet@google.com>
>
> This is another spot where it doesn't make much sense to me to add yet
> another control. Instead it would make much more sense to simply remove
> the cap from the existing control and simply add a check that caps the
> non-IPv6 protocols at GRO_MAX_SIZE.

Can you please send a diff on top of our patch series ?

It is kind of hard to see what you want, and _why_ you want this.

Note that GRO_MAX_SIZE has been replaced by dev->gro_max_size last year.

Yes, yet another control, but some people want more control than others I guess.
Alexander Duyck May 6, 2022, 10:01 p.m. UTC | #3
On Fri, May 6, 2022 at 2:22 PM Eric Dumazet <edumazet@google.com> wrote:
>
> On Fri, May 6, 2022 at 2:06 PM Alexander H Duyck
> <alexander.duyck@gmail.com> wrote:
> >
> > On Fri, 2022-05-06 at 08:30 -0700, Eric Dumazet wrote:
> > > From: Coco Li <lixiaoyan@google.com>
> > >
> > > Enable GRO to have IPv6 specific limit for max packet size.
> > >
> > > This patch introduces new dev->gro_ipv6_max_size
> > > that is modifiable through ip link.
> > >
> > > ip link set dev eth0 gro_ipv6_max_size 185000
> > >
> > > Note that this value is only considered if bigger than
> > > gro_max_size, and for non encapsulated TCP/ipv6 packets.
> > >
> > > Signed-off-by: Coco Li <lixiaoyan@google.com>
> > > Signed-off-by: Eric Dumazet <edumazet@google.com>
> >
> > This is another spot where it doesn't make much sense to me to add yet
> > another control. Instead it would make much more sense to simply remove
> > the cap from the existing control and simply add a check that caps the
> > non-IPv6 protocols at GRO_MAX_SIZE.
>
> Can you please send a diff on top of our patch series ?

I would rather not as it would essentially just be a revert of the two
problematic patches since what I am suggesting is significantly
smaller.

> It is kind of hard to see what you want, and _why_ you want this.
>
> Note that GRO_MAX_SIZE has been replaced by dev->gro_max_size last year.

I am using GRO_MAX_SIZE as a legacy value for everything that is not
IPv6. If it would help you could go back and take a look at Jakub's
patch series and see what he did with TSO_LEGACY_MAX_SIZE. You could
think of my use here as GRO_LEGACY_MAX_SIZE. What I am doing is
capping all the non-ipv6/tcp flows at the default maximum limit for
legacy setups.

> Yes, yet another control, but some people want more control than others I guess.

Basically these patches are reducing functionality from an existing
control. The g[sr]o_max_size values were applied to all incoming or
outgoing traffic. The patches are adding a special control that only
applies to a subset of ipv6 traffic. Instead of taking that route I
would rather have the max_size values allowed to exceed the legacy
limits, and in those cases that cannot support the new sizes we
default back to the legacy maxes. Doing that I feel like we would get
much more consistent behavior and if somebody is wanting to use these
values for their original intended purpose which was limiting the
traffic they will be able to affect all traffic, not just the
non-ipv6/tcp traffic.
Eric Dumazet May 6, 2022, 10:08 p.m. UTC | #4
On Fri, May 6, 2022 at 3:01 PM Alexander Duyck
<alexander.duyck@gmail.com> wrote:
>
> On Fri, May 6, 2022 at 2:22 PM Eric Dumazet <edumazet@google.com> wrote:
> >
> > On Fri, May 6, 2022 at 2:06 PM Alexander H Duyck
> > <alexander.duyck@gmail.com> wrote:
> > >
> > > On Fri, 2022-05-06 at 08:30 -0700, Eric Dumazet wrote:
> > > > From: Coco Li <lixiaoyan@google.com>
> > > >
> > > > Enable GRO to have IPv6 specific limit for max packet size.
> > > >
> > > > This patch introduces new dev->gro_ipv6_max_size
> > > > that is modifiable through ip link.
> > > >
> > > > ip link set dev eth0 gro_ipv6_max_size 185000
> > > >
> > > > Note that this value is only considered if bigger than
> > > > gro_max_size, and for non encapsulated TCP/ipv6 packets.
> > > >
> > > > Signed-off-by: Coco Li <lixiaoyan@google.com>
> > > > Signed-off-by: Eric Dumazet <edumazet@google.com>
> > >
> > > This is another spot where it doesn't make much sense to me to add yet
> > > another control. Instead it would make much more sense to simply remove
> > > the cap from the existing control and simply add a check that caps the
> > > non-IPv6 protocols at GRO_MAX_SIZE.
> >
> > Can you please send a diff on top of our patch series ?
>
> I would rather not as it would essentially just be a revert of the two
> problematic patches since what I am suggesting is significantly
> smaller.
>
> > It is kind of hard to see what you want, and _why_ you want this.
> >
> > Note that GRO_MAX_SIZE has been replaced by dev->gro_max_size last year.
>
> I am using GRO_MAX_SIZE as a legacy value for everything that is not
> IPv6. If it would help you could go back and take a look at Jakub's
> patch series and see what he did with TSO_LEGACY_MAX_SIZE.

Yes, I was the one suggesting this TSO_LEGACY_MAX_SIZE.

> You could
> think of my use here as GRO_LEGACY_MAX_SIZE. What I am doing is
> capping all the non-ipv6/tcp flows at the default maximum limit for
> legacy setups.
>
> > Yes, yet another control, but some people want more control than others I guess.
>
> Basically these patches are reducing functionality from an existing
> control. The g[sr]o_max_size values were applied to all incoming or
> outgoing traffic.

Yes, and we need to change that, otherwise we are stuck at 65536,
because legacy.

> The patches are adding a special control that only applies to a subset of ipv6 traffic.

Exactly. This is not an accident.

> Instead of taking that route I
> would rather have the max_size values allowed to exceed the legacy
> limits, and in those cases that cannot support the new sizes we
> default back to the legacy maxes.

Please send a tested patch. I think it will break drivers.

We spent months doing extensive tests, and I do not see any reason to spend more
time on something that you suggest that I feel is wrong.

> Doing that I feel like we would get
> much more consistent behavior and if somebody is wanting to use these
> values for their original intended purpose which was limiting the
> traffic they will be able to affect all traffic, not just the
> non-ipv6/tcp traffic.

Some people (not us) want to add BIG-TCP with IPv4 as well in a future
evolution.
diff mbox series

Patch

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 47f413dac12e901700045f4b73d47ecdca0f4f3c..df12c9843d94cb847e0ce5ba1b3b36bde7d476ed 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1962,6 +1962,8 @@  enum netdev_ml_priv_type {
  *			keep a list of interfaces to be deleted.
  *	@gro_max_size:	Maximum size of aggregated packet in generic
  *			receive offload (GRO)
+ *	@gro_ipv6_max_size:	Maximum size of aggregated packet in generic
+ *				receive offload (GRO), for IPv6
  *
  *	@dev_addr_shadow:	Copy of @dev_addr to catch direct writes.
  *	@linkwatch_dev_tracker:	refcount tracker used by linkwatch.
@@ -2154,6 +2156,7 @@  struct net_device {
 	int			napi_defer_hard_irqs;
 #define GRO_MAX_SIZE		65536
 	unsigned int		gro_max_size;
+	unsigned int		gro_ipv6_max_size;
 	rx_handler_func_t __rcu	*rx_handler;
 	void __rcu		*rx_handler_data;
 
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index aa05fc9cc23f4ccf92f4cbba57f43472749cd42a..9ece3a391105c171057cc491c1458ee8a45e07e0 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -371,6 +371,7 @@  enum {
 	IFLA_TSO_MAX_SIZE,
 	IFLA_TSO_MAX_SEGS,
 	IFLA_GSO_IPV6_MAX_SIZE,
+	IFLA_GRO_IPV6_MAX_SIZE,
 
 	__IFLA_MAX
 };
diff --git a/net/core/dev.c b/net/core/dev.c
index aa8757215b2a9f14683f95086732668eb99a875b..582b7fe052a6fb06437f95bd6a451b79e188cc57 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -10608,6 +10608,7 @@  struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
 	dev->tso_max_size = TSO_LEGACY_MAX_SIZE;
 	dev->tso_max_segs = TSO_MAX_SEGS;
 	dev->gso_ipv6_max_size = GSO_MAX_SIZE;
+	dev->gro_ipv6_max_size = GRO_MAX_SIZE;
 
 	dev->upper_level = 1;
 	dev->lower_level = 1;
diff --git a/net/core/gro.c b/net/core/gro.c
index 78110edf5d4b36d2fa6f8a2676096efe0112aa0e..8b35403dd7e909a8d7df591d952a4600c13f360b 100644
--- a/net/core/gro.c
+++ b/net/core/gro.c
@@ -161,11 +161,27 @@  int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb)
 	unsigned int new_truesize;
 	struct sk_buff *lp;
 
+	if (unlikely(NAPI_GRO_CB(skb)->flush))
+		return -E2BIG;
+
 	/* pairs with WRITE_ONCE() in netif_set_gro_max_size() */
 	gro_max_size = READ_ONCE(p->dev->gro_max_size);
 
-	if (unlikely(p->len + len >= gro_max_size || NAPI_GRO_CB(skb)->flush))
-		return -E2BIG;
+	if (unlikely(p->len + len >= gro_max_size)) {
+		/* pairs with WRITE_ONCE() in netif_set_gro_ipv6_max_size() */
+		unsigned int gro6_max_size = READ_ONCE(p->dev->gro_ipv6_max_size);
+
+		if (gro6_max_size > gro_max_size &&
+		    p->protocol == htons(ETH_P_IPV6) &&
+		    skb_headroom(p) >= sizeof(struct hop_jumbo_hdr) &&
+		    ipv6_hdr(p)->nexthdr == IPPROTO_TCP &&
+		    !p->encapsulation)
+			gro_max_size = gro6_max_size;
+
+		if (p->len + len >= gro_max_size)
+			return -E2BIG;
+	}
+
 
 	lp = NAPI_GRO_CB(p)->last;
 	pinfo = skb_shinfo(lp);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 847cf80f81754451e5f220f846db734a7625695b..5fa3ff835aaf6601c31458ec88e88837d353eabd 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1067,6 +1067,7 @@  static noinline size_t if_nlmsg_size(const struct net_device *dev,
 	       + nla_total_size(4) /* IFLA_TSO_MAX_SIZE */
 	       + nla_total_size(4) /* IFLA_TSO_MAX_SEGS */
 	       + nla_total_size(4) /* IFLA_GSO_IPV6_MAX_SIZE */
+	       + nla_total_size(4) /* IFLA_GRO_IPV6_MAX_SIZE */
 	       + nla_total_size(1) /* IFLA_OPERSTATE */
 	       + nla_total_size(1) /* IFLA_LINKMODE */
 	       + nla_total_size(4) /* IFLA_CARRIER_CHANGES */
@@ -1775,6 +1776,7 @@  static int rtnl_fill_ifinfo(struct sk_buff *skb,
 	    nla_put_u32(skb, IFLA_TSO_MAX_SIZE, dev->tso_max_size) ||
 	    nla_put_u32(skb, IFLA_TSO_MAX_SEGS, dev->tso_max_segs) ||
 	    nla_put_u32(skb, IFLA_GSO_IPV6_MAX_SIZE, dev->gso_ipv6_max_size) ||
+	    nla_put_u32(skb, IFLA_GRO_IPV6_MAX_SIZE, dev->gro_ipv6_max_size) ||
 #ifdef CONFIG_RPS
 	    nla_put_u32(skb, IFLA_NUM_RX_QUEUES, dev->num_rx_queues) ||
 #endif
@@ -1931,6 +1933,7 @@  static const struct nla_policy ifla_policy[IFLA_MAX+1] = {
 	[IFLA_TSO_MAX_SIZE]	= { .type = NLA_REJECT },
 	[IFLA_TSO_MAX_SEGS]	= { .type = NLA_REJECT },
 	[IFLA_GSO_IPV6_MAX_SIZE] = { .type = NLA_U32 },
+	[IFLA_GRO_IPV6_MAX_SIZE] = { .type = NLA_U32 },
 };
 
 static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = {
@@ -2655,6 +2658,13 @@  static void netif_set_gso_ipv6_max_size(struct net_device *dev,
 	WRITE_ONCE(dev->gso_ipv6_max_size, size);
 }
 
+static void netif_set_gro_ipv6_max_size(struct net_device *dev,
+					unsigned int size)
+{
+	/* This pairs with the READ_ONCE() in skb_gro_receive() */
+	WRITE_ONCE(dev->gro_ipv6_max_size, size);
+}
+
 #define DO_SETLINK_MODIFIED	0x01
 /* notify flag means notify + modified. */
 #define DO_SETLINK_NOTIFY	0x03
@@ -2840,6 +2850,15 @@  static int do_setlink(const struct sk_buff *skb,
 		}
 	}
 
+	if (tb[IFLA_GRO_IPV6_MAX_SIZE]) {
+		u32 max_size = nla_get_u32(tb[IFLA_GRO_IPV6_MAX_SIZE]);
+
+		if (dev->gro_ipv6_max_size ^ max_size) {
+			netif_set_gro_ipv6_max_size(dev, max_size);
+			status |= DO_SETLINK_MODIFIED;
+		}
+	}
+
 	if (tb[IFLA_GSO_MAX_SEGS]) {
 		u32 max_segs = nla_get_u32(tb[IFLA_GSO_MAX_SEGS]);
 
@@ -3306,6 +3325,9 @@  struct net_device *rtnl_create_link(struct net *net, const char *ifname,
 	if (tb[IFLA_GSO_IPV6_MAX_SIZE])
 		netif_set_gso_ipv6_max_size(dev,
 			nla_get_u32(tb[IFLA_GSO_IPV6_MAX_SIZE]));
+	if (tb[IFLA_GRO_IPV6_MAX_SIZE])
+		netif_set_gro_ipv6_max_size(dev,
+			nla_get_u32(tb[IFLA_GRO_IPV6_MAX_SIZE]));
 
 	return dev;
 }
diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h
index 443eddd285f37198566fa1357f0d394ec5270ab9..5aead1be6b99623fb6ffd31cfcfd44976eb8794f 100644
--- a/tools/include/uapi/linux/if_link.h
+++ b/tools/include/uapi/linux/if_link.h
@@ -351,6 +351,7 @@  enum {
 	IFLA_TSO_MAX_SIZE,
 	IFLA_TSO_MAX_SEGS,
 	IFLA_GSO_IPV6_MAX_SIZE,
+	IFLA_GRO_IPV6_MAX_SIZE,
 
 	__IFLA_MAX
 };