diff mbox series

[v4,net-next,02/12] ipv6: add IFLA_GSO_IPV6_MAX_SIZE

Message ID 20220506153048.3695721-3-eric.dumazet@gmail.com (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Headers show
Series tcp: BIG TCP implementation | expand

Checks

Context Check Description
netdev/tree_selection success Clearly marked for net-next, async
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix success Link
netdev/cover_letter success Series has a cover letter
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 4696 this patch: 4696
netdev/cc_maintainers warning 2 maintainers not CCed: petrm@nvidia.com liuhangbin@gmail.com
netdev/build_clang success Errors and warnings before: 1059 this patch: 1059
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 4847 this patch: 4847
netdev/checkpatch warning CHECK: Alignment should match open parenthesis WARNING: line length of 83 exceeds 80 columns WARNING: line length of 93 exceeds 80 columns WARNING: line length of 95 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Eric Dumazet May 6, 2022, 3:30 p.m. UTC
From: Coco Li <lixiaoyan@google.com>

This enables ipv6/TCP stacks to build TSO packets bigger than
64KB if the driver is LSOv2 compatible.

This patch introduces new variable gso_ipv6_max_size
that is modifiable through ip link.

ip link set dev eth0 gso_ipv6_max_size 185000

User input is capped by driver limit (tso_max_size)

Signed-off-by: Coco Li <lixiaoyan@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
---
 include/linux/netdevice.h          |  2 ++
 include/uapi/linux/if_link.h       |  1 +
 net/core/dev.c                     |  2 ++
 net/core/rtnetlink.c               | 23 +++++++++++++++++++++++
 net/core/sock.c                    |  8 ++++++++
 tools/include/uapi/linux/if_link.h |  1 +
 6 files changed, 37 insertions(+)

Comments

Alexander Duyck May 6, 2022, 8:48 p.m. UTC | #1
On Fri, 2022-05-06 at 08:30 -0700, Eric Dumazet wrote:
> From: Coco Li <lixiaoyan@google.com>
> 
> This enables ipv6/TCP stacks to build TSO packets bigger than
> 64KB if the driver is LSOv2 compatible.
> 
> This patch introduces new variable gso_ipv6_max_size
> that is modifiable through ip link.
> 
> ip link set dev eth0 gso_ipv6_max_size 185000
> 
> User input is capped by driver limit (tso_max_size)
> 
> Signed-off-by: Coco Li <lixiaoyan@google.com>
> Signed-off-by: Eric Dumazet <edumazet@google.com>

So I am still not a fan of adding all this extra tooling to make an
attribute that is just being applied to one protocol. Why not just
allow gso_max_size to extend beyond 64K and only limit it by
tso_max_size?

Doing that would make this patch much simpler as most of the code below
could be dropped.

> ---
>  include/linux/netdevice.h          |  2 ++
>  include/uapi/linux/if_link.h       |  1 +
>  net/core/dev.c                     |  2 ++
>  net/core/rtnetlink.c               | 23 +++++++++++++++++++++++
>  net/core/sock.c                    |  8 ++++++++
>  tools/include/uapi/linux/if_link.h |  1 +
>  6 files changed, 37 insertions(+)

<snip>

> diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
> index 512ed661204e0c66c8dcfaddc3001500d10f63ab..847cf80f81754451e5f220f846db734a7625695b 100644
> --- a/net/core/rtnetlink.c
> +++ b/net/core/rtnetlink.c

<snip>

> @@ -2820,6 +2831,15 @@ static int do_setlink(const struct sk_buff *skb,
>  		}
>  	}
>  
> +	if (tb[IFLA_GSO_IPV6_MAX_SIZE]) {
> +		u32 max_size = nla_get_u32(tb[IFLA_GSO_IPV6_MAX_SIZE]);
> +
> +		if (dev->gso_ipv6_max_size ^ max_size) {
> +			netif_set_gso_ipv6_max_size(dev, max_size);
> +			status |= DO_SETLINK_MODIFIED;
> +		}
> +	}
> +
>  	if (tb[IFLA_GSO_MAX_SEGS]) {
>  		u32 max_segs = nla_get_u32(tb[IFLA_GSO_MAX_SEGS]);
>  

So the this code wouldn't be needed but the block above where we are
doing the check for max_size > GSO_MAX_SIZE could be removed.

> @@ -3283,6 +3303,9 @@ struct net_device *rtnl_create_link(struct net *net, const char *ifname,
>  		netif_set_gso_max_segs(dev, nla_get_u32(tb[IFLA_GSO_MAX_SEGS]));
>  	if (tb[IFLA_GRO_MAX_SIZE])
>  		netif_set_gro_max_size(dev, nla_get_u32(tb[IFLA_GRO_MAX_SIZE]));
> +	if (tb[IFLA_GSO_IPV6_MAX_SIZE])
> +		netif_set_gso_ipv6_max_size(dev,
> +			nla_get_u32(tb[IFLA_GSO_IPV6_MAX_SIZE]));
>  
>  	return dev;
>  }
> diff --git a/net/core/sock.c b/net/core/sock.c
> index 6b287eb5427b32865d25fc22122fefeff3a4ccf5..4a29c3bf6b95f76280d8e32e903a0916322d5c4f 100644
> --- a/net/core/sock.c
> +++ b/net/core/sock.c
> @@ -2312,6 +2312,14 @@ void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
>  			sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM;
>  			/* pairs with the WRITE_ONCE() in netif_set_gso_max_size() */
>  			sk->sk_gso_max_size = READ_ONCE(dst->dev->gso_max_size);
> +#if IS_ENABLED(CONFIG_IPV6)
> +			if (sk->sk_family == AF_INET6 &&
> +			    sk_is_tcp(sk) &&
> +			    !ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr)) {
> +				/* Paired with WRITE_ONCE() in netif_set_gso_ipv6_max_size() */
> +				sk->sk_gso_max_size = READ_ONCE(dst->dev->gso_ipv6_max_size);
> +			}
> +#endif
>  			sk->sk_gso_max_size -= (MAX_TCP_HEADER + 1);
>  			/* pairs with the WRITE_ONCE() in netif_set_gso_max_segs() */
>  			max_segs = max_t(u32, READ_ONCE(dst->dev->gso_max_segs), 1);

This block here could then be rewritten as:
if (sk->sk_gso_max_size > GSO_MAX_SIZE &&
    (!IS_ENABLED(CONFIG_IPV6) || sk->sk_family != AF_INET6 || 
     !skb_is_tcp(sk) || ipv6_addr_v4mapped(&sk->sk_v6_rcf_saddr))
	sk->sk_gso_max_size = GSO_MAX_SIZE;

Then if we need protocol specific knobs in the future we could always
come back and make them act as caps instead of outright replacements
for the gso_max_size value.
Eric Dumazet May 6, 2022, 9:20 p.m. UTC | #2
On Fri, May 6, 2022 at 1:48 PM Alexander H Duyck
<alexander.duyck@gmail.com> wrote:
>
> On Fri, 2022-05-06 at 08:30 -0700, Eric Dumazet wrote:
> > From: Coco Li <lixiaoyan@google.com>
> >
> > This enables ipv6/TCP stacks to build TSO packets bigger than
> > 64KB if the driver is LSOv2 compatible.
> >
> > This patch introduces new variable gso_ipv6_max_size
> > that is modifiable through ip link.
> >
> > ip link set dev eth0 gso_ipv6_max_size 185000
> >
> > User input is capped by driver limit (tso_max_size)
> >
> > Signed-off-by: Coco Li <lixiaoyan@google.com>
> > Signed-off-by: Eric Dumazet <edumazet@google.com>
>
> So I am still not a fan of adding all this extra tooling to make an
> attribute that is just being applied to one protocol. Why not just
> allow gso_max_size to extend beyond 64K and only limit it by
> tso_max_size?

Answer is easy, and documented in our paper. Please read it.

We do not want to enable BIG TCP for IPv4, this breaks user space badly.

I do not want to break tcpdump just because some people think TCP just works.

>
> Doing that would make this patch much simpler as most of the code below
> could be dropped.
>

Sure, but no thanks.
Alexander Duyck May 6, 2022, 9:37 p.m. UTC | #3
On Fri, May 6, 2022 at 2:20 PM Eric Dumazet <edumazet@google.com> wrote:
>
> On Fri, May 6, 2022 at 1:48 PM Alexander H Duyck
> <alexander.duyck@gmail.com> wrote:
> >
> > On Fri, 2022-05-06 at 08:30 -0700, Eric Dumazet wrote:
> > > From: Coco Li <lixiaoyan@google.com>
> > >
> > > This enables ipv6/TCP stacks to build TSO packets bigger than
> > > 64KB if the driver is LSOv2 compatible.
> > >
> > > This patch introduces new variable gso_ipv6_max_size
> > > that is modifiable through ip link.
> > >
> > > ip link set dev eth0 gso_ipv6_max_size 185000
> > >
> > > User input is capped by driver limit (tso_max_size)
> > >
> > > Signed-off-by: Coco Li <lixiaoyan@google.com>
> > > Signed-off-by: Eric Dumazet <edumazet@google.com>
> >
> > So I am still not a fan of adding all this extra tooling to make an
> > attribute that is just being applied to one protocol. Why not just
> > allow gso_max_size to extend beyond 64K and only limit it by
> > tso_max_size?
>
> Answer is easy, and documented in our paper. Please read it.

I have read it.

> We do not want to enable BIG TCP for IPv4, this breaks user space badly.
>
> I do not want to break tcpdump just because some people think TCP just works.

The changes I suggested don't enable it for IPv4. What your current
code is doing now is using dev->gso_max_size and if it is the correct
IPv6 type you are using dev->gso_ipv6_max_size. What I am saying is
that instead of adding yet another netdev control you should just make
it so that we retain existing behavior when gso_max_size is less than
GSO_MAX_SIZE, and when it exceeds it all non-ipv6 types max out at
GSO_MAX_SIZE and only the ipv6 type packets use gso_max_size when you
exceed GSO_MAX_SIZE.

The big thing I am not a fan of is adding protocol level controls down
in the link level interface. It makes things confusing. For example,
say somebody has existing scripts to limit the gso_max_size and they
were using IPv6 and your new control is added. Suddenly the
gso_max_size limitations they were setting won't be applied because
you split things up at the protocol level.
Eric Dumazet May 6, 2022, 9:50 p.m. UTC | #4
On Fri, May 6, 2022 at 2:37 PM Alexander Duyck
<alexander.duyck@gmail.com> wrote:
>
> On Fri, May 6, 2022 at 2:20 PM Eric Dumazet <edumazet@google.com> wrote:
> >
> > On Fri, May 6, 2022 at 1:48 PM Alexander H Duyck
> > <alexander.duyck@gmail.com> wrote:
> > >
> > > On Fri, 2022-05-06 at 08:30 -0700, Eric Dumazet wrote:
> > > > From: Coco Li <lixiaoyan@google.com>
> > > >
> > > > This enables ipv6/TCP stacks to build TSO packets bigger than
> > > > 64KB if the driver is LSOv2 compatible.
> > > >
> > > > This patch introduces new variable gso_ipv6_max_size
> > > > that is modifiable through ip link.
> > > >
> > > > ip link set dev eth0 gso_ipv6_max_size 185000
> > > >
> > > > User input is capped by driver limit (tso_max_size)
> > > >
> > > > Signed-off-by: Coco Li <lixiaoyan@google.com>
> > > > Signed-off-by: Eric Dumazet <edumazet@google.com>
> > >
> > > So I am still not a fan of adding all this extra tooling to make an
> > > attribute that is just being applied to one protocol. Why not just
> > > allow gso_max_size to extend beyond 64K and only limit it by
> > > tso_max_size?
> >
> > Answer is easy, and documented in our paper. Please read it.
>
> I have read it.
>
> > We do not want to enable BIG TCP for IPv4, this breaks user space badly.
> >
> > I do not want to break tcpdump just because some people think TCP just works.
>
> The changes I suggested don't enable it for IPv4. What your current
> code is doing now is using dev->gso_max_size and if it is the correct
> IPv6 type you are using dev->gso_ipv6_max_size. What I am saying is
> that instead of adding yet another netdev control you should just make
> it so that we retain existing behavior when gso_max_size is less than
> GSO_MAX_SIZE, and when it exceeds it all non-ipv6 types max out at
> GSO_MAX_SIZE and only the ipv6 type packets use gso_max_size when you
> exceed GSO_MAX_SIZE.

gso_max_size can not exceed GSO_MAX_SIZE.
This will break many drivers.
I do not want to change hundreds of them.

Look, we chose this implementation so that chances of breaking things
are very small.
I understand this is frustrating, but I suggest you take the
responsibility of breaking things,
and not add this on us.
Alexander Duyck May 6, 2022, 10:16 p.m. UTC | #5
On Fri, May 6, 2022 at 2:50 PM Eric Dumazet <edumazet@google.com> wrote:
>
> On Fri, May 6, 2022 at 2:37 PM Alexander Duyck
> <alexander.duyck@gmail.com> wrote:
> >
> > On Fri, May 6, 2022 at 2:20 PM Eric Dumazet <edumazet@google.com> wrote:
> > >
> > > On Fri, May 6, 2022 at 1:48 PM Alexander H Duyck
> > > <alexander.duyck@gmail.com> wrote:
> > > >
> > > > On Fri, 2022-05-06 at 08:30 -0700, Eric Dumazet wrote:
> > > > > From: Coco Li <lixiaoyan@google.com>
> > > > >
> > > > > This enables ipv6/TCP stacks to build TSO packets bigger than
> > > > > 64KB if the driver is LSOv2 compatible.
> > > > >
> > > > > This patch introduces new variable gso_ipv6_max_size
> > > > > that is modifiable through ip link.
> > > > >
> > > > > ip link set dev eth0 gso_ipv6_max_size 185000
> > > > >
> > > > > User input is capped by driver limit (tso_max_size)
> > > > >
> > > > > Signed-off-by: Coco Li <lixiaoyan@google.com>
> > > > > Signed-off-by: Eric Dumazet <edumazet@google.com>
> > > >
> > > > So I am still not a fan of adding all this extra tooling to make an
> > > > attribute that is just being applied to one protocol. Why not just
> > > > allow gso_max_size to extend beyond 64K and only limit it by
> > > > tso_max_size?
> > >
> > > Answer is easy, and documented in our paper. Please read it.
> >
> > I have read it.
> >
> > > We do not want to enable BIG TCP for IPv4, this breaks user space badly.
> > >
> > > I do not want to break tcpdump just because some people think TCP just works.
> >
> > The changes I suggested don't enable it for IPv4. What your current
> > code is doing now is using dev->gso_max_size and if it is the correct
> > IPv6 type you are using dev->gso_ipv6_max_size. What I am saying is
> > that instead of adding yet another netdev control you should just make
> > it so that we retain existing behavior when gso_max_size is less than
> > GSO_MAX_SIZE, and when it exceeds it all non-ipv6 types max out at
> > GSO_MAX_SIZE and only the ipv6 type packets use gso_max_size when you
> > exceed GSO_MAX_SIZE.
>
> gso_max_size can not exceed GSO_MAX_SIZE.
> This will break many drivers.
> I do not want to change hundreds of them.

Most drivers will not be impacted because they cannot exceed
tso_max_size. The tso_max_size is the limit, not GSO_MAX_SIZE. Last I
knew this patch set is overwriting that value to increase it beyond
the legacy limits.

Right now the check is:
if (max_size > GSO_MAX_SIZE || || max_size > dev->tso_max_size)

What I am suggesting is that tso_max_size be used as the only limit,
which is already defaulted to cap out at TSO_LEGACY_MAX_SIZE. So just
remove the "max_size > GSO_MAX_SIZE ||" portion of the call. Then when
you call netif_set_tso_max_size in the driver to enable jumbograms you
are good to set gso_max_size to something larger than the standard
65536.

> Look, we chose this implementation so that chances of breaking things
> are very small.
> I understand this is frustrating, but I suggest you take the
> responsibility of breaking things,
> and not add this on us.

What I have been trying to point out is your patch set will break things.

For all those cases out there where people are using gso_max_size to
limit things you just poked a hole in that for IPv6 cases. What I am
suggesting is that we don't do that as it will be likely to trigger a
number of problems for people.

The primary reason gso_max_size was added was because there are cases
out there where doing too big of a TSO was breaking things. For
devices that are being used for LSOv2 I highly doubt they need to
worry about cases less than 65536. As such they can just max out at
65536 for all non-IPv6 traffic and instead use gso_max_size as the
limit for the IPv6/TSO case.
Eric Dumazet May 6, 2022, 10:25 p.m. UTC | #6
On Fri, May 6, 2022 at 3:16 PM Alexander Duyck
<alexander.duyck@gmail.com> wrote:
>
> On Fri, May 6, 2022 at 2:50 PM Eric Dumazet <edumazet@google.com> wrote:
> >
> > On Fri, May 6, 2022 at 2:37 PM Alexander Duyck
> > <alexander.duyck@gmail.com> wrote:
> > >
> > > On Fri, May 6, 2022 at 2:20 PM Eric Dumazet <edumazet@google.com> wrote:
> > > >
> > > > On Fri, May 6, 2022 at 1:48 PM Alexander H Duyck
> > > > <alexander.duyck@gmail.com> wrote:
> > > > >
> > > > > On Fri, 2022-05-06 at 08:30 -0700, Eric Dumazet wrote:
> > > > > > From: Coco Li <lixiaoyan@google.com>
> > > > > >
> > > > > > This enables ipv6/TCP stacks to build TSO packets bigger than
> > > > > > 64KB if the driver is LSOv2 compatible.
> > > > > >
> > > > > > This patch introduces new variable gso_ipv6_max_size
> > > > > > that is modifiable through ip link.
> > > > > >
> > > > > > ip link set dev eth0 gso_ipv6_max_size 185000
> > > > > >
> > > > > > User input is capped by driver limit (tso_max_size)
> > > > > >
> > > > > > Signed-off-by: Coco Li <lixiaoyan@google.com>
> > > > > > Signed-off-by: Eric Dumazet <edumazet@google.com>
> > > > >
> > > > > So I am still not a fan of adding all this extra tooling to make an
> > > > > attribute that is just being applied to one protocol. Why not just
> > > > > allow gso_max_size to extend beyond 64K and only limit it by
> > > > > tso_max_size?
> > > >
> > > > Answer is easy, and documented in our paper. Please read it.
> > >
> > > I have read it.
> > >
> > > > We do not want to enable BIG TCP for IPv4, this breaks user space badly.
> > > >
> > > > I do not want to break tcpdump just because some people think TCP just works.
> > >
> > > The changes I suggested don't enable it for IPv4. What your current
> > > code is doing now is using dev->gso_max_size and if it is the correct
> > > IPv6 type you are using dev->gso_ipv6_max_size. What I am saying is
> > > that instead of adding yet another netdev control you should just make
> > > it so that we retain existing behavior when gso_max_size is less than
> > > GSO_MAX_SIZE, and when it exceeds it all non-ipv6 types max out at
> > > GSO_MAX_SIZE and only the ipv6 type packets use gso_max_size when you
> > > exceed GSO_MAX_SIZE.
> >
> > gso_max_size can not exceed GSO_MAX_SIZE.
> > This will break many drivers.
> > I do not want to change hundreds of them.
>
> Most drivers will not be impacted because they cannot exceed
> tso_max_size. The tso_max_size is the limit, not GSO_MAX_SIZE. Last I
> knew this patch set is overwriting that value to increase it beyond
> the legacy limits.
>
> Right now the check is:
> if (max_size > GSO_MAX_SIZE || || max_size > dev->tso_max_size)
>

This is Jakub patch, already merged. Nothing to do with BIG TCP ?


> What I am suggesting is that tso_max_size be used as the only limit,
> which is already defaulted to cap out at TSO_LEGACY_MAX_SIZE. So just
> remove the "max_size > GSO_MAX_SIZE ||" portion of the call. Then when
> you call netif_set_tso_max_size in the driver to enable jumbograms you
> are good to set gso_max_size to something larger than the standard
> 65536.

Again, this was Jakub patch, right ?

>
> > Look, we chose this implementation so that chances of breaking things
> > are very small.
> > I understand this is frustrating, but I suggest you take the
> > responsibility of breaking things,
> > and not add this on us.
>
> What I have been trying to point out is your patch set will break things.
>

Can you give a concrete example ?

> For all those cases out there where people are using gso_max_size to
> limit things you just poked a hole in that for IPv6 cases. What I am
> suggesting is that we don't do that as it will be likely to trigger a
> number of problems for people.

No, because legacy drivers won't  use BIG TCP.

dev->tso_max_size will be <= 65536 for them.

Look at netif_set_gso_ipv6_max_size() logic.

>
> The primary reason gso_max_size was added was because there are cases
> out there where doing too big of a TSO was breaking things. For
> devices that are being used for LSOv2 I highly doubt they need to
> worry about cases less than 65536. As such they can just max out at
> 65536 for all non-IPv6 traffic and instead use gso_max_size as the
> limit for the IPv6/TSO case.

I think we disagree completely.
Jakub Kicinski May 6, 2022, 10:26 p.m. UTC | #7
On Fri, 6 May 2022 15:16:21 -0700 Alexander Duyck wrote:
> On Fri, May 6, 2022 at 2:50 PM Eric Dumazet <edumazet@google.com> wrote:
> > gso_max_size can not exceed GSO_MAX_SIZE.
> > This will break many drivers.
> > I do not want to change hundreds of them.  
> 
> Most drivers will not be impacted because they cannot exceed
> tso_max_size. The tso_max_size is the limit, not GSO_MAX_SIZE. Last I
> knew this patch set is overwriting that value to increase it beyond
> the legacy limits.
> 
> Right now the check is:
> if (max_size > GSO_MAX_SIZE || max_size > dev->tso_max_size)
> 
> What I am suggesting is that tso_max_size be used as the only limit,
> which is already defaulted to cap out at TSO_LEGACY_MAX_SIZE. So just
> remove the "max_size > GSO_MAX_SIZE ||" portion of the call. Then when
> you call netif_set_tso_max_size in the driver to enable jumbograms you
> are good to set gso_max_size to something larger than the standard
> 65536.

TBH that was my expectation as well.

Drivers should not pay any attention to dev->gso_* any longer.

> > Look, we chose this implementation so that chances of breaking things
> > are very small.
> > I understand this is frustrating, but I suggest you take the
> > responsibility of breaking things,
> > and not add this on us.  
> 
> What I have been trying to point out is your patch set will break things.
> 
> For all those cases out there where people are using gso_max_size to
> limit things you just poked a hole in that for IPv6 cases. What I am
> suggesting is that we don't do that as it will be likely to trigger a
> number of problems for people.
> 
> The primary reason gso_max_size was added was because there are cases
> out there where doing too big of a TSO was breaking things. For
> devices that are being used for LSOv2 I highly doubt they need to
> worry about cases less than 65536. As such they can just max out at
> 65536 for all non-IPv6 traffic and instead use gso_max_size as the
> limit for the IPv6/TSO case.

Good point. GSO limit is expected to be a cap, so we shouldn't go above
it. At the same time nothing wrong with IPv4 continuing to generate 64k
GSOs after the user raises the limit.
Alexander Duyck May 6, 2022, 10:46 p.m. UTC | #8
On Fri, May 6, 2022 at 3:26 PM Jakub Kicinski <kuba@kernel.org> wrote:
>
> On Fri, 6 May 2022 15:16:21 -0700 Alexander Duyck wrote:
> > On Fri, May 6, 2022 at 2:50 PM Eric Dumazet <edumazet@google.com> wrote:
> > > gso_max_size can not exceed GSO_MAX_SIZE.
> > > This will break many drivers.
> > > I do not want to change hundreds of them.
> >
> > Most drivers will not be impacted because they cannot exceed
> > tso_max_size. The tso_max_size is the limit, not GSO_MAX_SIZE. Last I
> > knew this patch set is overwriting that value to increase it beyond
> > the legacy limits.
> >
> > Right now the check is:
> > if (max_size > GSO_MAX_SIZE || max_size > dev->tso_max_size)
> >
> > What I am suggesting is that tso_max_size be used as the only limit,
> > which is already defaulted to cap out at TSO_LEGACY_MAX_SIZE. So just
> > remove the "max_size > GSO_MAX_SIZE ||" portion of the call. Then when
> > you call netif_set_tso_max_size in the driver to enable jumbograms you
> > are good to set gso_max_size to something larger than the standard
> > 65536.
>
> TBH that was my expectation as well.
>
> Drivers should not pay any attention to dev->gso_* any longer.

Right. However, there are a few protocol items that it looks like do
need to be addressed as SCTP and FCoE appear to be accessing it raw
without any wrappers. So there will be more work than what I called
out to deal with as those would probably need to be wrapped in a min()
function call using the legacy max.

I can take a look at generating the patches if we really want to go
down that route, but it will take me a day or two to get it coded up
as I don't have a ton of free time to work on side projects.
diff mbox series

Patch

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 8cf0ac616cb9b7279e643cf6630f42c807742244..47f413dac12e901700045f4b73d47ecdca0f4f3c 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1917,6 +1917,7 @@  enum netdev_ml_priv_type {
  *	@rtnl_link_ops:	Rtnl_link_ops
  *
  *	@gso_max_size:	Maximum size of generic segmentation offload
+ *	@gso_ipv6_max_size:	Maximum size of IPv6 GSO packets (user/admin limit)
  *	@tso_max_size:	Device (as in HW) limit on the max TSO request size
  *	@gso_max_segs:	Maximum number of segments that can be passed to the
  *			NIC for GSO
@@ -2264,6 +2265,7 @@  struct net_device {
 	/* for setting kernel sock attribute on TCP connection setup */
 #define GSO_MAX_SIZE		65536
 	unsigned int		gso_max_size;
+	unsigned int		gso_ipv6_max_size;
 #define TSO_LEGACY_MAX_SIZE	65536
 #define TSO_MAX_SIZE		UINT_MAX
 	unsigned int		tso_max_size;
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index 5f58dcfe2787f308bb2aa5777cca0816dd32bbb9..aa05fc9cc23f4ccf92f4cbba57f43472749cd42a 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -370,6 +370,7 @@  enum {
 	IFLA_GRO_MAX_SIZE,
 	IFLA_TSO_MAX_SIZE,
 	IFLA_TSO_MAX_SEGS,
+	IFLA_GSO_IPV6_MAX_SIZE,
 
 	__IFLA_MAX
 };
diff --git a/net/core/dev.c b/net/core/dev.c
index f036ccb61da4da3ffc52c4f2402427054b831e8a..aa8757215b2a9f14683f95086732668eb99a875b 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -10607,6 +10607,8 @@  struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
 	dev->gro_max_size = GRO_MAX_SIZE;
 	dev->tso_max_size = TSO_LEGACY_MAX_SIZE;
 	dev->tso_max_segs = TSO_MAX_SEGS;
+	dev->gso_ipv6_max_size = GSO_MAX_SIZE;
+
 	dev->upper_level = 1;
 	dev->lower_level = 1;
 #ifdef CONFIG_LOCKDEP
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 512ed661204e0c66c8dcfaddc3001500d10f63ab..847cf80f81754451e5f220f846db734a7625695b 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1066,6 +1066,7 @@  static noinline size_t if_nlmsg_size(const struct net_device *dev,
 	       + nla_total_size(4) /* IFLA_GRO_MAX_SIZE */
 	       + nla_total_size(4) /* IFLA_TSO_MAX_SIZE */
 	       + nla_total_size(4) /* IFLA_TSO_MAX_SEGS */
+	       + nla_total_size(4) /* IFLA_GSO_IPV6_MAX_SIZE */
 	       + nla_total_size(1) /* IFLA_OPERSTATE */
 	       + nla_total_size(1) /* IFLA_LINKMODE */
 	       + nla_total_size(4) /* IFLA_CARRIER_CHANGES */
@@ -1773,6 +1774,7 @@  static int rtnl_fill_ifinfo(struct sk_buff *skb,
 	    nla_put_u32(skb, IFLA_GRO_MAX_SIZE, dev->gro_max_size) ||
 	    nla_put_u32(skb, IFLA_TSO_MAX_SIZE, dev->tso_max_size) ||
 	    nla_put_u32(skb, IFLA_TSO_MAX_SEGS, dev->tso_max_segs) ||
+	    nla_put_u32(skb, IFLA_GSO_IPV6_MAX_SIZE, dev->gso_ipv6_max_size) ||
 #ifdef CONFIG_RPS
 	    nla_put_u32(skb, IFLA_NUM_RX_QUEUES, dev->num_rx_queues) ||
 #endif
@@ -1928,6 +1930,7 @@  static const struct nla_policy ifla_policy[IFLA_MAX+1] = {
 	[IFLA_GRO_MAX_SIZE]	= { .type = NLA_U32 },
 	[IFLA_TSO_MAX_SIZE]	= { .type = NLA_REJECT },
 	[IFLA_TSO_MAX_SEGS]	= { .type = NLA_REJECT },
+	[IFLA_GSO_IPV6_MAX_SIZE] = { .type = NLA_U32 },
 };
 
 static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = {
@@ -2644,6 +2647,14 @@  static int do_set_proto_down(struct net_device *dev,
 	return 0;
 }
 
+static void netif_set_gso_ipv6_max_size(struct net_device *dev,
+					unsigned int size)
+{
+	size = min(size, dev->tso_max_size);
+	/* Paired with READ_ONCE() in sk_setup_caps() */
+	WRITE_ONCE(dev->gso_ipv6_max_size, size);
+}
+
 #define DO_SETLINK_MODIFIED	0x01
 /* notify flag means notify + modified. */
 #define DO_SETLINK_NOTIFY	0x03
@@ -2820,6 +2831,15 @@  static int do_setlink(const struct sk_buff *skb,
 		}
 	}
 
+	if (tb[IFLA_GSO_IPV6_MAX_SIZE]) {
+		u32 max_size = nla_get_u32(tb[IFLA_GSO_IPV6_MAX_SIZE]);
+
+		if (dev->gso_ipv6_max_size ^ max_size) {
+			netif_set_gso_ipv6_max_size(dev, max_size);
+			status |= DO_SETLINK_MODIFIED;
+		}
+	}
+
 	if (tb[IFLA_GSO_MAX_SEGS]) {
 		u32 max_segs = nla_get_u32(tb[IFLA_GSO_MAX_SEGS]);
 
@@ -3283,6 +3303,9 @@  struct net_device *rtnl_create_link(struct net *net, const char *ifname,
 		netif_set_gso_max_segs(dev, nla_get_u32(tb[IFLA_GSO_MAX_SEGS]));
 	if (tb[IFLA_GRO_MAX_SIZE])
 		netif_set_gro_max_size(dev, nla_get_u32(tb[IFLA_GRO_MAX_SIZE]));
+	if (tb[IFLA_GSO_IPV6_MAX_SIZE])
+		netif_set_gso_ipv6_max_size(dev,
+			nla_get_u32(tb[IFLA_GSO_IPV6_MAX_SIZE]));
 
 	return dev;
 }
diff --git a/net/core/sock.c b/net/core/sock.c
index 6b287eb5427b32865d25fc22122fefeff3a4ccf5..4a29c3bf6b95f76280d8e32e903a0916322d5c4f 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -2312,6 +2312,14 @@  void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
 			sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM;
 			/* pairs with the WRITE_ONCE() in netif_set_gso_max_size() */
 			sk->sk_gso_max_size = READ_ONCE(dst->dev->gso_max_size);
+#if IS_ENABLED(CONFIG_IPV6)
+			if (sk->sk_family == AF_INET6 &&
+			    sk_is_tcp(sk) &&
+			    !ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr)) {
+				/* Paired with WRITE_ONCE() in netif_set_gso_ipv6_max_size() */
+				sk->sk_gso_max_size = READ_ONCE(dst->dev->gso_ipv6_max_size);
+			}
+#endif
 			sk->sk_gso_max_size -= (MAX_TCP_HEADER + 1);
 			/* pairs with the WRITE_ONCE() in netif_set_gso_max_segs() */
 			max_segs = max_t(u32, READ_ONCE(dst->dev->gso_max_segs), 1);
diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h
index b339bf2196ca160ed3040615ae624b9a028562fb..443eddd285f37198566fa1357f0d394ec5270ab9 100644
--- a/tools/include/uapi/linux/if_link.h
+++ b/tools/include/uapi/linux/if_link.h
@@ -350,6 +350,7 @@  enum {
 	IFLA_GRO_MAX_SIZE,
 	IFLA_TSO_MAX_SIZE,
 	IFLA_TSO_MAX_SEGS,
+	IFLA_GSO_IPV6_MAX_SIZE,
 
 	__IFLA_MAX
 };