diff mbox series

[PATCHv4,net-next,09/10] net: add gso_ipv4_max_size and gro_ipv4_max_size per device

Message ID 7e1f733cc96c7f7658fbf3276a90281b2f37acd1.1674921359.git.lucien.xin@gmail.com (mailing list archive)
State Accepted
Commit 9eefedd58ae1daece2ba907849a44db2941fb4b0
Delegated to: Netdev Maintainers
Headers show
Series net: support ipv4 big tcp | expand

Checks

Context Check Description
netdev/tree_selection success Clearly marked for net-next, async
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix success Link
netdev/cover_letter success Series has a cover letter
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 4349 this patch: 4349
netdev/cc_maintainers warning 3 maintainers not CCed: petrm@nvidia.com liuhangbin@gmail.com idosch@nvidia.com
netdev/build_clang success Errors and warnings before: 1020 this patch: 1020
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 4559 this patch: 4559
netdev/checkpatch warning WARNING: line length of 90 exceeds 80 columns WARNING: please, no space before tabs
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Xin Long Jan. 28, 2023, 3:58 p.m. UTC
This patch introduces gso_ipv4_max_size and gro_ipv4_max_size
per device and adds netlink attributes for them, so that IPV4
BIG TCP can be guarded by a separate tunable in the next patch.

To not break the old application using "gso/gro_max_size" for
IPv4 GSO packets, this patch updates "gso/gro_ipv4_max_size"
in netif_set_gso/gro_max_size() if the new size isn't greater
than GSO_LEGACY_MAX_SIZE, so that nothing will change even if
userspace doesn't realize the new netlink attributes.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
---
 include/linux/netdevice.h    |  6 ++++++
 include/uapi/linux/if_link.h |  3 +++
 net/core/dev.c               |  4 ++++
 net/core/dev.h               | 18 ++++++++++++++++++
 net/core/rtnetlink.c         | 33 +++++++++++++++++++++++++++++++++
 5 files changed, 64 insertions(+)

Comments

Paolo Abeni Jan. 31, 2023, 2:59 p.m. UTC | #1
On Sat, 2023-01-28 at 10:58 -0500, Xin Long wrote:
> This patch introduces gso_ipv4_max_size and gro_ipv4_max_size
> per device and adds netlink attributes for them, so that IPV4
> BIG TCP can be guarded by a separate tunable in the next patch.
> 
> To not break the old application using "gso/gro_max_size" for
> IPv4 GSO packets, this patch updates "gso/gro_ipv4_max_size"
> in netif_set_gso/gro_max_size() if the new size isn't greater
> than GSO_LEGACY_MAX_SIZE, so that nothing will change even if
> userspace doesn't realize the new netlink attributes.

Not a big deal, but I think it would be nice to include the pahole info
showing where the new fields are located and why those are good
locations.

No need to send a new version just for the above, unless Eric asks
otherwise ;)

Cheers,

Paolo
Xin Long Jan. 31, 2023, 5:55 p.m. UTC | #2
On Tue, Jan 31, 2023 at 9:59 AM Paolo Abeni <pabeni@redhat.com> wrote:
>
> On Sat, 2023-01-28 at 10:58 -0500, Xin Long wrote:
> > This patch introduces gso_ipv4_max_size and gro_ipv4_max_size
> > per device and adds netlink attributes for them, so that IPV4
> > BIG TCP can be guarded by a separate tunable in the next patch.
> >
> > To not break the old application using "gso/gro_max_size" for
> > IPv4 GSO packets, this patch updates "gso/gro_ipv4_max_size"
> > in netif_set_gso/gro_max_size() if the new size isn't greater
> > than GSO_LEGACY_MAX_SIZE, so that nothing will change even if
> > userspace doesn't realize the new netlink attributes.
>
> Not a big deal, but I think it would be nice to include the pahole info
> showing where the new fields are located and why that are good
> locations.
>
> No need to send a new version for just for the above, unless Eric asks
> otherwise ;)
>
The pahole info without and with the patch is shown below:

- Without the Patch:

# pahole --hex -C net_device vmlinux
struct net_device {
...
long unsigned int          gro_flush_timeout;    /* 0x330   0x8 */
int                        napi_defer_hard_irqs; /* 0x338   0x4 */
unsigned int               gro_max_size;         /* 0x33c   0x4 */  <---------
/* --- cacheline 13 boundary (832 bytes) --- */
rx_handler_func_t *        rx_handler;           /* 0x340   0x8 */
void *                     rx_handler_data;      /* 0x348   0x8 */
struct mini_Qdisc *        miniq_ingress;        /* 0x350   0x8 */
struct netdev_queue *      ingress_queue;        /* 0x358   0x8 */
struct nf_hook_entries *   nf_hooks_ingress;     /* 0x360   0x8 */
unsigned char              broadcast[32];        /* 0x368  0x20 */
/* --- cacheline 14 boundary (896 bytes) was 8 bytes ago --- */
struct cpu_rmap *          rx_cpu_rmap;          /* 0x388   0x8 */
struct hlist_node          index_hlist;          /* 0x390  0x10 */

/* XXX 32 bytes hole, try to pack */

/* --- cacheline 15 boundary (960 bytes) --- */
struct netdev_queue *      _tx __attribute__((__aligned__(64))); /* 0x3c0   0x8 */
...

/* --- cacheline 32 boundary (2048 bytes) was 24 bytes ago --- */
const struct attribute_group  * sysfs_groups[4]; /* 0x818  0x20 */
const struct attribute_group  * sysfs_rx_queue_group; /* 0x838   0x8 */
/* --- cacheline 33 boundary (2112 bytes) --- */
const struct rtnl_link_ops  * rtnl_link_ops;     /* 0x840   0x8 */
unsigned int               gso_max_size;         /* 0x848   0x4 */
unsigned int               tso_max_size;         /* 0x84c   0x4 */
u16                        gso_max_segs;         /* 0x850   0x2 */
u16                        tso_max_segs;         /* 0x852   0x2 */   <---------

/* XXX 4 bytes hole, try to pack */

const struct dcbnl_rtnl_ops  * dcbnl_ops;        /* 0x858   0x8 */
s16                        num_tc;               /* 0x860   0x2 */
struct netdev_tc_txq       tc_to_txq[16];        /* 0x862  0x40 */
/* --- cacheline 34 boundary (2176 bytes) was 34 bytes ago --- */
u8                         prio_tc_map[16];      /* 0x8a2  0x10 */
...
}


- With the Patch:

For "gso_ipv4_max_size", it fills the 4-byte hole after tso_max_segs, as expected.

/* --- cacheline 33 boundary (2112 bytes) --- */
const struct rtnl_link_ops  * rtnl_link_ops;     /* 0x840   0x8 */
unsigned int               gso_max_size;         /* 0x848   0x4 */
unsigned int               tso_max_size;         /* 0x84c   0x4 */
u16                        gso_max_segs;         /* 0x850   0x2 */
u16                        tso_max_segs;         /* 0x852   0x2 */
unsigned int               gso_ipv4_max_size;    /* 0x854   0x4 */ <-------
const struct dcbnl_rtnl_ops  * dcbnl_ops;        /* 0x858   0x8 */
s16                        num_tc;               /* 0x860   0x2 */
struct netdev_tc_txq       tc_to_txq[16];        /* 0x862  0x40 */
/* --- cacheline 34 boundary (2176 bytes) was 34 bytes ago --- */
u8                         prio_tc_map[16];      /* 0x8a2  0x10 */


For "gro_ipv4_max_size", there are no byte holes, so I just put it
in the "Cache lines mostly used on receive path" area,
next to gro_max_size.

long unsigned int          gro_flush_timeout;    /* 0x330   0x8 */
int                        napi_defer_hard_irqs; /* 0x338   0x4 */
unsigned int               gro_max_size;         /* 0x33c   0x4 */
/* --- cacheline 13 boundary (832 bytes) --- */
unsigned int               gro_ipv4_max_size;    /* 0x340   0x4 */  <------

/* XXX 4 bytes hole, try to pack */

rx_handler_func_t *        rx_handler;           /* 0x348   0x8 */
void *                     rx_handler_data;      /* 0x350   0x8 */
struct mini_Qdisc *        miniq_ingress;        /* 0x358   0x8 */
struct netdev_queue *      ingress_queue;        /* 0x360   0x8 */
struct nf_hook_entries *   nf_hooks_ingress;     /* 0x368   0x8 */
unsigned char              broadcast[32];        /* 0x370  0x20 */
/* --- cacheline 14 boundary (896 bytes) was 16 bytes ago --- */
struct cpu_rmap *          rx_cpu_rmap;          /* 0x390   0x8 */
struct hlist_node          index_hlist;          /* 0x398  0x10 */

/* XXX 24 bytes hole, try to pack */

/* --- cacheline 15 boundary (960 bytes) --- */
struct netdev_queue *      _tx __attribute__((__aligned__(64))); /* 0x3c0   0x8 */


Thanks.
David Ahern Feb. 1, 2023, 3:36 p.m. UTC | #3
On 1/28/23 8:58 AM, Xin Long wrote:
> This patch introduces gso_ipv4_max_size and gro_ipv4_max_size
> per device and adds netlink attributes for them, so that IPV4
> BIG TCP can be guarded by a separate tunable in the next patch.
> 
> To not break the old application using "gso/gro_max_size" for
> IPv4 GSO packets, this patch updates "gso/gro_ipv4_max_size"
> in netif_set_gso/gro_max_size() if the new size isn't greater
> than GSO_LEGACY_MAX_SIZE, so that nothing will change even if
> userspace doesn't realize the new netlink attributes.
> 
> Signed-off-by: Xin Long <lucien.xin@gmail.com>
> ---
>  include/linux/netdevice.h    |  6 ++++++
>  include/uapi/linux/if_link.h |  3 +++
>  net/core/dev.c               |  4 ++++
>  net/core/dev.h               | 18 ++++++++++++++++++
>  net/core/rtnetlink.c         | 33 +++++++++++++++++++++++++++++++++
>  5 files changed, 64 insertions(+)
> 

Reviewed-by: David Ahern <dsahern@kernel.org>
diff mbox series

Patch

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 2466afa25078..d5ef4c1fedd2 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1964,6 +1964,8 @@  enum netdev_ml_priv_type {
  *	@gso_max_segs:	Maximum number of segments that can be passed to the
  *			NIC for GSO
  *	@tso_max_segs:	Device (as in HW) limit on the max TSO segment count
+ * 	@gso_ipv4_max_size:	Maximum size of generic segmentation offload,
+ * 				for IPv4.
  *
  *	@dcbnl_ops:	Data Center Bridging netlink ops
  *	@num_tc:	Number of traffic classes in the net device
@@ -2004,6 +2006,8 @@  enum netdev_ml_priv_type {
  *			keep a list of interfaces to be deleted.
  *	@gro_max_size:	Maximum size of aggregated packet in generic
  *			receive offload (GRO)
+ * 	@gro_ipv4_max_size:	Maximum size of aggregated packet in generic
+ * 				receive offload (GRO), for IPv4.
  *
  *	@dev_addr_shadow:	Copy of @dev_addr to catch direct writes.
  *	@linkwatch_dev_tracker:	refcount tracker used by linkwatch.
@@ -2207,6 +2211,7 @@  struct net_device {
  */
 #define GRO_MAX_SIZE		(8 * 65535u)
 	unsigned int		gro_max_size;
+	unsigned int		gro_ipv4_max_size;
 	rx_handler_func_t __rcu	*rx_handler;
 	void __rcu		*rx_handler_data;
 
@@ -2330,6 +2335,7 @@  struct net_device {
 	u16			gso_max_segs;
 #define TSO_MAX_SEGS		U16_MAX
 	u16			tso_max_segs;
+	unsigned int		gso_ipv4_max_size;
 
 #ifdef CONFIG_DCB
 	const struct dcbnl_rtnl_ops *dcbnl_ops;
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index 1021a7e47a86..02b87e4c65be 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -374,6 +374,9 @@  enum {
 
 	IFLA_DEVLINK_PORT,
 
+	IFLA_GSO_IPV4_MAX_SIZE,
+	IFLA_GRO_IPV4_MAX_SIZE,
+
 	__IFLA_MAX
 };
 
diff --git a/net/core/dev.c b/net/core/dev.c
index f72f5c4ee7e2..bb42150a38ec 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3001,6 +3001,8 @@  void netif_set_tso_max_size(struct net_device *dev, unsigned int size)
 	dev->tso_max_size = min(GSO_MAX_SIZE, size);
 	if (size < READ_ONCE(dev->gso_max_size))
 		netif_set_gso_max_size(dev, size);
+	if (size < READ_ONCE(dev->gso_ipv4_max_size))
+		netif_set_gso_ipv4_max_size(dev, size);
 }
 EXPORT_SYMBOL(netif_set_tso_max_size);
 
@@ -10614,6 +10616,8 @@  struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
 	dev->gso_max_size = GSO_LEGACY_MAX_SIZE;
 	dev->gso_max_segs = GSO_MAX_SEGS;
 	dev->gro_max_size = GRO_LEGACY_MAX_SIZE;
+	dev->gso_ipv4_max_size = GSO_LEGACY_MAX_SIZE;
+	dev->gro_ipv4_max_size = GRO_LEGACY_MAX_SIZE;
 	dev->tso_max_size = TSO_LEGACY_MAX_SIZE;
 	dev->tso_max_segs = TSO_MAX_SEGS;
 	dev->upper_level = 1;
diff --git a/net/core/dev.h b/net/core/dev.h
index 814ed5b7b960..a065b7571441 100644
--- a/net/core/dev.h
+++ b/net/core/dev.h
@@ -100,6 +100,8 @@  static inline void netif_set_gso_max_size(struct net_device *dev,
 {
 	/* dev->gso_max_size is read locklessly from sk_setup_caps() */
 	WRITE_ONCE(dev->gso_max_size, size);
+	if (size <= GSO_LEGACY_MAX_SIZE)
+		WRITE_ONCE(dev->gso_ipv4_max_size, size);
 }
 
 static inline void netif_set_gso_max_segs(struct net_device *dev,
@@ -114,6 +116,22 @@  static inline void netif_set_gro_max_size(struct net_device *dev,
 {
 	/* This pairs with the READ_ONCE() in skb_gro_receive() */
 	WRITE_ONCE(dev->gro_max_size, size);
+	if (size <= GRO_LEGACY_MAX_SIZE)
+		WRITE_ONCE(dev->gro_ipv4_max_size, size);
+}
+
+static inline void netif_set_gso_ipv4_max_size(struct net_device *dev,
+					       unsigned int size)
+{
+	/* dev->gso_ipv4_max_size is read locklessly from sk_setup_caps() */
+	WRITE_ONCE(dev->gso_ipv4_max_size, size);
+}
+
+static inline void netif_set_gro_ipv4_max_size(struct net_device *dev,
+					       unsigned int size)
+{
+	/* This pairs with the READ_ONCE() in skb_gro_receive() */
+	WRITE_ONCE(dev->gro_ipv4_max_size, size);
 }
 
 #endif
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 64289bc98887..b9f584955b77 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1074,6 +1074,8 @@  static noinline size_t if_nlmsg_size(const struct net_device *dev,
 	       + nla_total_size(4) /* IFLA_GSO_MAX_SEGS */
 	       + nla_total_size(4) /* IFLA_GSO_MAX_SIZE */
 	       + nla_total_size(4) /* IFLA_GRO_MAX_SIZE */
+	       + nla_total_size(4) /* IFLA_GSO_IPV4_MAX_SIZE */
+	       + nla_total_size(4) /* IFLA_GRO_IPV4_MAX_SIZE */
 	       + nla_total_size(4) /* IFLA_TSO_MAX_SIZE */
 	       + nla_total_size(4) /* IFLA_TSO_MAX_SEGS */
 	       + nla_total_size(1) /* IFLA_OPERSTATE */
@@ -1807,6 +1809,8 @@  static int rtnl_fill_ifinfo(struct sk_buff *skb,
 	    nla_put_u32(skb, IFLA_GSO_MAX_SEGS, dev->gso_max_segs) ||
 	    nla_put_u32(skb, IFLA_GSO_MAX_SIZE, dev->gso_max_size) ||
 	    nla_put_u32(skb, IFLA_GRO_MAX_SIZE, dev->gro_max_size) ||
+	    nla_put_u32(skb, IFLA_GSO_IPV4_MAX_SIZE, dev->gso_ipv4_max_size) ||
+	    nla_put_u32(skb, IFLA_GRO_IPV4_MAX_SIZE, dev->gro_ipv4_max_size) ||
 	    nla_put_u32(skb, IFLA_TSO_MAX_SIZE, dev->tso_max_size) ||
 	    nla_put_u32(skb, IFLA_TSO_MAX_SEGS, dev->tso_max_segs) ||
 #ifdef CONFIG_RPS
@@ -1968,6 +1972,8 @@  static const struct nla_policy ifla_policy[IFLA_MAX+1] = {
 	[IFLA_TSO_MAX_SIZE]	= { .type = NLA_REJECT },
 	[IFLA_TSO_MAX_SEGS]	= { .type = NLA_REJECT },
 	[IFLA_ALLMULTI]		= { .type = NLA_REJECT },
+	[IFLA_GSO_IPV4_MAX_SIZE]	= { .type = NLA_U32 },
+	[IFLA_GRO_IPV4_MAX_SIZE]	= { .type = NLA_U32 },
 };
 
 static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = {
@@ -2883,6 +2889,29 @@  static int do_setlink(const struct sk_buff *skb,
 		}
 	}
 
+	if (tb[IFLA_GSO_IPV4_MAX_SIZE]) {
+		u32 max_size = nla_get_u32(tb[IFLA_GSO_IPV4_MAX_SIZE]);
+
+		if (max_size > dev->tso_max_size) {
+			err = -EINVAL;
+			goto errout;
+		}
+
+		if (dev->gso_ipv4_max_size ^ max_size) {
+			netif_set_gso_ipv4_max_size(dev, max_size);
+			status |= DO_SETLINK_MODIFIED;
+		}
+	}
+
+	if (tb[IFLA_GRO_IPV4_MAX_SIZE]) {
+		u32 gro_max_size = nla_get_u32(tb[IFLA_GRO_IPV4_MAX_SIZE]);
+
+		if (dev->gro_ipv4_max_size ^ gro_max_size) {
+			netif_set_gro_ipv4_max_size(dev, gro_max_size);
+			status |= DO_SETLINK_MODIFIED;
+		}
+	}
+
 	if (tb[IFLA_OPERSTATE])
 		set_operstate(dev, nla_get_u8(tb[IFLA_OPERSTATE]));
 
@@ -3325,6 +3354,10 @@  struct net_device *rtnl_create_link(struct net *net, const char *ifname,
 		netif_set_gso_max_segs(dev, nla_get_u32(tb[IFLA_GSO_MAX_SEGS]));
 	if (tb[IFLA_GRO_MAX_SIZE])
 		netif_set_gro_max_size(dev, nla_get_u32(tb[IFLA_GRO_MAX_SIZE]));
+	if (tb[IFLA_GSO_IPV4_MAX_SIZE])
+		netif_set_gso_ipv4_max_size(dev, nla_get_u32(tb[IFLA_GSO_IPV4_MAX_SIZE]));
+	if (tb[IFLA_GRO_IPV4_MAX_SIZE])
+		netif_set_gro_ipv4_max_size(dev, nla_get_u32(tb[IFLA_GRO_IPV4_MAX_SIZE]));
 
 	return dev;
 }