diff mbox series

[v4,net-next,4/6] netns-ipv4: reorganize netns_ipv4 fast path variables

Message ID 20231026081959.3477034-5-lixiaoyan@google.com (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Headers show
Series Analyze and Reorganize core Networking Structs to optimize cacheline consumption | expand

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for net-next, async
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 5697 this patch: 5697
netdev/cc_maintainers warning 9 maintainers not CCed: kuniyu@amazon.com w@1wt.eu jlayton@kernel.org linux@weissschuh.net linux-fsdevel@vger.kernel.org ycheng@google.com davem@davemloft.net brauner@kernel.org jack@suse.cz
netdev/build_clang success Errors and warnings before: 1691 this patch: 1691
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 6060 this patch: 6060
netdev/checkpatch warning WARNING: Missing a blank line after declarations
netdev/kdoc success Errors and warnings before: 1 this patch: 1
netdev/source_inline success Was 0 now: 0

Commit Message

Coco Li Oct. 26, 2023, 8:19 a.m. UTC
Reorganize fast path variables in tx-txrx-rx order.
The fast path cacheline group ends after sysctl_tcp_rmem.
Only read-only variables are placed here (writes happen on the control
path and are not considered in this case).

The data below was generated with pahole on the x86 architecture.
Cache lines spanned by fast path variables before the change: 4
Cache lines spanned by fast path variables after the change: 2

Signed-off-by: Coco Li <lixiaoyan@google.com>
Suggested-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Wei Wang <weiwan@google.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
---
 fs/proc/proc_net.c       | 39 ++++++++++++++++++++++++++++++++++++
 include/net/netns/ipv4.h | 43 ++++++++++++++++++++++++++--------------
 2 files changed, 67 insertions(+), 15 deletions(-)

Comments

Eric Dumazet Oct. 26, 2023, 9:45 a.m. UTC | #1
On Thu, Oct 26, 2023 at 10:20 AM Coco Li <lixiaoyan@google.com> wrote:
>
> Reorganize fast path variables on tx-txrx-rx order.
> Fastpath cacheline ends after sysctl_tcp_rmem.
> There are only read-only variables here. (write is on the control path
> and not considered in this case)
>
> Below data generated with pahole on x86 architecture.
> Fast path variables span cache lines before change: 4
> Fast path variables span cache lines after change: 2
>
> Signed-off-by: Coco Li <lixiaoyan@google.com>
> Suggested-by: Eric Dumazet <edumazet@google.com>
> Reviewed-by: Wei Wang <weiwan@google.com>
> Reviewed-by: David Ahern <dsahern@kernel.org>
> ---
>  fs/proc/proc_net.c       | 39 ++++++++++++++++++++++++++++++++++++
>  include/net/netns/ipv4.h | 43 ++++++++++++++++++++++++++--------------
>  2 files changed, 67 insertions(+), 15 deletions(-)
>
> diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c
> index 2ba31b6d68c07..38846be34acd9 100644
> --- a/fs/proc/proc_net.c
> +++ b/fs/proc/proc_net.c
> @@ -344,6 +344,43 @@ const struct file_operations proc_net_operations = {
>         .iterate_shared = proc_tgid_net_readdir,
>  };
>
> +static void __init netns_ipv4_struct_check(void)
> +{
> +       /* TX readonly hotpath cache lines */
> +       CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read,
> +                                     sysctl_tcp_early_retrans);
> +       CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read,
> +                                     sysctl_tcp_tso_win_divisor);
> +       CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read,
> +                                     sysctl_tcp_tso_rtt_log);
> +       CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read,
> +                                     sysctl_tcp_autocorking);
> +       CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read,
> +                                     sysctl_tcp_min_snd_mss);
> +       CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read,
> +                                     sysctl_tcp_notsent_lowat);
> +       CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read,
> +                                     sysctl_tcp_limit_output_bytes);
> +       CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read,
> +                                     sysctl_tcp_min_rtt_wlen);
> +       CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read,
> +                                     sysctl_tcp_wmem);
> +       CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read,
> +                                     sysctl_ip_fwd_use_pmtu);
> +       /* TXRX readonly hotpath cache lines */
> +       CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read,
> +                                     sysctl_tcp_moderate_rcvbuf);
> +       /* RX readonly hotpath cache line */
> +       CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read,
> +                                     sysctl_ip_early_demux);
> +       CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read,
> +                                     sysctl_tcp_early_demux);
> +       CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read,
> +                                     sysctl_tcp_reordering);
> +       CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read,
> +                                     sysctl_tcp_rmem);
> +}
> +
>  static __net_init int proc_net_ns_init(struct net *net)
>  {
>         struct proc_dir_entry *netd, *net_statd;
> @@ -351,6 +388,8 @@ static __net_init int proc_net_ns_init(struct net *net)
>         kgid_t gid;
>         int err;
>
> +       netns_ipv4_struct_check();
> +
>         /*
>          * This PDE acts only as an anchor for /proc/${pid}/net hierarchy.
>          * Corresponding inode (PDE(inode) == net->proc_net) is never
> diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
> index 73f43f6991999..617074fccde68 100644
> --- a/include/net/netns/ipv4.h
> +++ b/include/net/netns/ipv4.h
> @@ -42,6 +42,34 @@ struct inet_timewait_death_row {
>  struct tcp_fastopen_context;
>
>  struct netns_ipv4 {
> +       /* Cacheline organization can be found documented in
> +        * Documentation/networking/net_cachelines/netns_ipv4_sysctl.rst.
> +        * Please update the document when adding new fields.
> +        */
> +
> +       __cacheline_group_begin(netns_ipv4_read);

Same remark here: please use three different groups instead of a single one.

__cacheline_group_begin(tx_path);

> +       /* TX readonly hotpath cache lines */
> +       u8 sysctl_tcp_early_retrans;
> +       u8 sysctl_tcp_tso_win_divisor;
> +       u8 sysctl_tcp_tso_rtt_log;
> +       u8 sysctl_tcp_autocorking;
> +       int sysctl_tcp_min_snd_mss;
> +       unsigned int sysctl_tcp_notsent_lowat;
> +       int sysctl_tcp_limit_output_bytes;
> +       int sysctl_tcp_min_rtt_wlen;
> +       int sysctl_tcp_wmem[3];
> +       u8 sysctl_ip_fwd_use_pmtu;
> +

__cacheline_group_end(tx_path);
__cacheline_group_begin(rxtx_path);
> +       /* TXRX readonly hotpath cache lines */
> +       u8 sysctl_tcp_moderate_rcvbuf;
> +

__cacheline_group_end(rxtx_path);
__cacheline_group_begin(rx_path);

> +       /* RX readonly hotpath cache line */
> +       u8 sysctl_ip_early_demux;
> +       u8 sysctl_tcp_early_demux;
> +       int sysctl_tcp_reordering;
> +       int sysctl_tcp_rmem[3];
> +       __cacheline_group_end(netns_ipv4_read);

__cacheline_group_end(rx_path);


> +
>         struct inet_timewait_death_row tcp_death_row;
>         struct udp_table *udp_table;
>
> @@ -96,17 +124,14 @@ struct netns_ipv4 {
>
>         u8 sysctl_ip_default_ttl;
>         u8 sysctl_ip_no_pmtu_disc;
> -       u8 sysctl_ip_fwd_use_pmtu;
>         u8 sysctl_ip_fwd_update_priority;
>         u8 sysctl_ip_nonlocal_bind;
>         u8 sysctl_ip_autobind_reuse;
>         /* Shall we try to damage output packets if routing dev changes? */
>         u8 sysctl_ip_dynaddr;
> -       u8 sysctl_ip_early_demux;
>  #ifdef CONFIG_NET_L3_MASTER_DEV
>         u8 sysctl_raw_l3mdev_accept;
>  #endif
> -       u8 sysctl_tcp_early_demux;
>         u8 sysctl_udp_early_demux;
>
>         u8 sysctl_nexthop_compat_mode;
> @@ -119,7 +144,6 @@ struct netns_ipv4 {
>         u8 sysctl_tcp_mtu_probing;
>         int sysctl_tcp_mtu_probe_floor;
>         int sysctl_tcp_base_mss;
> -       int sysctl_tcp_min_snd_mss;
>         int sysctl_tcp_probe_threshold;
>         u32 sysctl_tcp_probe_interval;
>
> @@ -135,17 +159,14 @@ struct netns_ipv4 {
>         u8 sysctl_tcp_backlog_ack_defer;
>         u8 sysctl_tcp_pingpong_thresh;
>
> -       int sysctl_tcp_reordering;
>         u8 sysctl_tcp_retries1;
>         u8 sysctl_tcp_retries2;
>         u8 sysctl_tcp_orphan_retries;
>         u8 sysctl_tcp_tw_reuse;
>         int sysctl_tcp_fin_timeout;
> -       unsigned int sysctl_tcp_notsent_lowat;
>         u8 sysctl_tcp_sack;
>         u8 sysctl_tcp_window_scaling;
>         u8 sysctl_tcp_timestamps;
> -       u8 sysctl_tcp_early_retrans;
>         u8 sysctl_tcp_recovery;
>         u8 sysctl_tcp_thin_linear_timeouts;
>         u8 sysctl_tcp_slow_start_after_idle;
> @@ -161,21 +182,13 @@ struct netns_ipv4 {
>         u8 sysctl_tcp_frto;
>         u8 sysctl_tcp_nometrics_save;
>         u8 sysctl_tcp_no_ssthresh_metrics_save;
> -       u8 sysctl_tcp_moderate_rcvbuf;
> -       u8 sysctl_tcp_tso_win_divisor;
>         u8 sysctl_tcp_workaround_signed_windows;
> -       int sysctl_tcp_limit_output_bytes;
>         int sysctl_tcp_challenge_ack_limit;
> -       int sysctl_tcp_min_rtt_wlen;
>         u8 sysctl_tcp_min_tso_segs;
> -       u8 sysctl_tcp_tso_rtt_log;
> -       u8 sysctl_tcp_autocorking;
>         u8 sysctl_tcp_reflect_tos;
>         int sysctl_tcp_invalid_ratelimit;
>         int sysctl_tcp_pacing_ss_ratio;
>         int sysctl_tcp_pacing_ca_ratio;
> -       int sysctl_tcp_wmem[3];
> -       int sysctl_tcp_rmem[3];
>         unsigned int sysctl_tcp_child_ehash_entries;
>         unsigned long sysctl_tcp_comp_sack_delay_ns;
>         unsigned long sysctl_tcp_comp_sack_slack_ns;
> --
> 2.42.0.758.gaed0368e0e-goog
>
diff mbox series

Patch

diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c
index 2ba31b6d68c07..38846be34acd9 100644
--- a/fs/proc/proc_net.c
+++ b/fs/proc/proc_net.c
@@ -344,6 +344,43 @@  const struct file_operations proc_net_operations = {
 	.iterate_shared	= proc_tgid_net_readdir,
 };
 
+static void __init netns_ipv4_struct_check(void)
+{
+	/* TX readonly hotpath cache lines */
+	CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read,
+				      sysctl_tcp_early_retrans);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read,
+				      sysctl_tcp_tso_win_divisor);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read,
+				      sysctl_tcp_tso_rtt_log);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read,
+				      sysctl_tcp_autocorking);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read,
+				      sysctl_tcp_min_snd_mss);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read,
+				      sysctl_tcp_notsent_lowat);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read,
+				      sysctl_tcp_limit_output_bytes);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read,
+				      sysctl_tcp_min_rtt_wlen);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read,
+				      sysctl_tcp_wmem);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read,
+				      sysctl_ip_fwd_use_pmtu);
+	/* TXRX readonly hotpath cache lines */
+	CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read,
+				      sysctl_tcp_moderate_rcvbuf);
+	/* RX readonly hotpath cache line */
+	CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read,
+				      sysctl_ip_early_demux);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read,
+				      sysctl_tcp_early_demux);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read,
+				      sysctl_tcp_reordering);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read,
+				      sysctl_tcp_rmem);
+}
+
 static __net_init int proc_net_ns_init(struct net *net)
 {
 	struct proc_dir_entry *netd, *net_statd;
@@ -351,6 +388,8 @@  static __net_init int proc_net_ns_init(struct net *net)
 	kgid_t gid;
 	int err;
 
+	netns_ipv4_struct_check();
+
 	/*
 	 * This PDE acts only as an anchor for /proc/${pid}/net hierarchy.
 	 * Corresponding inode (PDE(inode) == net->proc_net) is never
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 73f43f6991999..617074fccde68 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -42,6 +42,34 @@  struct inet_timewait_death_row {
 struct tcp_fastopen_context;
 
 struct netns_ipv4 {
+	/* Cacheline organization can be found documented in
+	 * Documentation/networking/net_cachelines/netns_ipv4_sysctl.rst.
+	 * Please update the document when adding new fields.
+	 */
+
+	__cacheline_group_begin(netns_ipv4_read);
+	/* TX readonly hotpath cache lines */
+	u8 sysctl_tcp_early_retrans;
+	u8 sysctl_tcp_tso_win_divisor;
+	u8 sysctl_tcp_tso_rtt_log;
+	u8 sysctl_tcp_autocorking;
+	int sysctl_tcp_min_snd_mss;
+	unsigned int sysctl_tcp_notsent_lowat;
+	int sysctl_tcp_limit_output_bytes;
+	int sysctl_tcp_min_rtt_wlen;
+	int sysctl_tcp_wmem[3];
+	u8 sysctl_ip_fwd_use_pmtu;
+
+	/* TXRX readonly hotpath cache lines */
+	u8 sysctl_tcp_moderate_rcvbuf;
+
+	/* RX readonly hotpath cache line */
+	u8 sysctl_ip_early_demux;
+	u8 sysctl_tcp_early_demux;
+	int sysctl_tcp_reordering;
+	int sysctl_tcp_rmem[3];
+	__cacheline_group_end(netns_ipv4_read);
+
 	struct inet_timewait_death_row tcp_death_row;
 	struct udp_table *udp_table;
 
@@ -96,17 +124,14 @@  struct netns_ipv4 {
 
 	u8 sysctl_ip_default_ttl;
 	u8 sysctl_ip_no_pmtu_disc;
-	u8 sysctl_ip_fwd_use_pmtu;
 	u8 sysctl_ip_fwd_update_priority;
 	u8 sysctl_ip_nonlocal_bind;
 	u8 sysctl_ip_autobind_reuse;
 	/* Shall we try to damage output packets if routing dev changes? */
 	u8 sysctl_ip_dynaddr;
-	u8 sysctl_ip_early_demux;
 #ifdef CONFIG_NET_L3_MASTER_DEV
 	u8 sysctl_raw_l3mdev_accept;
 #endif
-	u8 sysctl_tcp_early_demux;
 	u8 sysctl_udp_early_demux;
 
 	u8 sysctl_nexthop_compat_mode;
@@ -119,7 +144,6 @@  struct netns_ipv4 {
 	u8 sysctl_tcp_mtu_probing;
 	int sysctl_tcp_mtu_probe_floor;
 	int sysctl_tcp_base_mss;
-	int sysctl_tcp_min_snd_mss;
 	int sysctl_tcp_probe_threshold;
 	u32 sysctl_tcp_probe_interval;
 
@@ -135,17 +159,14 @@  struct netns_ipv4 {
 	u8 sysctl_tcp_backlog_ack_defer;
 	u8 sysctl_tcp_pingpong_thresh;
 
-	int sysctl_tcp_reordering;
 	u8 sysctl_tcp_retries1;
 	u8 sysctl_tcp_retries2;
 	u8 sysctl_tcp_orphan_retries;
 	u8 sysctl_tcp_tw_reuse;
 	int sysctl_tcp_fin_timeout;
-	unsigned int sysctl_tcp_notsent_lowat;
 	u8 sysctl_tcp_sack;
 	u8 sysctl_tcp_window_scaling;
 	u8 sysctl_tcp_timestamps;
-	u8 sysctl_tcp_early_retrans;
 	u8 sysctl_tcp_recovery;
 	u8 sysctl_tcp_thin_linear_timeouts;
 	u8 sysctl_tcp_slow_start_after_idle;
@@ -161,21 +182,13 @@  struct netns_ipv4 {
 	u8 sysctl_tcp_frto;
 	u8 sysctl_tcp_nometrics_save;
 	u8 sysctl_tcp_no_ssthresh_metrics_save;
-	u8 sysctl_tcp_moderate_rcvbuf;
-	u8 sysctl_tcp_tso_win_divisor;
 	u8 sysctl_tcp_workaround_signed_windows;
-	int sysctl_tcp_limit_output_bytes;
 	int sysctl_tcp_challenge_ack_limit;
-	int sysctl_tcp_min_rtt_wlen;
 	u8 sysctl_tcp_min_tso_segs;
-	u8 sysctl_tcp_tso_rtt_log;
-	u8 sysctl_tcp_autocorking;
 	u8 sysctl_tcp_reflect_tos;
 	int sysctl_tcp_invalid_ratelimit;
 	int sysctl_tcp_pacing_ss_ratio;
 	int sysctl_tcp_pacing_ca_ratio;
-	int sysctl_tcp_wmem[3];
-	int sysctl_tcp_rmem[3];
 	unsigned int sysctl_tcp_child_ehash_entries;
 	unsigned long sysctl_tcp_comp_sack_delay_ns;
 	unsigned long sysctl_tcp_comp_sack_slack_ns;