Message ID | 20231026081959.3477034-5-lixiaoyan@google.com (mailing list archive) |
---|---|
State | Superseded |
Delegated to: | Netdev Maintainers |
Headers | show |
Series | Analyze and Reorganize core Networking Structs to optimize cacheline consumption | expand |
On Thu, Oct 26, 2023 at 10:20 AM Coco Li <lixiaoyan@google.com> wrote:
>
> Reorganize fast path variables on tx-txrx-rx order.
> Fastpath cacheline ends after sysctl_tcp_rmem.
> There are only read-only variables here. (write is on the control path
> and not considered in this case)
>
> Below data generated with pahole on x86 architecture.
> Fast path variables span cache lines before change: 4
> Fast path variables span cache lines after change: 2
>
> Signed-off-by: Coco Li <lixiaoyan@google.com>
> Suggested-by: Eric Dumazet <edumazet@google.com>
> Reviewed-by: Wei Wang <weiwan@google.com>
> Reviewed-by: David Ahern <dsahern@kernel.org>
> ---
>  fs/proc/proc_net.c       | 39 ++++++++++++++++++++++++++++++++++++
>  include/net/netns/ipv4.h | 43 ++++++++++++++++++++++++++--------------
>  2 files changed, 67 insertions(+), 15 deletions(-)
>
> diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c
> index 2ba31b6d68c07..38846be34acd9 100644
> --- a/fs/proc/proc_net.c
> +++ b/fs/proc/proc_net.c
> @@ -344,6 +344,43 @@ const struct file_operations proc_net_operations = {
>  	.iterate_shared = proc_tgid_net_readdir,
>  };
>
> +static void __init netns_ipv4_struct_check(void)
> +{
> +	/* TX readonly hotpath cache lines */
> +	CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read,
> +				      sysctl_tcp_early_retrans);
> +	CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read,
> +				      sysctl_tcp_tso_win_divisor);
> +	CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read,
> +				      sysctl_tcp_tso_rtt_log);
> +	CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read,
> +				      sysctl_tcp_autocorking);
> +	CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read,
> +				      sysctl_tcp_min_snd_mss);
> +	CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read,
> +				      sysctl_tcp_notsent_lowat);
> +	CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read,
> +				      sysctl_tcp_limit_output_bytes);
> +	CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read,
> +				      sysctl_tcp_min_rtt_wlen);
> +	CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read,
> +				      sysctl_tcp_wmem);
> +	CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read,
> +				      sysctl_ip_fwd_use_pmtu);
> +	/* TXRX readonly hotpath cache lines */
> +	CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read,
> +				      sysctl_tcp_moderate_rcvbuf);
> +	/* RX readonly hotpath cache line */
> +	CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read,
> +				      sysctl_ip_early_demux);
> +	CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read,
> +				      sysctl_tcp_early_demux);
> +	CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read,
> +				      sysctl_tcp_reordering);
> +	CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read,
> +				      sysctl_tcp_rmem);
> +}
> +
>  static __net_init int proc_net_ns_init(struct net *net)
>  {
>  	struct proc_dir_entry *netd, *net_statd;
> @@ -351,6 +388,8 @@ static __net_init int proc_net_ns_init(struct net *net)
>  	kgid_t gid;
>  	int err;
>
> +	netns_ipv4_struct_check();
> +
>  	/*
>  	 * This PDE acts only as an anchor for /proc/${pid}/net hierarchy.
>  	 * Corresponding inode (PDE(inode) == net->proc_net) is never
> diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
> index 73f43f6991999..617074fccde68 100644
> --- a/include/net/netns/ipv4.h
> +++ b/include/net/netns/ipv4.h
> @@ -42,6 +42,34 @@ struct inet_timewait_death_row {
>  struct tcp_fastopen_context;
>
>  struct netns_ipv4 {
> +	/* Cacheline organization can be found documented in
> +	 * Documentation/networking/net_cachelines/netns_ipv4_sysctl.rst.
> +	 * Please update the document when adding new fields.
> +	 */
> +
> +	__cacheline_group_begin(netns_ipv4_read);

Same remark here, please use three different groups, instead of a single one.

__cacheline_group_begin(tx_path);

> +	/* TX readonly hotpath cache lines */
> +	u8 sysctl_tcp_early_retrans;
> +	u8 sysctl_tcp_tso_win_divisor;
> +	u8 sysctl_tcp_tso_rtt_log;
> +	u8 sysctl_tcp_autocorking;
> +	int sysctl_tcp_min_snd_mss;
> +	unsigned int sysctl_tcp_notsent_lowat;
> +	int sysctl_tcp_limit_output_bytes;
> +	int sysctl_tcp_min_rtt_wlen;
> +	int sysctl_tcp_wmem[3];
> +	u8 sysctl_ip_fwd_use_pmtu;
> +

__cacheline_group_end(tx_path);
__cacheline_group_begin(rxtx_path);

> +	/* TXRX readonly hotpath cache lines */
> +	u8 sysctl_tcp_moderate_rcvbuf;
> +

__cacheline_group_end(rxtx_path);
__cacheline_group_begin(rx_path);

> +	/* RX readonly hotpath cache line */
> +	u8 sysctl_ip_early_demux;
> +	u8 sysctl_tcp_early_demux;
> +	int sysctl_tcp_reordering;
> +	int sysctl_tcp_rmem[3];
> +	__cacheline_group_end(netns_ipv4_read);

__cacheline_group_end(rx_path);

> +
>  	struct inet_timewait_death_row tcp_death_row;
>  	struct udp_table *udp_table;
>
> @@ -96,17 +124,14 @@ struct netns_ipv4 {
>
>  	u8 sysctl_ip_default_ttl;
>  	u8 sysctl_ip_no_pmtu_disc;
> -	u8 sysctl_ip_fwd_use_pmtu;
>  	u8 sysctl_ip_fwd_update_priority;
>  	u8 sysctl_ip_nonlocal_bind;
>  	u8 sysctl_ip_autobind_reuse;
>  	/* Shall we try to damage output packets if routing dev changes? */
>  	u8 sysctl_ip_dynaddr;
> -	u8 sysctl_ip_early_demux;
>  #ifdef CONFIG_NET_L3_MASTER_DEV
>  	u8 sysctl_raw_l3mdev_accept;
>  #endif
> -	u8 sysctl_tcp_early_demux;
>  	u8 sysctl_udp_early_demux;
>
>  	u8 sysctl_nexthop_compat_mode;
> @@ -119,7 +144,6 @@ struct netns_ipv4 {
>  	u8 sysctl_tcp_mtu_probing;
>  	int sysctl_tcp_mtu_probe_floor;
>  	int sysctl_tcp_base_mss;
> -	int sysctl_tcp_min_snd_mss;
>  	int sysctl_tcp_probe_threshold;
>  	u32 sysctl_tcp_probe_interval;
>
> @@ -135,17 +159,14 @@ struct netns_ipv4 {
>  	u8 sysctl_tcp_backlog_ack_defer;
>  	u8 sysctl_tcp_pingpong_thresh;
>
> -	int sysctl_tcp_reordering;
>  	u8 sysctl_tcp_retries1;
>  	u8 sysctl_tcp_retries2;
>  	u8 sysctl_tcp_orphan_retries;
>  	u8 sysctl_tcp_tw_reuse;
>  	int sysctl_tcp_fin_timeout;
> -	unsigned int sysctl_tcp_notsent_lowat;
>  	u8 sysctl_tcp_sack;
>  	u8 sysctl_tcp_window_scaling;
>  	u8 sysctl_tcp_timestamps;
> -	u8 sysctl_tcp_early_retrans;
>  	u8 sysctl_tcp_recovery;
>  	u8 sysctl_tcp_thin_linear_timeouts;
>  	u8 sysctl_tcp_slow_start_after_idle;
> @@ -161,21 +182,13 @@ struct netns_ipv4 {
>  	u8 sysctl_tcp_frto;
>  	u8 sysctl_tcp_nometrics_save;
>  	u8 sysctl_tcp_no_ssthresh_metrics_save;
> -	u8 sysctl_tcp_moderate_rcvbuf;
> -	u8 sysctl_tcp_tso_win_divisor;
>  	u8 sysctl_tcp_workaround_signed_windows;
> -	int sysctl_tcp_limit_output_bytes;
>  	int sysctl_tcp_challenge_ack_limit;
> -	int sysctl_tcp_min_rtt_wlen;
>  	u8 sysctl_tcp_min_tso_segs;
> -	u8 sysctl_tcp_tso_rtt_log;
> -	u8 sysctl_tcp_autocorking;
>  	u8 sysctl_tcp_reflect_tos;
>  	int sysctl_tcp_invalid_ratelimit;
>  	int sysctl_tcp_pacing_ss_ratio;
>  	int sysctl_tcp_pacing_ca_ratio;
> -	int sysctl_tcp_wmem[3];
> -	int sysctl_tcp_rmem[3];
>  	unsigned int sysctl_tcp_child_ehash_entries;
>  	unsigned long sysctl_tcp_comp_sack_delay_ns;
>  	unsigned long sysctl_tcp_comp_sack_slack_ns;
> --
> 2.42.0.758.gaed0368e0e-goog
>
diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c index 2ba31b6d68c07..38846be34acd9 100644 --- a/fs/proc/proc_net.c +++ b/fs/proc/proc_net.c @@ -344,6 +344,43 @@ const struct file_operations proc_net_operations = { .iterate_shared = proc_tgid_net_readdir, }; +static void __init netns_ipv4_struct_check(void) +{ + /* TX readonly hotpath cache lines */ + CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read, + sysctl_tcp_early_retrans); + CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read, + sysctl_tcp_tso_win_divisor); + CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read, + sysctl_tcp_tso_rtt_log); + CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read, + sysctl_tcp_autocorking); + CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read, + sysctl_tcp_min_snd_mss); + CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read, + sysctl_tcp_notsent_lowat); + CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read, + sysctl_tcp_limit_output_bytes); + CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read, + sysctl_tcp_min_rtt_wlen); + CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read, + sysctl_tcp_wmem); + CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read, + sysctl_ip_fwd_use_pmtu); + /* TXRX readonly hotpath cache lines */ + CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read, + sysctl_tcp_moderate_rcvbuf); + /* RX readonly hotpath cache line */ + CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read, + sysctl_ip_early_demux); + CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read, + sysctl_tcp_early_demux); + CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read, + sysctl_tcp_reordering); + CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read, + sysctl_tcp_rmem); +} + static __net_init int proc_net_ns_init(struct net *net) { struct proc_dir_entry *netd, *net_statd; @@ -351,6 +388,8 @@ static 
__net_init int proc_net_ns_init(struct net *net) kgid_t gid; int err; + netns_ipv4_struct_check(); + /* * This PDE acts only as an anchor for /proc/${pid}/net hierarchy. * Corresponding inode (PDE(inode) == net->proc_net) is never diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 73f43f6991999..617074fccde68 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -42,6 +42,34 @@ struct inet_timewait_death_row { struct tcp_fastopen_context; struct netns_ipv4 { + /* Cacheline organization can be found documented in + * Documentation/networking/net_cachelines/netns_ipv4_sysctl.rst. + * Please update the document when adding new fields. + */ + + __cacheline_group_begin(netns_ipv4_read); + /* TX readonly hotpath cache lines */ + u8 sysctl_tcp_early_retrans; + u8 sysctl_tcp_tso_win_divisor; + u8 sysctl_tcp_tso_rtt_log; + u8 sysctl_tcp_autocorking; + int sysctl_tcp_min_snd_mss; + unsigned int sysctl_tcp_notsent_lowat; + int sysctl_tcp_limit_output_bytes; + int sysctl_tcp_min_rtt_wlen; + int sysctl_tcp_wmem[3]; + u8 sysctl_ip_fwd_use_pmtu; + + /* TXRX readonly hotpath cache lines */ + u8 sysctl_tcp_moderate_rcvbuf; + + /* RX readonly hotpath cache line */ + u8 sysctl_ip_early_demux; + u8 sysctl_tcp_early_demux; + int sysctl_tcp_reordering; + int sysctl_tcp_rmem[3]; + __cacheline_group_end(netns_ipv4_read); + struct inet_timewait_death_row tcp_death_row; struct udp_table *udp_table; @@ -96,17 +124,14 @@ struct netns_ipv4 { u8 sysctl_ip_default_ttl; u8 sysctl_ip_no_pmtu_disc; - u8 sysctl_ip_fwd_use_pmtu; u8 sysctl_ip_fwd_update_priority; u8 sysctl_ip_nonlocal_bind; u8 sysctl_ip_autobind_reuse; /* Shall we try to damage output packets if routing dev changes? 
*/ u8 sysctl_ip_dynaddr; - u8 sysctl_ip_early_demux; #ifdef CONFIG_NET_L3_MASTER_DEV u8 sysctl_raw_l3mdev_accept; #endif - u8 sysctl_tcp_early_demux; u8 sysctl_udp_early_demux; u8 sysctl_nexthop_compat_mode; @@ -119,7 +144,6 @@ struct netns_ipv4 { u8 sysctl_tcp_mtu_probing; int sysctl_tcp_mtu_probe_floor; int sysctl_tcp_base_mss; - int sysctl_tcp_min_snd_mss; int sysctl_tcp_probe_threshold; u32 sysctl_tcp_probe_interval; @@ -135,17 +159,14 @@ struct netns_ipv4 { u8 sysctl_tcp_backlog_ack_defer; u8 sysctl_tcp_pingpong_thresh; - int sysctl_tcp_reordering; u8 sysctl_tcp_retries1; u8 sysctl_tcp_retries2; u8 sysctl_tcp_orphan_retries; u8 sysctl_tcp_tw_reuse; int sysctl_tcp_fin_timeout; - unsigned int sysctl_tcp_notsent_lowat; u8 sysctl_tcp_sack; u8 sysctl_tcp_window_scaling; u8 sysctl_tcp_timestamps; - u8 sysctl_tcp_early_retrans; u8 sysctl_tcp_recovery; u8 sysctl_tcp_thin_linear_timeouts; u8 sysctl_tcp_slow_start_after_idle; @@ -161,21 +182,13 @@ struct netns_ipv4 { u8 sysctl_tcp_frto; u8 sysctl_tcp_nometrics_save; u8 sysctl_tcp_no_ssthresh_metrics_save; - u8 sysctl_tcp_moderate_rcvbuf; - u8 sysctl_tcp_tso_win_divisor; u8 sysctl_tcp_workaround_signed_windows; - int sysctl_tcp_limit_output_bytes; int sysctl_tcp_challenge_ack_limit; - int sysctl_tcp_min_rtt_wlen; u8 sysctl_tcp_min_tso_segs; - u8 sysctl_tcp_tso_rtt_log; - u8 sysctl_tcp_autocorking; u8 sysctl_tcp_reflect_tos; int sysctl_tcp_invalid_ratelimit; int sysctl_tcp_pacing_ss_ratio; int sysctl_tcp_pacing_ca_ratio; - int sysctl_tcp_wmem[3]; - int sysctl_tcp_rmem[3]; unsigned int sysctl_tcp_child_ehash_entries; unsigned long sysctl_tcp_comp_sack_delay_ns; unsigned long sysctl_tcp_comp_sack_slack_ns;