[v4,net-next,5/6] net-device: reorganize net_device fast path variables

Message ID 20231026081959.3477034-6-lixiaoyan@google.com (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Series Analyze and Reorganize core Networking Structs to optimize cacheline consumption

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for net-next, async
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 5511 this patch: 5511
netdev/cc_maintainers warning 2 maintainers not CCed: davem@davemloft.net bpf@vger.kernel.org
netdev/build_clang success Errors and warnings before: 1670 this patch: 1670
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 5869 this patch: 5869
netdev/checkpatch warning WARNING: line length of 81 exceeds 80 columns WARNING: line length of 82 exceeds 80 columns WARNING: line length of 83 exceeds 80 columns WARNING: line length of 84 exceeds 80 columns WARNING: line length of 85 exceeds 80 columns WARNING: line length of 86 exceeds 80 columns WARNING: line length of 87 exceeds 80 columns WARNING: line length of 88 exceeds 80 columns WARNING: line length of 90 exceeds 80 columns WARNING: line length of 91 exceeds 80 columns WARNING: line length of 93 exceeds 80 columns WARNING: line length of 94 exceeds 80 columns WARNING: line length of 96 exceeds 80 columns
netdev/kdoc fail Errors and warnings before: 0 this patch: 2
netdev/source_inline success Was 0 now: 0

Commit Message

Coco Li Oct. 26, 2023, 8:19 a.m. UTC
Reorganize fast path variables in tx-txrx-rx order.
Fast path variables end after npinfo.

The data below was generated with pahole on the x86 architecture.

Fast path variables span cache lines before change: 12
Fast path variables span cache lines after change: 4

Signed-off-by: Coco Li <lixiaoyan@google.com>
Suggested-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
---
 include/linux/netdevice.h | 113 ++++++++++++++++++++------------------
 net/core/dev.c            |  51 +++++++++++++++++
 2 files changed, 111 insertions(+), 53 deletions(-)
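
For context: the grouping and assertion helpers used below come from patch
1/6 of this series (include/linux/cache.h). A paraphrased sketch of how they
work -- zero-length array markers that bound a group without changing the
struct layout, plus a build-time membership check (not the exact upstream
definitions):

	#define __cacheline_group_begin(GROUP) \
		__u8 __cacheline_group_begin__##GROUP[0]

	#define __cacheline_group_end(GROUP) \
		__u8 __cacheline_group_end__##GROUP[0]

	/* Fail the build if MEMBER lies outside the GROUP markers. */
	#define CACHELINE_ASSERT_GROUP_MEMBER(TYPE, GROUP, MEMBER) \
		BUILD_BUG_ON(!(offsetof(TYPE, MEMBER) >= \
			       offsetofend(TYPE, __cacheline_group_begin__##GROUP) && \
			       offsetofend(TYPE, MEMBER) <= \
			       offsetof(TYPE, __cacheline_group_end__##GROUP)))

The cacheline spans quoted in the commit message can be reproduced by running
pahole (e.g. pahole -C net_device vmlinux) and comparing the offsets of the
group markers before and after the change.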

Comments

Eric Dumazet Oct. 26, 2023, 9:41 a.m. UTC | #1
On Thu, Oct 26, 2023 at 10:20 AM Coco Li <lixiaoyan@google.com> wrote:
>
> [...]
>
> @@ -2076,6 +2076,66 @@ enum netdev_ml_priv_type {
>   */
>
>  struct net_device {
> +       /* Cacheline organization can be found documented in
> +        * Documentation/networking/net_cachelines/net_device.rst.
> +        * Please update the document when adding new fields.
> +        */
> +
> +       /* TX read-mostly hotpath */
> +       __cacheline_group_begin(net_device_read);

This should be net_device_write? Or perhaps simply tx?


> +       unsigned long long      priv_flags;
> +       const struct net_device_ops *netdev_ops;
> +       const struct header_ops *header_ops;
> +       struct netdev_queue     *_tx;
> +       unsigned int            real_num_tx_queues;
> +       unsigned int            gso_max_size;
> +       unsigned int            gso_ipv4_max_size;
> +       u16                     gso_max_segs;
> +       s16                     num_tc;
> +       /* Note : dev->mtu is often read without holding a lock.
> +        * Writers usually hold RTNL.
> +        * It is recommended to use READ_ONCE() to annotate the reads,
> +        * and to use WRITE_ONCE() to annotate the writes.
> +        */
> +       unsigned int            mtu;
> +       unsigned short          needed_headroom;
> +       struct netdev_tc_txq    tc_to_txq[TC_MAX_QUEUE];
> +#ifdef CONFIG_XPS
> +       struct xps_dev_maps __rcu *xps_maps[XPS_MAPS_MAX];
> +#endif
> +#ifdef CONFIG_NETFILTER_EGRESS
> +       struct nf_hook_entries __rcu *nf_hooks_egress;
> +#endif
> +#ifdef CONFIG_NET_XGRESS
> +       struct bpf_mprog_entry __rcu *tcx_egress;
> +#endif
> +
 __cacheline_group_end(tx);

 __cacheline_group_begin(txrx);


> +       /* TXRX read-mostly hotpath */
> +       unsigned int            flags;
> +       unsigned short          hard_header_len;
> +       netdev_features_t       features;
> +       struct inet6_dev __rcu  *ip6_ptr;
> +

 __cacheline_group_end(txrx);

 __cacheline_group_begin(rx);

> +       /* RX read-mostly hotpath */
> +       struct list_head        ptype_specific;
> +       int                     ifindex;
> +       unsigned int            real_num_rx_queues;
> +       struct netdev_rx_queue  *_rx;
> +       unsigned long           gro_flush_timeout;
> +       int                     napi_defer_hard_irqs;
> +       unsigned int            gro_max_size;
> +       unsigned int            gro_ipv4_max_size;
> +       rx_handler_func_t __rcu *rx_handler;
> +       void __rcu              *rx_handler_data;
> +       possible_net_t                  nd_net;
> +#ifdef CONFIG_NETPOLL
> +       struct netpoll_info __rcu       *npinfo;
> +#endif
> +#ifdef CONFIG_NET_XGRESS
> +       struct bpf_mprog_entry __rcu *tcx_ingress;
> +#endif
> +       __cacheline_group_end(net_device_read);
> +
> [...]
>
> diff --git a/net/core/dev.c b/net/core/dev.c
> index a37a932a3e145..ca7e653e6c348 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -11511,6 +11511,55 @@ static struct pernet_operations __net_initdata default_device_ops = {
>         .exit_batch = default_device_exit_batch,
>  };
>
> +static void __init net_dev_struct_check(void)
> +{
> +       /* TX read-mostly hotpath */

Of course, change net_device_read to either rx, txrx, or tx, depending
on each field's purpose/location.

> +       CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read, priv_flags);
> +       CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read, netdev_ops);
> +       CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read, header_ops);
> +       CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read, _tx);
> +       CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read, real_num_tx_queues);
> +       CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read, gso_max_size);
> +       CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read, gso_ipv4_max_size);
> +       CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read, gso_max_segs);
> +       CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read, num_tc);
> +       CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read, mtu);
> +       CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read, needed_headroom);
> +       CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read, tc_to_txq);
> +#ifdef CONFIG_XPS
> +       CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read, xps_maps);
> +#endif
> +#ifdef CONFIG_NETFILTER_EGRESS
> +       CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read, nf_hooks_egress);
> +#endif
> +#ifdef CONFIG_NET_XGRESS
> +       CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read, tcx_egress);
> +#endif
> +       /* TXRX read-mostly hotpath */
> +       CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read, flags);
> +       CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read, hard_header_len);
> +       CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read, features);
> +       CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read, ip6_ptr);
> +       /* RX read-mostly hotpath */
> +       CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read, ptype_specific);
> +       CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read, ifindex);
> +       CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read, real_num_rx_queues);
> +       CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read, _rx);
> +       CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read, gro_flush_timeout);
> +       CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read, napi_defer_hard_irqs);
> +       CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read, gro_max_size);
> +       CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read, gro_ipv4_max_size);
> +       CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read, rx_handler);
> +       CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read, rx_handler_data);
> +       CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read, nd_net);
> +#ifdef CONFIG_NETPOLL
> +       CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read, npinfo);
> +#endif
> +#ifdef CONFIG_NET_XGRESS
> +       CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read, tcx_ingress);
> +#endif
> +}
> +
>  /*
>   *     Initialize the DEV module. At boot time this walks the device list and
>   *     unhooks any devices that fail to initialise (normally hardware not
> @@ -11528,6 +11577,8 @@ static int __init net_dev_init(void)
>
>         BUG_ON(!dev_boot_phase);
>
> +       net_dev_struct_check();
> +
>         if (dev_proc_init())
>                 goto out;
>
> --
> 2.42.0.758.gaed0368e0e-goog
>
Coco Li Oct. 28, 2023, 1:33 a.m. UTC | #2
On Thu, Oct 26, 2023 at 2:42 AM Eric Dumazet <edumazet@google.com> wrote:
>
> On Thu, Oct 26, 2023 at 10:20 AM Coco Li <lixiaoyan@google.com> wrote:
> > [...]
> > +       /* TX read-mostly hotpath */
> > +       __cacheline_group_begin(net_device_read);
>
> This should be net_device_write? Or perhaps simply tx?
>
> [...]
>
> Of course, change net_device_read to either rx, txrx, or tx, depending
> on each field's purpose/location.
The group names need to be unique, hence the verbosity. I will update
the patch series with more detailed cache line group separations.
Thank you!
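
For illustration, a sketch of how the split with unique names could look
(the group names here are a guess at the v5 naming, not the final patch):

	struct net_device {
		/* TX read-mostly hotpath */
		__cacheline_group_begin(net_device_read_tx);
		unsigned long long	priv_flags;
		const struct net_device_ops *netdev_ops;
		/* ... remaining TX fields ... */
		__cacheline_group_end(net_device_read_tx);

		/* TXRX read-mostly hotpath */
		__cacheline_group_begin(net_device_read_txrx);
		unsigned int		flags;
		/* ... remaining TXRX fields ... */
		__cacheline_group_end(net_device_read_txrx);

		/* RX read-mostly hotpath */
		__cacheline_group_begin(net_device_read_rx);
		struct list_head	ptype_specific;
		/* ... remaining RX fields ... */
		__cacheline_group_end(net_device_read_rx);

		/* ... rest of struct net_device ... */
	};

Each assert in net_dev_struct_check() would then name the matching
per-section group, e.g.:

	CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx,
				      priv_flags);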
Patch

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index b8bf669212cce..26c4d57451bf0 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2076,6 +2076,66 @@  enum netdev_ml_priv_type {
  */
 
 struct net_device {
+	/* Cacheline organization can be found documented in
+	 * Documentation/networking/net_cachelines/net_device.rst.
+	 * Please update the document when adding new fields.
+	 */
+
+	/* TX read-mostly hotpath */
+	__cacheline_group_begin(net_device_read);
+	unsigned long long	priv_flags;
+	const struct net_device_ops *netdev_ops;
+	const struct header_ops *header_ops;
+	struct netdev_queue	*_tx;
+	unsigned int		real_num_tx_queues;
+	unsigned int		gso_max_size;
+	unsigned int		gso_ipv4_max_size;
+	u16			gso_max_segs;
+	s16			num_tc;
+	/* Note : dev->mtu is often read without holding a lock.
+	 * Writers usually hold RTNL.
+	 * It is recommended to use READ_ONCE() to annotate the reads,
+	 * and to use WRITE_ONCE() to annotate the writes.
+	 */
+	unsigned int		mtu;
+	unsigned short		needed_headroom;
+	struct netdev_tc_txq	tc_to_txq[TC_MAX_QUEUE];
+#ifdef CONFIG_XPS
+	struct xps_dev_maps __rcu *xps_maps[XPS_MAPS_MAX];
+#endif
+#ifdef CONFIG_NETFILTER_EGRESS
+	struct nf_hook_entries __rcu *nf_hooks_egress;
+#endif
+#ifdef CONFIG_NET_XGRESS
+	struct bpf_mprog_entry __rcu *tcx_egress;
+#endif
+
+	/* TXRX read-mostly hotpath */
+	unsigned int		flags;
+	unsigned short		hard_header_len;
+	netdev_features_t	features;
+	struct inet6_dev __rcu	*ip6_ptr;
+
+	/* RX read-mostly hotpath */
+	struct list_head	ptype_specific;
+	int			ifindex;
+	unsigned int		real_num_rx_queues;
+	struct netdev_rx_queue	*_rx;
+	unsigned long		gro_flush_timeout;
+	int			napi_defer_hard_irqs;
+	unsigned int		gro_max_size;
+	unsigned int		gro_ipv4_max_size;
+	rx_handler_func_t __rcu	*rx_handler;
+	void __rcu		*rx_handler_data;
+	possible_net_t			nd_net;
+#ifdef CONFIG_NETPOLL
+	struct netpoll_info __rcu	*npinfo;
+#endif
+#ifdef CONFIG_NET_XGRESS
+	struct bpf_mprog_entry __rcu *tcx_ingress;
+#endif
+	__cacheline_group_end(net_device_read);
+
 	char			name[IFNAMSIZ];
 	struct netdev_name_node	*name_node;
 	struct dev_ifalias	__rcu *ifalias;
@@ -2100,7 +2160,6 @@  struct net_device {
 	struct list_head	unreg_list;
 	struct list_head	close_list;
 	struct list_head	ptype_all;
-	struct list_head	ptype_specific;
 
 	struct {
 		struct list_head upper;
@@ -2108,25 +2167,12 @@  struct net_device {
 	} adj_list;
 
 	/* Read-mostly cache-line for fast-path access */
-	unsigned int		flags;
 	xdp_features_t		xdp_features;
-	unsigned long long	priv_flags;
-	const struct net_device_ops *netdev_ops;
 	const struct xdp_metadata_ops *xdp_metadata_ops;
-	int			ifindex;
 	unsigned short		gflags;
-	unsigned short		hard_header_len;
 
-	/* Note : dev->mtu is often read without holding a lock.
-	 * Writers usually hold RTNL.
-	 * It is recommended to use READ_ONCE() to annotate the reads,
-	 * and to use WRITE_ONCE() to annotate the writes.
-	 */
-	unsigned int		mtu;
-	unsigned short		needed_headroom;
 	unsigned short		needed_tailroom;
 
-	netdev_features_t	features;
 	netdev_features_t	hw_features;
 	netdev_features_t	wanted_features;
 	netdev_features_t	vlan_features;
@@ -2170,8 +2216,6 @@  struct net_device {
 	const struct tlsdev_ops *tlsdev_ops;
 #endif
 
-	const struct header_ops *header_ops;
-
 	unsigned char		operstate;
 	unsigned char		link_mode;
 
@@ -2212,9 +2256,7 @@  struct net_device {
 
 
 	/* Protocol-specific pointers */
-
 	struct in_device __rcu	*ip_ptr;
-	struct inet6_dev __rcu	*ip6_ptr;
 #if IS_ENABLED(CONFIG_VLAN_8021Q)
 	struct vlan_info __rcu	*vlan_info;
 #endif
@@ -2249,26 +2291,14 @@  struct net_device {
 	/* Interface address info used in eth_type_trans() */
 	const unsigned char	*dev_addr;
 
-	struct netdev_rx_queue	*_rx;
 	unsigned int		num_rx_queues;
-	unsigned int		real_num_rx_queues;
-
 	struct bpf_prog __rcu	*xdp_prog;
-	unsigned long		gro_flush_timeout;
-	int			napi_defer_hard_irqs;
 #define GRO_LEGACY_MAX_SIZE	65536u
 /* TCP minimal MSS is 8 (TCP_MIN_GSO_SIZE),
  * and shinfo->gso_segs is a 16bit field.
  */
 #define GRO_MAX_SIZE		(8 * 65535u)
-	unsigned int		gro_max_size;
-	unsigned int		gro_ipv4_max_size;
 	unsigned int		xdp_zc_max_segs;
-	rx_handler_func_t __rcu	*rx_handler;
-	void __rcu		*rx_handler_data;
-#ifdef CONFIG_NET_XGRESS
-	struct bpf_mprog_entry __rcu *tcx_ingress;
-#endif
 	struct netdev_queue __rcu *ingress_queue;
 #ifdef CONFIG_NETFILTER_INGRESS
 	struct nf_hook_entries __rcu *nf_hooks_ingress;
@@ -2283,25 +2313,13 @@  struct net_device {
 /*
  * Cache lines mostly used on transmit path
  */
-	struct netdev_queue	*_tx ____cacheline_aligned_in_smp;
 	unsigned int		num_tx_queues;
-	unsigned int		real_num_tx_queues;
 	struct Qdisc __rcu	*qdisc;
 	unsigned int		tx_queue_len;
 	spinlock_t		tx_global_lock;
 
 	struct xdp_dev_bulk_queue __percpu *xdp_bulkq;
 
-#ifdef CONFIG_XPS
-	struct xps_dev_maps __rcu *xps_maps[XPS_MAPS_MAX];
-#endif
-#ifdef CONFIG_NET_XGRESS
-	struct bpf_mprog_entry __rcu *tcx_egress;
-#endif
-#ifdef CONFIG_NETFILTER_EGRESS
-	struct nf_hook_entries __rcu *nf_hooks_egress;
-#endif
-
 #ifdef CONFIG_NET_SCHED
 	DECLARE_HASHTABLE	(qdisc_hash, 4);
 #endif
@@ -2340,12 +2358,6 @@  struct net_device {
 	bool needs_free_netdev;
 	void (*priv_destructor)(struct net_device *dev);
 
-#ifdef CONFIG_NETPOLL
-	struct netpoll_info __rcu	*npinfo;
-#endif
-
-	possible_net_t			nd_net;
-
 	/* mid-layer private */
 	void				*ml_priv;
 	enum netdev_ml_priv_type	ml_priv_type;
@@ -2379,20 +2391,15 @@  struct net_device {
  */
 #define GSO_MAX_SIZE		(8 * GSO_MAX_SEGS)
 
-	unsigned int		gso_max_size;
 #define TSO_LEGACY_MAX_SIZE	65536
 #define TSO_MAX_SIZE		UINT_MAX
 	unsigned int		tso_max_size;
-	u16			gso_max_segs;
 #define TSO_MAX_SEGS		U16_MAX
 	u16			tso_max_segs;
-	unsigned int		gso_ipv4_max_size;
 
 #ifdef CONFIG_DCB
 	const struct dcbnl_rtnl_ops *dcbnl_ops;
 #endif
-	s16			num_tc;
-	struct netdev_tc_txq	tc_to_txq[TC_MAX_QUEUE];
 	u8			prio_tc_map[TC_BITMASK + 1];
 
 #if IS_ENABLED(CONFIG_FCOE)
diff --git a/net/core/dev.c b/net/core/dev.c
index a37a932a3e145..ca7e653e6c348 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -11511,6 +11511,55 @@  static struct pernet_operations __net_initdata default_device_ops = {
 	.exit_batch = default_device_exit_batch,
 };
 
+static void __init net_dev_struct_check(void)
+{
+	/* TX read-mostly hotpath */
+	CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read, priv_flags);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read, netdev_ops);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read, header_ops);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read, _tx);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read, real_num_tx_queues);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read, gso_max_size);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read, gso_ipv4_max_size);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read, gso_max_segs);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read, num_tc);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read, mtu);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read, needed_headroom);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read, tc_to_txq);
+#ifdef CONFIG_XPS
+	CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read, xps_maps);
+#endif
+#ifdef CONFIG_NETFILTER_EGRESS
+	CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read, nf_hooks_egress);
+#endif
+#ifdef CONFIG_NET_XGRESS
+	CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read, tcx_egress);
+#endif
+	/* TXRX read-mostly hotpath */
+	CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read, flags);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read, hard_header_len);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read, features);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read, ip6_ptr);
+	/* RX read-mostly hotpath */
+	CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read, ptype_specific);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read, ifindex);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read, real_num_rx_queues);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read, _rx);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read, gro_flush_timeout);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read, napi_defer_hard_irqs);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read, gro_max_size);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read, gro_ipv4_max_size);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read, rx_handler);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read, rx_handler_data);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read, nd_net);
+#ifdef CONFIG_NETPOLL
+	CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read, npinfo);
+#endif
+#ifdef CONFIG_NET_XGRESS
+	CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read, tcx_ingress);
+#endif
+}
+
 /*
  *	Initialize the DEV module. At boot time this walks the device list and
  *	unhooks any devices that fail to initialise (normally hardware not
@@ -11528,6 +11577,8 @@  static int __init net_dev_init(void)
 
 	BUG_ON(!dev_boot_phase);
 
+	net_dev_struct_check();
+
 	if (dev_proc_init())
 		goto out;
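
A closing note on the dev->mtu comment carried into the new TX group: the
recommended annotation pattern looks like the sketch below (hypothetical
helpers for illustration, not code from this patch):

	/* Lockless readers annotate the access with READ_ONCE()... */
	static bool frame_fits_dev_mtu(const struct sk_buff *skb,
				       const struct net_device *dev)
	{
		return skb->len <= READ_ONCE(dev->mtu);
	}

	/* ...while writers (usually under RTNL) pair it with WRITE_ONCE(). */
	static void dev_update_mtu(struct net_device *dev, unsigned int new_mtu)
	{
		WRITE_ONCE(dev->mtu, new_mtu);
	}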