Message ID | 20220105132141.2648876-4-vladimir.oltean@nxp.com (mailing list archive) |
---|---|
State | Accepted |
Delegated to: | Netdev Maintainers |
Headers | show |
Series | Cleanup to main DSA structures | expand |
On 1/5/22 5:21 AM, Vladimir Oltean wrote: > Both dsa_port :: type and dsa_port :: index introduce a 4 octet hole > after them, so we can group them together and the holes would be > eliminated, turning 16 octets of storage into just 8. This makes the > cpu_dp pointer fit in the first cache line, which is good, because > dsa_slave_to_master(), called by dsa_enqueue_skb(), uses it. > > Before: > > pahole -C dsa_port net/dsa/slave.o > struct dsa_port { > union { > struct net_device * master; /* 0 8 */ > struct net_device * slave; /* 0 8 */ > }; /* 0 8 */ > const struct dsa_device_ops * tag_ops; /* 8 8 */ > struct dsa_switch_tree * dst; /* 16 8 */ > struct sk_buff * (*rcv)(struct sk_buff *, struct net_device *); /* 24 8 */ > enum { > DSA_PORT_TYPE_UNUSED = 0, > DSA_PORT_TYPE_CPU = 1, > DSA_PORT_TYPE_DSA = 2, > DSA_PORT_TYPE_USER = 3, > } type; /* 32 4 */ > > /* XXX 4 bytes hole, try to pack */ > > struct dsa_switch * ds; /* 40 8 */ > unsigned int index; /* 48 4 */ > > /* XXX 4 bytes hole, try to pack */ > > const char * name; /* 56 8 */ > /* --- cacheline 1 boundary (64 bytes) --- */ > struct dsa_port * cpu_dp; /* 64 8 */ > u8 mac[6]; /* 72 6 */ > u8 stp_state; /* 78 1 */ > u8 vlan_filtering:1; /* 79: 0 1 */ > u8 learning:1; /* 79: 1 1 */ > u8 lag_tx_enabled:1; /* 79: 2 1 */ > u8 devlink_port_setup:1; /* 79: 3 1 */ > u8 setup:1; /* 79: 4 1 */ > > /* XXX 3 bits hole, try to pack */ > > struct device_node * dn; /* 80 8 */ > unsigned int ageing_time; /* 88 4 */ > > /* XXX 4 bytes hole, try to pack */ > > struct dsa_bridge * bridge; /* 96 8 */ > struct devlink_port devlink_port; /* 104 288 */ > /* --- cacheline 6 boundary (384 bytes) was 8 bytes ago --- */ > struct phylink * pl; /* 392 8 */ > struct phylink_config pl_config; /* 400 40 */ > struct net_device * lag_dev; /* 440 8 */ > /* --- cacheline 7 boundary (448 bytes) --- */ > struct net_device * hsr_dev; /* 448 8 */ > struct list_head list; /* 456 16 */ > const struct ethtool_ops * orig_ethtool_ops; /* 472 8 */ > const struct dsa_netdevice_ops * netdev_ops; /* 480 8 */ > struct mutex addr_lists_lock; /* 488 32 */ > /* --- cacheline 8 boundary (512 bytes) was 8 bytes ago --- */ > struct list_head fdbs; /* 520 16 */ > struct list_head mdbs; /* 536 16 */ > > /* size: 552, cachelines: 9, members: 30 */ > /* sum members: 539, holes: 3, sum holes: 12 */ > /* sum bitfield members: 5 bits, bit holes: 1, sum bit holes: 3 bits */ > /* last cacheline: 40 bytes */ > }; > > After: > > pahole -C dsa_port net/dsa/slave.o > struct dsa_port { > union { > struct net_device * master; /* 0 8 */ > struct net_device * slave; /* 0 8 */ > }; /* 0 8 */ > const struct dsa_device_ops * tag_ops; /* 8 8 */ > struct dsa_switch_tree * dst; /* 16 8 */ > struct sk_buff * (*rcv)(struct sk_buff *, struct net_device *); /* 24 8 */ > struct dsa_switch * ds; /* 32 8 */ > unsigned int index; /* 40 4 */ > enum { > DSA_PORT_TYPE_UNUSED = 0, > DSA_PORT_TYPE_CPU = 1, > DSA_PORT_TYPE_DSA = 2, > DSA_PORT_TYPE_USER = 3, > } type; /* 44 4 */ > const char * name; /* 48 8 */ > struct dsa_port * cpu_dp; /* 56 8 */ > /* --- cacheline 1 boundary (64 bytes) --- */ > u8 mac[6]; /* 64 6 */ > u8 stp_state; /* 70 1 */ > u8 vlan_filtering:1; /* 71: 0 1 */ > u8 learning:1; /* 71: 1 1 */ > u8 lag_tx_enabled:1; /* 71: 2 1 */ > u8 devlink_port_setup:1; /* 71: 3 1 */ > u8 setup:1; /* 71: 4 1 */ > > /* XXX 3 bits hole, try to pack */ > > struct device_node * dn; /* 72 8 */ > unsigned int ageing_time; /* 80 4 */ > > /* XXX 4 bytes hole, try to pack */ > > struct dsa_bridge * bridge; /* 88 8 */ > struct devlink_port devlink_port; /* 96 288 */ > /* --- cacheline 6 boundary (384 bytes) --- */ > struct phylink * pl; /* 384 8 */ > struct phylink_config pl_config; /* 392 40 */ > struct net_device * lag_dev; /* 432 8 */ > struct net_device * hsr_dev; /* 440 8 */ > /* --- cacheline 7 boundary (448 bytes) --- */ > struct list_head list; /* 448 16 */ > const struct ethtool_ops * orig_ethtool_ops; /* 464 8 */ > const struct dsa_netdevice_ops * netdev_ops; /* 472 8 */ > struct mutex addr_lists_lock; /* 480 32 */ > /* --- cacheline 8 boundary (512 bytes) --- */ > struct list_head fdbs; /* 512 16 */ > struct list_head mdbs; /* 528 16 */ > > /* size: 544, cachelines: 9, members: 30 */ > /* sum members: 539, holes: 1, sum holes: 4 */ > /* sum bitfield members: 5 bits, bit holes: 1, sum bit holes: 3 bits */ > /* last cacheline: 32 bytes */ > }; > > Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
diff --git a/include/net/dsa.h b/include/net/dsa.h index a8f0037b58e2..5e42fa7ea377 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -246,6 +246,10 @@ struct dsa_port { struct dsa_switch_tree *dst; struct sk_buff *(*rcv)(struct sk_buff *skb, struct net_device *dev); + struct dsa_switch *ds; + + unsigned int index; + enum { DSA_PORT_TYPE_UNUSED = 0, DSA_PORT_TYPE_CPU, @@ -253,8 +257,6 @@ struct dsa_port { DSA_PORT_TYPE_USER, } type; - struct dsa_switch *ds; - unsigned int index; const char *name; struct dsa_port *cpu_dp; u8 mac[ETH_ALEN];
Both dsa_port :: type and dsa_port :: index introduce a 4 octet hole after them, so we can group them together and the holes would be eliminated, turning 16 octets of storage into just 8. This makes the cpu_dp pointer fit in the first cache line, which is good, because dsa_slave_to_master(), called by dsa_enqueue_skb(), uses it. Before: pahole -C dsa_port net/dsa/slave.o struct dsa_port { union { struct net_device * master; /* 0 8 */ struct net_device * slave; /* 0 8 */ }; /* 0 8 */ const struct dsa_device_ops * tag_ops; /* 8 8 */ struct dsa_switch_tree * dst; /* 16 8 */ struct sk_buff * (*rcv)(struct sk_buff *, struct net_device *); /* 24 8 */ enum { DSA_PORT_TYPE_UNUSED = 0, DSA_PORT_TYPE_CPU = 1, DSA_PORT_TYPE_DSA = 2, DSA_PORT_TYPE_USER = 3, } type; /* 32 4 */ /* XXX 4 bytes hole, try to pack */ struct dsa_switch * ds; /* 40 8 */ unsigned int index; /* 48 4 */ /* XXX 4 bytes hole, try to pack */ const char * name; /* 56 8 */ /* --- cacheline 1 boundary (64 bytes) --- */ struct dsa_port * cpu_dp; /* 64 8 */ u8 mac[6]; /* 72 6 */ u8 stp_state; /* 78 1 */ u8 vlan_filtering:1; /* 79: 0 1 */ u8 learning:1; /* 79: 1 1 */ u8 lag_tx_enabled:1; /* 79: 2 1 */ u8 devlink_port_setup:1; /* 79: 3 1 */ u8 setup:1; /* 79: 4 1 */ /* XXX 3 bits hole, try to pack */ struct device_node * dn; /* 80 8 */ unsigned int ageing_time; /* 88 4 */ /* XXX 4 bytes hole, try to pack */ struct dsa_bridge * bridge; /* 96 8 */ struct devlink_port devlink_port; /* 104 288 */ /* --- cacheline 6 boundary (384 bytes) was 8 bytes ago --- */ struct phylink * pl; /* 392 8 */ struct phylink_config pl_config; /* 400 40 */ struct net_device * lag_dev; /* 440 8 */ /* --- cacheline 7 boundary (448 bytes) --- */ struct net_device * hsr_dev; /* 448 8 */ struct list_head list; /* 456 16 */ const struct ethtool_ops * orig_ethtool_ops; /* 472 8 */ const struct dsa_netdevice_ops * netdev_ops; /* 480 8 */ struct mutex addr_lists_lock; /* 488 32 */ /* --- cacheline 8 boundary (512 bytes) was 8 bytes ago --- */ struct list_head fdbs; /* 520 16 */ struct list_head mdbs; /* 536 16 */ /* size: 552, cachelines: 9, members: 30 */ /* sum members: 539, holes: 3, sum holes: 12 */ /* sum bitfield members: 5 bits, bit holes: 1, sum bit holes: 3 bits */ /* last cacheline: 40 bytes */ }; After: pahole -C dsa_port net/dsa/slave.o struct dsa_port { union { struct net_device * master; /* 0 8 */ struct net_device * slave; /* 0 8 */ }; /* 0 8 */ const struct dsa_device_ops * tag_ops; /* 8 8 */ struct dsa_switch_tree * dst; /* 16 8 */ struct sk_buff * (*rcv)(struct sk_buff *, struct net_device *); /* 24 8 */ struct dsa_switch * ds; /* 32 8 */ unsigned int index; /* 40 4 */ enum { DSA_PORT_TYPE_UNUSED = 0, DSA_PORT_TYPE_CPU = 1, DSA_PORT_TYPE_DSA = 2, DSA_PORT_TYPE_USER = 3, } type; /* 44 4 */ const char * name; /* 48 8 */ struct dsa_port * cpu_dp; /* 56 8 */ /* --- cacheline 1 boundary (64 bytes) --- */ u8 mac[6]; /* 64 6 */ u8 stp_state; /* 70 1 */ u8 vlan_filtering:1; /* 71: 0 1 */ u8 learning:1; /* 71: 1 1 */ u8 lag_tx_enabled:1; /* 71: 2 1 */ u8 devlink_port_setup:1; /* 71: 3 1 */ u8 setup:1; /* 71: 4 1 */ /* XXX 3 bits hole, try to pack */ struct device_node * dn; /* 72 8 */ unsigned int ageing_time; /* 80 4 */ /* XXX 4 bytes hole, try to pack */ struct dsa_bridge * bridge; /* 88 8 */ struct devlink_port devlink_port; /* 96 288 */ /* --- cacheline 6 boundary (384 bytes) --- */ struct phylink * pl; /* 384 8 */ struct phylink_config pl_config; /* 392 40 */ struct net_device * lag_dev; /* 432 8 */ struct net_device * hsr_dev; /* 440 8 */ /* --- cacheline 7 boundary (448 bytes) --- */ struct list_head list; /* 448 16 */ const struct ethtool_ops * orig_ethtool_ops; /* 464 8 */ const struct dsa_netdevice_ops * netdev_ops; /* 472 8 */ struct mutex addr_lists_lock; /* 480 32 */ /* --- cacheline 8 boundary (512 bytes) --- */ struct list_head fdbs; /* 512 16 */ struct list_head mdbs; /* 528 16 */ /* size: 544, cachelines: 9, members: 30 */ /* sum members: 539, holes: 1, sum holes: 4 */ /* sum bitfield members: 5 bits, bit holes: 1, sum bit holes: 3 bits */ /* last cacheline: 32 bytes */ }; Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> --- include/net/dsa.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-)