diff mbox series

[v2,net-next,4/7] net: dsa: merge all bools of struct dsa_switch into a single u32

Message ID 20220105132141.2648876-5-vladimir.oltean@nxp.com (mailing list archive)
State Accepted
Delegated to: Netdev Maintainers
Headers show
Series Cleanup to main DSA structures | expand

Checks

Context Check Description
netdev/tree_selection success Clearly marked for net-next
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix success Link
netdev/cover_letter success Series has a cover letter
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 18 this patch: 18
netdev/cc_maintainers warning 1 maintainers not CCed: olteanv@gmail.com
netdev/build_clang success Errors and warnings before: 26 this patch: 26
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 20 this patch: 20
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 115 lines checked
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Vladimir Oltean Jan. 5, 2022, 1:21 p.m. UTC
struct dsa_switch has 9 boolean properties, many of which are in fact
set by drivers for custom behavior (vlan_filtering_is_global,
needs_standalone_vlan_filtering, etc etc). The binary layout of the
structure could be improved. For example, the "bool setup" at the
beginning introduces a gratuitous 7 byte hole in the first cache line.

The change merges all boolean properties into bitfields of an u32, and
places that u32 in the first cache line of the structure, since many
bools are accessed from the data path (untag_bridge_pvid, vlan_filtering,
vlan_filtering_is_global).

We place this u32 after the existing ds->index, which is also 4 bytes in
size. As a positive side effect, ds->tagger_data now fits into the first
cache line too, because 4 bytes are saved.

Before:

pahole -C dsa_switch net/dsa/slave.o
struct dsa_switch {
        bool                       setup;                /*     0     1 */

        /* XXX 7 bytes hole, try to pack */

        struct device *            dev;                  /*     8     8 */
        struct dsa_switch_tree *   dst;                  /*    16     8 */
        unsigned int               index;                /*    24     4 */

        /* XXX 4 bytes hole, try to pack */

        struct notifier_block      nb;                   /*    32    24 */

        /* XXX last struct has 4 bytes of padding */

        void *                     priv;                 /*    56     8 */
        /* --- cacheline 1 boundary (64 bytes) --- */
        void *                     tagger_data;          /*    64     8 */
        struct dsa_chip_data *     cd;                   /*    72     8 */
        const struct dsa_switch_ops  * ops;              /*    80     8 */
        u32                        phys_mii_mask;        /*    88     4 */

        /* XXX 4 bytes hole, try to pack */

        struct mii_bus *           slave_mii_bus;        /*    96     8 */
        unsigned int               ageing_time_min;      /*   104     4 */
        unsigned int               ageing_time_max;      /*   108     4 */
        struct dsa_8021q_context * tag_8021q_ctx;        /*   112     8 */
        struct devlink *           devlink;              /*   120     8 */
        /* --- cacheline 2 boundary (128 bytes) --- */
        unsigned int               num_tx_queues;        /*   128     4 */
        bool                       vlan_filtering_is_global; /*   132     1 */
        bool                       needs_standalone_vlan_filtering; /*   133     1 */
        bool                       configure_vlan_while_not_filtering; /*   134     1 */
        bool                       untag_bridge_pvid;    /*   135     1 */
        bool                       assisted_learning_on_cpu_port; /*   136     1 */
        bool                       vlan_filtering;       /*   137     1 */
        bool                       pcs_poll;             /*   138     1 */
        bool                       mtu_enforcement_ingress; /*   139     1 */
        unsigned int               num_lag_ids;          /*   140     4 */
        unsigned int               max_num_bridges;      /*   144     4 */

        /* XXX 4 bytes hole, try to pack */

        size_t                     num_ports;            /*   152     8 */

        /* size: 160, cachelines: 3, members: 27 */
        /* sum members: 141, holes: 4, sum holes: 19 */
        /* paddings: 1, sum paddings: 4 */
        /* last cacheline: 32 bytes */
};

After:

pahole -C dsa_switch net/dsa/slave.o
struct dsa_switch {
        struct device *            dev;                  /*     0     8 */
        struct dsa_switch_tree *   dst;                  /*     8     8 */
        unsigned int               index;                /*    16     4 */
        u32                        setup:1;              /*    20: 0  4 */
        u32                        vlan_filtering_is_global:1; /*    20: 1  4 */
        u32                        needs_standalone_vlan_filtering:1; /*    20: 2  4 */
        u32                        configure_vlan_while_not_filtering:1; /*    20: 3  4 */
        u32                        untag_bridge_pvid:1;  /*    20: 4  4 */
        u32                        assisted_learning_on_cpu_port:1; /*    20: 5  4 */
        u32                        vlan_filtering:1;     /*    20: 6  4 */
        u32                        pcs_poll:1;           /*    20: 7  4 */
        u32                        mtu_enforcement_ingress:1; /*    20: 8  4 */

        /* XXX 23 bits hole, try to pack */

        struct notifier_block      nb;                   /*    24    24 */

        /* XXX last struct has 4 bytes of padding */

        void *                     priv;                 /*    48     8 */
        void *                     tagger_data;          /*    56     8 */
        /* --- cacheline 1 boundary (64 bytes) --- */
        struct dsa_chip_data *     cd;                   /*    64     8 */
        const struct dsa_switch_ops  * ops;              /*    72     8 */
        u32                        phys_mii_mask;        /*    80     4 */

        /* XXX 4 bytes hole, try to pack */

        struct mii_bus *           slave_mii_bus;        /*    88     8 */
        unsigned int               ageing_time_min;      /*    96     4 */
        unsigned int               ageing_time_max;      /*   100     4 */
        struct dsa_8021q_context * tag_8021q_ctx;        /*   104     8 */
        struct devlink *           devlink;              /*   112     8 */
        unsigned int               num_tx_queues;        /*   120     4 */
        unsigned int               num_lag_ids;          /*   124     4 */
        /* --- cacheline 2 boundary (128 bytes) --- */
        unsigned int               max_num_bridges;      /*   128     4 */

        /* XXX 4 bytes hole, try to pack */

        size_t                     num_ports;            /*   136     8 */

        /* size: 144, cachelines: 3, members: 27 */
        /* sum members: 132, holes: 2, sum holes: 8 */
        /* sum bitfield members: 9 bits, bit holes: 1, sum bit holes: 23 bits */
        /* paddings: 1, sum paddings: 4 */
        /* last cacheline: 16 bytes */
};

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
---
 include/net/dsa.h | 97 +++++++++++++++++++++++++----------------------
 1 file changed, 51 insertions(+), 46 deletions(-)

Comments

Florian Fainelli Jan. 5, 2022, 6:32 p.m. UTC | #1
On 1/5/22 5:21 AM, Vladimir Oltean wrote:
> struct dsa_switch has 9 boolean properties, many of which are in fact
> set by drivers for custom behavior (vlan_filtering_is_global,
> needs_standalone_vlan_filtering, etc etc). The binary layout of the
> structure could be improved. For example, the "bool setup" at the
> beginning introduces a gratuitous 7 byte hole in the first cache line.
> 
> The change merges all boolean properties into bitfields of an u32, and
> places that u32 in the first cache line of the structure, since many
> bools are accessed from the data path (untag_bridge_pvid, vlan_filtering,
> vlan_filtering_is_global).
> 
> We place this u32 after the existing ds->index, which is also 4 bytes in
> size. As a positive side effect, ds->tagger_data now fits into the first
> cache line too, because 4 bytes are saved.
> 
> Before:
> 
> pahole -C dsa_switch net/dsa/slave.o
> struct dsa_switch {
>         bool                       setup;                /*     0     1 */
> 
>         /* XXX 7 bytes hole, try to pack */
> 
>         struct device *            dev;                  /*     8     8 */
>         struct dsa_switch_tree *   dst;                  /*    16     8 */
>         unsigned int               index;                /*    24     4 */
> 
>         /* XXX 4 bytes hole, try to pack */
> 
>         struct notifier_block      nb;                   /*    32    24 */
> 
>         /* XXX last struct has 4 bytes of padding */
> 
>         void *                     priv;                 /*    56     8 */
>         /* --- cacheline 1 boundary (64 bytes) --- */
>         void *                     tagger_data;          /*    64     8 */
>         struct dsa_chip_data *     cd;                   /*    72     8 */
>         const struct dsa_switch_ops  * ops;              /*    80     8 */
>         u32                        phys_mii_mask;        /*    88     4 */
> 
>         /* XXX 4 bytes hole, try to pack */
> 
>         struct mii_bus *           slave_mii_bus;        /*    96     8 */
>         unsigned int               ageing_time_min;      /*   104     4 */
>         unsigned int               ageing_time_max;      /*   108     4 */
>         struct dsa_8021q_context * tag_8021q_ctx;        /*   112     8 */
>         struct devlink *           devlink;              /*   120     8 */
>         /* --- cacheline 2 boundary (128 bytes) --- */
>         unsigned int               num_tx_queues;        /*   128     4 */
>         bool                       vlan_filtering_is_global; /*   132     1 */
>         bool                       needs_standalone_vlan_filtering; /*   133     1 */
>         bool                       configure_vlan_while_not_filtering; /*   134     1 */
>         bool                       untag_bridge_pvid;    /*   135     1 */
>         bool                       assisted_learning_on_cpu_port; /*   136     1 */
>         bool                       vlan_filtering;       /*   137     1 */
>         bool                       pcs_poll;             /*   138     1 */
>         bool                       mtu_enforcement_ingress; /*   139     1 */
>         unsigned int               num_lag_ids;          /*   140     4 */
>         unsigned int               max_num_bridges;      /*   144     4 */
> 
>         /* XXX 4 bytes hole, try to pack */
> 
>         size_t                     num_ports;            /*   152     8 */
> 
>         /* size: 160, cachelines: 3, members: 27 */
>         /* sum members: 141, holes: 4, sum holes: 19 */
>         /* paddings: 1, sum paddings: 4 */
>         /* last cacheline: 32 bytes */
> };
> 
> After:
> 
> pahole -C dsa_switch net/dsa/slave.o
> struct dsa_switch {
>         struct device *            dev;                  /*     0     8 */
>         struct dsa_switch_tree *   dst;                  /*     8     8 */
>         unsigned int               index;                /*    16     4 */
>         u32                        setup:1;              /*    20: 0  4 */
>         u32                        vlan_filtering_is_global:1; /*    20: 1  4 */
>         u32                        needs_standalone_vlan_filtering:1; /*    20: 2  4 */
>         u32                        configure_vlan_while_not_filtering:1; /*    20: 3  4 */
>         u32                        untag_bridge_pvid:1;  /*    20: 4  4 */
>         u32                        assisted_learning_on_cpu_port:1; /*    20: 5  4 */
>         u32                        vlan_filtering:1;     /*    20: 6  4 */
>         u32                        pcs_poll:1;           /*    20: 7  4 */
>         u32                        mtu_enforcement_ingress:1; /*    20: 8  4 */
> 
>         /* XXX 23 bits hole, try to pack */
> 
>         struct notifier_block      nb;                   /*    24    24 */
> 
>         /* XXX last struct has 4 bytes of padding */
> 
>         void *                     priv;                 /*    48     8 */
>         void *                     tagger_data;          /*    56     8 */
>         /* --- cacheline 1 boundary (64 bytes) --- */
>         struct dsa_chip_data *     cd;                   /*    64     8 */
>         const struct dsa_switch_ops  * ops;              /*    72     8 */
>         u32                        phys_mii_mask;        /*    80     4 */
> 
>         /* XXX 4 bytes hole, try to pack */
> 
>         struct mii_bus *           slave_mii_bus;        /*    88     8 */
>         unsigned int               ageing_time_min;      /*    96     4 */
>         unsigned int               ageing_time_max;      /*   100     4 */
>         struct dsa_8021q_context * tag_8021q_ctx;        /*   104     8 */
>         struct devlink *           devlink;              /*   112     8 */
>         unsigned int               num_tx_queues;        /*   120     4 */
>         unsigned int               num_lag_ids;          /*   124     4 */
>         /* --- cacheline 2 boundary (128 bytes) --- */
>         unsigned int               max_num_bridges;      /*   128     4 */
> 
>         /* XXX 4 bytes hole, try to pack */
> 
>         size_t                     num_ports;            /*   136     8 */
> 
>         /* size: 144, cachelines: 3, members: 27 */
>         /* sum members: 132, holes: 2, sum holes: 8 */
>         /* sum bitfield members: 9 bits, bit holes: 1, sum bit holes: 23 bits */
>         /* paddings: 1, sum paddings: 4 */
>         /* last cacheline: 16 bytes */
> };
> 
> Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>

Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
diff mbox series

Patch

diff --git a/include/net/dsa.h b/include/net/dsa.h
index 5e42fa7ea377..a8a586039033 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -321,8 +321,6 @@  struct dsa_mac_addr {
 };
 
 struct dsa_switch {
-	bool setup;
-
 	struct device *dev;
 
 	/*
@@ -331,6 +329,57 @@  struct dsa_switch {
 	struct dsa_switch_tree	*dst;
 	unsigned int		index;
 
+	u32			setup:1,
+				/* Disallow bridge core from requesting
+				 * different VLAN awareness settings on ports
+				 * if not hardware-supported
+				 */
+				vlan_filtering_is_global:1,
+				/* Keep VLAN filtering enabled on ports not
+				 * offloading any upper
+				 */
+				needs_standalone_vlan_filtering:1,
+				/* Pass .port_vlan_add and .port_vlan_del to
+				 * drivers even for bridges that have
+				 * vlan_filtering=0. All drivers should ideally
+				 * set this (and then the option would get
+				 * removed), but it is unknown whether this
+				 * would break things or not.
+				 */
+				configure_vlan_while_not_filtering:1,
+				/* If the switch driver always programs the CPU
+				 * port as egress tagged despite the VLAN
+				 * configuration indicating otherwise, then
+				 * setting @untag_bridge_pvid will force the
+				 * DSA receive path to pop the bridge's
+				 * default_pvid VLAN tagged frames to offer a
+				 * consistent behavior between a
+				 * vlan_filtering=0 and vlan_filtering=1 bridge
+				 * device.
+				 */
+				untag_bridge_pvid:1,
+				/* Let DSA manage the FDB entries towards the
+				 * CPU, based on the software bridge database.
+				 */
+				assisted_learning_on_cpu_port:1,
+				/* In case vlan_filtering_is_global is set, the
+				 * VLAN awareness state should be retrieved
+				 * from here and not from the per-port
+				 * settings.
+				 */
+				vlan_filtering:1,
+				/* MAC PCS does not provide link state change
+				 * interrupt, and requires polling. Flag passed
+				 * on to PHYLINK.
+				 */
+				pcs_poll:1,
+				/* For switches that only have the MRU
+				 * configurable. To ensure the configured MTU
+				 * is not exceeded, normalization of MRU on all
+				 * bridged interfaces is needed.
+				 */
+				mtu_enforcement_ingress:1;
+
 	/* Listener for switch fabric events */
 	struct notifier_block	nb;
 
@@ -371,50 +420,6 @@  struct dsa_switch {
 	/* Number of switch port queues */
 	unsigned int		num_tx_queues;
 
-	/* Disallow bridge core from requesting different VLAN awareness
-	 * settings on ports if not hardware-supported
-	 */
-	bool			vlan_filtering_is_global;
-
-	/* Keep VLAN filtering enabled on ports not offloading any upper. */
-	bool			needs_standalone_vlan_filtering;
-
-	/* Pass .port_vlan_add and .port_vlan_del to drivers even for bridges
-	 * that have vlan_filtering=0. All drivers should ideally set this (and
-	 * then the option would get removed), but it is unknown whether this
-	 * would break things or not.
-	 */
-	bool			configure_vlan_while_not_filtering;
-
-	/* If the switch driver always programs the CPU port as egress tagged
-	 * despite the VLAN configuration indicating otherwise, then setting
-	 * @untag_bridge_pvid will force the DSA receive path to pop the bridge's
-	 * default_pvid VLAN tagged frames to offer a consistent behavior
-	 * between a vlan_filtering=0 and vlan_filtering=1 bridge device.
-	 */
-	bool			untag_bridge_pvid;
-
-	/* Let DSA manage the FDB entries towards the CPU, based on the
-	 * software bridge database.
-	 */
-	bool			assisted_learning_on_cpu_port;
-
-	/* In case vlan_filtering_is_global is set, the VLAN awareness state
-	 * should be retrieved from here and not from the per-port settings.
-	 */
-	bool			vlan_filtering;
-
-	/* MAC PCS does not provide link state change interrupt, and requires
-	 * polling. Flag passed on to PHYLINK.
-	 */
-	bool			pcs_poll;
-
-	/* For switches that only have the MRU configurable. To ensure the
-	 * configured MTU is not exceeded, normalization of MRU on all bridged
-	 * interfaces is needed.
-	 */
-	bool			mtu_enforcement_ingress;
-
 	/* Drivers that benefit from having an ID associated with each
 	 * offloaded LAG should set this to the maximum number of
 	 * supported IDs. DSA will then maintain a mapping of _at