diff mbox series

[09/10] btrfs: reduce size of struct btrfs_ref

Message ID c9b508651bafb53e98b9f194271d4b7b2d309cba.1694126893.git.dsterba@suse.com (mailing list archive)
State New, archived
Headers show
Series Cleanups and struct packing | expand

Commit Message

David Sterba Sept. 7, 2023, 11:09 p.m. UTC
We can reduce two members' size that in turn reduce size of struct
btrfs_ref from 64 to 56 bytes. As the structure is often used as a local
variable several functions reduce their stack usage.

- make enum btrfs_ref_type packed, there are only 4 values

- switch action and its values to a packed enum

Final structure layout:

struct btrfs_ref {
        enum btrfs_ref_type        type;                 /*     0     1 */
        enum btrfs_delayed_ref_action action;            /*     1     1 */
        bool                       skip_qgroup;          /*     2     1 */

        /* XXX 5 bytes hole, try to pack */

        u64                        bytenr;               /*     8     8 */
        u64                        len;                  /*    16     8 */
        u64                        parent;               /*    24     8 */
        union {
                struct btrfs_data_ref data_ref;          /*    32    24 */
                struct btrfs_tree_ref tree_ref;          /*    32    16 */
        };                                               /*    32    24 */

        /* size: 56, cachelines: 1, members: 7 */
        /* sum members: 51, holes: 1, sum holes: 5 */
        /* last cacheline: 56 bytes */
};

Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/delayed-ref.h | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

Comments

Qu Wenruo Sept. 8, 2023, 12:06 a.m. UTC | #1
On 2023/9/8 07:09, David Sterba wrote:
> We can reduce two members' size that in turn reduce size of struct
> btrfs_ref from 64 to 56 bytes. As the structure is often used as a local
> variable several functions reduce their stack usage.
>
> - make enum btrfs_ref_type packed, there are only 4 values
>
> - switch action and its values to a packed enum
>
> Final structure layout:
>
> struct btrfs_ref {
>          enum btrfs_ref_type        type;                 /*     0     1 */
>          enum btrfs_delayed_ref_action action;            /*     1     1 */

Considering both type and action have only 4 values, we can further pack
them into a single u8.

Although this means we can not directly use enum type, but u8 type instead.

Thanks,
Qu

>          bool                       skip_qgroup;          /*     2     1 */
>
>          /* XXX 5 bytes hole, try to pack */
>
>          u64                        bytenr;               /*     8     8 */
>          u64                        len;                  /*    16     8 */
>          u64                        parent;               /*    24     8 */
>          union {
>                  struct btrfs_data_ref data_ref;          /*    32    24 */
>                  struct btrfs_tree_ref tree_ref;          /*    32    16 */
>          };                                               /*    32    24 */
>
>          /* size: 56, cachelines: 1, members: 7 */
>          /* sum members: 51, holes: 1, sum holes: 5 */
>          /* last cacheline: 56 bytes */
> };
>
> Signed-off-by: David Sterba <dsterba@suse.com>
> ---
>   fs/btrfs/delayed-ref.h | 18 ++++++++++++------
>   1 file changed, 12 insertions(+), 6 deletions(-)
>
> diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h
> index b8e14b0ba5f1..224278d4516f 100644
> --- a/fs/btrfs/delayed-ref.h
> +++ b/fs/btrfs/delayed-ref.h
> @@ -9,10 +9,16 @@
>   #include <linux/refcount.h>
>
>   /* these are the possible values of struct btrfs_delayed_ref_node->action */
> -#define BTRFS_ADD_DELAYED_REF    1 /* add one backref to the tree */
> -#define BTRFS_DROP_DELAYED_REF   2 /* delete one backref from the tree */
> -#define BTRFS_ADD_DELAYED_EXTENT 3 /* record a full extent allocation */
> -#define BTRFS_UPDATE_DELAYED_HEAD 4 /* not changing ref count on head ref */
> +enum btrfs_delayed_ref_action {
> +	/* Add one backref to the tree */
> +	BTRFS_ADD_DELAYED_REF = 1,
> +	/* Delete one backref from the tree */
> +	BTRFS_DROP_DELAYED_REF,
> +	/* Record a full extent allocation */
> +	BTRFS_ADD_DELAYED_EXTENT,
> +	/* Not changing ref count on head ref */
> +	BTRFS_UPDATE_DELAYED_HEAD,
> +} __packed;
>
>   struct btrfs_delayed_ref_node {
>   	struct rb_node ref_node;
> @@ -183,7 +189,7 @@ enum btrfs_ref_type {
>   	BTRFS_REF_DATA,
>   	BTRFS_REF_METADATA,
>   	BTRFS_REF_LAST,
> -};
> +} __packed;
>
>   struct btrfs_data_ref {
>   	/* For EXTENT_DATA_REF */
> @@ -223,7 +229,7 @@ struct btrfs_tree_ref {
>
>   struct btrfs_ref {
>   	enum btrfs_ref_type type;
> -	int action;
> +	enum btrfs_delayed_ref_action action;
>
>   	/*
>   	 * Whether this extent should go through qgroup record.
David Sterba Sept. 8, 2023, 12:46 a.m. UTC | #2
On Fri, Sep 08, 2023 at 08:06:04AM +0800, Qu Wenruo wrote:
> 
> 
> On 2023/9/8 07:09, David Sterba wrote:
> > We can reduce two members' size that in turn reduce size of struct
> > btrfs_ref from 64 to 56 bytes. As the structure is often used as a local
> > variable several functions reduce their stack usage.
> >
> > - make enum btrfs_ref_type packed, there are only 4 values
> >
> > - switch action and its values to a packed enum
> >
> > Final structure layout:
> >
> > struct btrfs_ref {
> >          enum btrfs_ref_type        type;                 /*     0     1 */
> >          enum btrfs_delayed_ref_action action;            /*     1     1 */
> 
> Considering both type and action have only 4 values, we can further pack
> them into a single u8.
> 
> Although this means we can not directly use enum type, but u8 type instead.

I consider byte as good enough for enums or bounded types, unless
there's a really good reason to do some ultra packing like in case we
could drop a gap of like 5-7 bytes due to alignment. In case of
btrfs_ref it does not help. With explict bit width this does not reduce
the size (still 56 bytes) and only increase code size:

 struct btrfs_ref {
-       enum btrfs_ref_type        type;                 /*     0     1 */
-       enum btrfs_delayed_ref_action action;            /*     1     1 */
-       bool                       skip_qgroup;          /*     2     1 */
+       enum btrfs_ref_type        type:2;               /*     0: 0  1 */
+       enum btrfs_delayed_ref_action action:3;          /*     0: 2  1 */
+       bool                       skip_qgroup:1;        /*     0: 5  1 */
 
-       /* XXX 5 bytes hole, try to pack */
+       /* XXX 2 bits hole, try to pack */
+       /* XXX 7 bytes hole, try to pack */
 
        u64                        bytenr;               /*     8     8 */
        u64                        len;                  /*    16     8 */
@@ -3116,7 +3117,8 @@ struct btrfs_ref {
        };                                               /*    32    24 */
 
        /* size: 56, cachelines: 1, members: 7 */
-       /* sum members: 51, holes: 1, sum holes: 5 */
+       /* sum members: 48, holes: 1, sum holes: 7 */
+       /* sum bitfield members: 6 bits, bit holes: 1, sum bit holes: 2 bits */
        /* last cacheline: 56 bytes */
 };

Code increase:

   text    data     bss     dec     hex filename
1268477   29813   16088 1314378  140e4a pre/btrfs.ko
1268915   29813   16088 1314816  141000 post/btrfs.ko

DELTA: +438

which means the simple byte comparisons are turned into a few
instructions that first need to mask off the other values, eg.
from btrfs_inc_extent_ref():

test   %al,%al		->		mov    %eax,%edx
					and    $0x3,%edx

so it needs more registers and may prevent some optimizations.
diff mbox series

Patch

diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h
index b8e14b0ba5f1..224278d4516f 100644
--- a/fs/btrfs/delayed-ref.h
+++ b/fs/btrfs/delayed-ref.h
@@ -9,10 +9,16 @@ 
 #include <linux/refcount.h>
 
 /* these are the possible values of struct btrfs_delayed_ref_node->action */
-#define BTRFS_ADD_DELAYED_REF    1 /* add one backref to the tree */
-#define BTRFS_DROP_DELAYED_REF   2 /* delete one backref from the tree */
-#define BTRFS_ADD_DELAYED_EXTENT 3 /* record a full extent allocation */
-#define BTRFS_UPDATE_DELAYED_HEAD 4 /* not changing ref count on head ref */
+enum btrfs_delayed_ref_action {
+	/* Add one backref to the tree */
+	BTRFS_ADD_DELAYED_REF = 1,
+	/* Delete one backref from the tree */
+	BTRFS_DROP_DELAYED_REF,
+	/* Record a full extent allocation */
+	BTRFS_ADD_DELAYED_EXTENT,
+	/* Not changing ref count on head ref */
+	BTRFS_UPDATE_DELAYED_HEAD,
+} __packed;
 
 struct btrfs_delayed_ref_node {
 	struct rb_node ref_node;
@@ -183,7 +189,7 @@  enum btrfs_ref_type {
 	BTRFS_REF_DATA,
 	BTRFS_REF_METADATA,
 	BTRFS_REF_LAST,
-};
+} __packed;
 
 struct btrfs_data_ref {
 	/* For EXTENT_DATA_REF */
@@ -223,7 +229,7 @@  struct btrfs_tree_ref {
 
 struct btrfs_ref {
 	enum btrfs_ref_type type;
-	int action;
+	enum btrfs_delayed_ref_action action;
 
 	/*
 	 * Whether this extent should go through qgroup record.