btrfs: reorder extent buffer members for better packing

Message ID: 20201103211101.4221-1-dsterba@suse.com (mailing list archive)
State: New, archived
Series: btrfs: reorder extent buffer members for better packing

Commit Message

David Sterba Nov. 3, 2020, 9:11 p.m. UTC
After the rwsem replaced the tree lock implementation, the extent buffer
got smaller but left some holes behind. By changing the log_index type
and reordering the members, we can squeeze the size further to 240 bytes,
measured on a release config on x86_64. Log_index spans only 3 values and
needs to be signed.

Before:

struct extent_buffer {
        u64                        start;                /*     0     8 */
        long unsigned int          len;                  /*     8     8 */
        long unsigned int          bflags;               /*    16     8 */
        struct btrfs_fs_info *     fs_info;              /*    24     8 */
        spinlock_t                 refs_lock;            /*    32     4 */
        atomic_t                   refs;                 /*    36     4 */
        atomic_t                   io_pages;             /*    40     4 */
        int                        read_mirror;          /*    44     4 */
        struct callback_head       callback_head __attribute__((__aligned__(8))); /*    48    16 */
        /* --- cacheline 1 boundary (64 bytes) --- */
        pid_t                      lock_owner;           /*    64     4 */
        bool                       lock_recursed;        /*    68     1 */

        /* XXX 3 bytes hole, try to pack */

        struct rw_semaphore        lock;                 /*    72    40 */
        short int                  log_index;            /*   112     2 */

        /* XXX 6 bytes hole, try to pack */

        struct page *              pages[16];            /*   120   128 */

        /* size: 248, cachelines: 4, members: 14 */
        /* sum members: 239, holes: 2, sum holes: 9 */
        /* forced alignments: 1 */
        /* last cacheline: 56 bytes */
} __attribute__((__aligned__(8)));

After:

struct extent_buffer {
        u64                        start;                /*     0     8 */
        long unsigned int          len;                  /*     8     8 */
        long unsigned int          bflags;               /*    16     8 */
        struct btrfs_fs_info *     fs_info;              /*    24     8 */
        spinlock_t                 refs_lock;            /*    32     4 */
        atomic_t                   refs;                 /*    36     4 */
        atomic_t                   io_pages;             /*    40     4 */
        int                        read_mirror;          /*    44     4 */
        struct callback_head       callback_head __attribute__((__aligned__(8))); /*    48    16 */
        /* --- cacheline 1 boundary (64 bytes) --- */
        pid_t                      lock_owner;           /*    64     4 */
        bool                       lock_recursed;        /*    68     1 */
        s8                         log_index;            /*    69     1 */

        /* XXX 2 bytes hole, try to pack */

        struct rw_semaphore        lock;                 /*    72    40 */
        struct page *              pages[16];            /*   112   128 */

        /* size: 240, cachelines: 4, members: 14 */
        /* sum members: 238, holes: 1, sum holes: 2 */
        /* forced alignments: 1 */
        /* last cacheline: 48 bytes */
} __attribute__((__aligned__(8)));

Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent_io.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)
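
The whole saving comes from letting the one-byte log_index slide into the
existing hole after lock_recursed, instead of leaving a short stranded
between two 8-byte-aligned members. Below is a minimal standalone C sketch
of the same principle, with hypothetical stand-in types rather than the
kernel definitions; offsets assume x86_64/LP64:

#include <stdio.h>

/* Mimics the "Before" layout: a 2-byte member stranded between two
 * 8-byte-aligned members costs a whole alignment slot. */
struct eb_before {
        int lock_owner;        /* offset  0, size   4 */
        _Bool lock_recursed;   /* offset  4, size   1 */
        /* 3-byte hole */
        long lock[5];          /* offset  8, size  40 (rw_semaphore stand-in) */
        short log_index;       /* offset 48, size   2 */
        /* 6-byte hole */
        void *pages[16];       /* offset 56, size 128 */
};                             /* total: 184 bytes */

/* Mimics the "After" layout: the 1-byte log_index fits into the hole
 * after lock_recursed, so its standalone slot disappears. */
struct eb_after {
        int lock_owner;        /* offset  0, size   4 */
        _Bool lock_recursed;   /* offset  4, size   1 */
        signed char log_index; /* offset  5, size   1 */
        /* 2-byte hole */
        long lock[5];          /* offset  8, size  40 */
        void *pages[16];       /* offset 48, size 128 */
};                             /* total: 176 bytes */

int main(void)
{
        printf("before: %zu after: %zu\n",
               sizeof(struct eb_before), sizeof(struct eb_after));
        return 0;
}

Compiled and run, this prints 184 and 176: the same 8-byte saving, for the
same reason, as the 248 -> 240 change above.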

Comments

Amy Parker Nov. 3, 2020, 9:25 p.m. UTC | #1
On Tue, Nov 3, 2020 at 1:18 PM David Sterba <dsterba@suse.com> wrote:
>
> After the rwsem replaced the tree lock implementation, the extent buffer
> got smaller but left some holes behind. By changing the log_index type
> and reordering the members, we can squeeze the size further to 240 bytes,
> measured on a release config on x86_64. Log_index spans only 3 values and
> needs to be signed.
>

Sounds great!

> Before:
>
> struct extent_buffer {
>         u64                        start;                /*     0     8 */
>         long unsigned int          len;                  /*     8     8 */
>         long unsigned int          bflags;               /*    16     8 */
>         struct btrfs_fs_info *     fs_info;              /*    24     8 */
>         spinlock_t                 refs_lock;            /*    32     4 */
>         atomic_t                   refs;                 /*    36     4 */
>         atomic_t                   io_pages;             /*    40     4 */
>         int                        read_mirror;          /*    44     4 */
>         struct callback_head       callback_head __attribute__((__aligned__(8))); /*    48    16 */
>         /* --- cacheline 1 boundary (64 bytes) --- */
>         pid_t                      lock_owner;           /*    64     4 */
>         bool                       lock_recursed;        /*    68     1 */
>
>         /* XXX 3 bytes hole, try to pack */
>
>         struct rw_semaphore        lock;                 /*    72    40 */
>         short int                  log_index;            /*   112     2 */
>
>         /* XXX 6 bytes hole, try to pack */
>
>         struct page *              pages[16];            /*   120   128 */
>
>         /* size: 248, cachelines: 4, members: 14 */
>         /* sum members: 239, holes: 2, sum holes: 9 */
>         /* forced alignments: 1 */
>         /* last cacheline: 56 bytes */
> } __attribute__((__aligned__(8)));
>
> After:
>
> struct extent_buffer {
>         u64                        start;                /*     0     8 */
>         long unsigned int          len;                  /*     8     8 */
>         long unsigned int          bflags;               /*    16     8 */
>         struct btrfs_fs_info *     fs_info;              /*    24     8 */
>         spinlock_t                 refs_lock;            /*    32     4 */
>         atomic_t                   refs;                 /*    36     4 */
>         atomic_t                   io_pages;             /*    40     4 */
>         int                        read_mirror;          /*    44     4 */
>         struct callback_head       callback_head __attribute__((__aligned__(8))); /*    48    16 */
>         /* --- cacheline 1 boundary (64 bytes) --- */
>         pid_t                      lock_owner;           /*    64     4 */
>         bool                       lock_recursed;        /*    68     1 */
>         s8                         log_index;            /*    69     1 */
>
>         /* XXX 2 bytes hole, try to pack */
>
>         struct rw_semaphore        lock;                 /*    72    40 */
>         struct page *              pages[16];            /*   112   128 */
>
>         /* size: 240, cachelines: 4, members: 14 */
>         /* sum members: 238, holes: 1, sum holes: 2 */
>         /* forced alignments: 1 */
>         /* last cacheline: 48 bytes */
> } __attribute__((__aligned__(8)));

Looks alright, although given the rest of the value types above,
you may want to leave a comment for new btrfs devs explaining
the change, as well as a reference to this thread.

>
> Signed-off-by: David Sterba <dsterba@suse.com>
> ---
>  fs/btrfs/extent_io.h | 6 +++---
>  1 file changed, 3 insertions(+), 3 deletions(-)
>
> diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
> index 5403354de0e1..3c2bf21c54eb 100644
> --- a/fs/btrfs/extent_io.h
> +++ b/fs/btrfs/extent_io.h
> @@ -88,10 +88,10 @@ struct extent_buffer {
>         struct rcu_head rcu_head;
>         pid_t lock_owner;
>         bool lock_recursed;
> -       struct rw_semaphore lock;
> -
>         /* >= 0 if eb belongs to a log tree, -1 otherwise */
> -       short log_index;
> +       s8 log_index;
> +
> +       struct rw_semaphore lock;
>
>         struct page *pages[INLINE_EXTENT_BUFFER_PAGES];
>  #ifdef CONFIG_BTRFS_DEBUG
> --
> 2.25.0
>

Function-wise, everything seems great here. Again, as mentioned,
you may want to include a comment explaining the change and the
divergence from the other types used in the code above.

Best regards,
Amy Parker
(they/them)
Qu Wenruo Nov. 3, 2020, 11:44 p.m. UTC | #2
On 2020/11/4 5:11 AM, David Sterba wrote:
> After the rwsem replaced the tree lock implementation, the extent buffer
> got smaller but left some holes behind. By changing the log_index type
> and reordering the members, we can squeeze the size further to 240 bytes,
> measured on a release config on x86_64. Log_index spans only 3 values and
> needs to be signed.
> 
> Before:
> 
> struct extent_buffer {
>         u64                        start;                /*     0     8 */
>         long unsigned int          len;                  /*     8     8 */
>         long unsigned int          bflags;               /*    16     8 */
>         struct btrfs_fs_info *     fs_info;              /*    24     8 */
>         spinlock_t                 refs_lock;            /*    32     4 */
>         atomic_t                   refs;                 /*    36     4 */
>         atomic_t                   io_pages;             /*    40     4 */
>         int                        read_mirror;          /*    44     4 */
>         struct callback_head       callback_head __attribute__((__aligned__(8))); /*    48    16 */
>         /* --- cacheline 1 boundary (64 bytes) --- */
>         pid_t                      lock_owner;           /*    64     4 */
>         bool                       lock_recursed;        /*    68     1 */
> 
>         /* XXX 3 bytes hole, try to pack */
> 
>         struct rw_semaphore        lock;                 /*    72    40 */

An off-topic question: for things like atomic_t/spinlock_t and
rw_semaphore, wouldn't various DEBUG options change their sizes?

Do we need to consider such cases, e.g. by moving them to the end of the
structure, or do we only consider the production build for pahole?

Thanks,
Qu

>         short int                  log_index;            /*   112     2 */
> 
>         /* XXX 6 bytes hole, try to pack */
> 
>         struct page *              pages[16];            /*   120   128 */
> 
>         /* size: 248, cachelines: 4, members: 14 */
>         /* sum members: 239, holes: 2, sum holes: 9 */
>         /* forced alignments: 1 */
>         /* last cacheline: 56 bytes */
> } __attribute__((__aligned__(8)));
> 
> After:
> 
> struct extent_buffer {
>         u64                        start;                /*     0     8 */
>         long unsigned int          len;                  /*     8     8 */
>         long unsigned int          bflags;               /*    16     8 */
>         struct btrfs_fs_info *     fs_info;              /*    24     8 */
>         spinlock_t                 refs_lock;            /*    32     4 */
>         atomic_t                   refs;                 /*    36     4 */
>         atomic_t                   io_pages;             /*    40     4 */
>         int                        read_mirror;          /*    44     4 */
>         struct callback_head       callback_head __attribute__((__aligned__(8))); /*    48    16 */
>         /* --- cacheline 1 boundary (64 bytes) --- */
>         pid_t                      lock_owner;           /*    64     4 */
>         bool                       lock_recursed;        /*    68     1 */
>         s8                         log_index;            /*    69     1 */
> 
>         /* XXX 2 bytes hole, try to pack */
> 
>         struct rw_semaphore        lock;                 /*    72    40 */
>         struct page *              pages[16];            /*   112   128 */
> 
>         /* size: 240, cachelines: 4, members: 14 */
>         /* sum members: 238, holes: 1, sum holes: 2 */
>         /* forced alignments: 1 */
>         /* last cacheline: 48 bytes */
> } __attribute__((__aligned__(8)));
> 
> Signed-off-by: David Sterba <dsterba@suse.com>
> ---
>  fs/btrfs/extent_io.h | 6 +++---
>  1 file changed, 3 insertions(+), 3 deletions(-)
> 
> diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
> index 5403354de0e1..3c2bf21c54eb 100644
> --- a/fs/btrfs/extent_io.h
> +++ b/fs/btrfs/extent_io.h
> @@ -88,10 +88,10 @@ struct extent_buffer {
>  	struct rcu_head rcu_head;
>  	pid_t lock_owner;
>  	bool lock_recursed;
> -	struct rw_semaphore lock;
> -
>  	/* >= 0 if eb belongs to a log tree, -1 otherwise */
> -	short log_index;
> +	s8 log_index;
> +
> +	struct rw_semaphore lock;
>  
>  	struct page *pages[INLINE_EXTENT_BUFFER_PAGES];
>  #ifdef CONFIG_BTRFS_DEBUG
>
Amy Parker Nov. 4, 2020, 3:55 a.m. UTC | #3
On Tue, Nov 3, 2020 at 3:45 PM Qu Wenruo <quwenruo.btrfs@gmx.com> wrote:
>
>
>
> On 2020/11/4 5:11 AM, David Sterba wrote:
> > After the rwsem replaced the tree lock implementation, the extent buffer
> > got smaller but left some holes behind. By changing the log_index type
> > and reordering the members, we can squeeze the size further to 240 bytes,
> > measured on a release config on x86_64. Log_index spans only 3 values and
> > needs to be signed.
> >
> > Before:
> >
> > struct extent_buffer {
> >         u64                        start;                /*     0     8 */
> >         long unsigned int          len;                  /*     8     8 */
> >         long unsigned int          bflags;               /*    16     8 */
> >         struct btrfs_fs_info *     fs_info;              /*    24     8 */
> >         spinlock_t                 refs_lock;            /*    32     4 */
> >         atomic_t                   refs;                 /*    36     4 */
> >         atomic_t                   io_pages;             /*    40     4 */
> >         int                        read_mirror;          /*    44     4 */
> >         struct callback_head       callback_head __attribute__((__aligned__(8))); /*    48    16 */
> >         /* --- cacheline 1 boundary (64 bytes) --- */
> >         pid_t                      lock_owner;           /*    64     4 */
> >         bool                       lock_recursed;        /*    68     1 */
> >
> >         /* XXX 3 bytes hole, try to pack */
> >
> >         struct rw_semaphore        lock;                 /*    72    40 */
>
> An off-topic question: for things like atomic_t/spinlock_t and
> rw_semaphore, wouldn't various DEBUG options change their sizes?

Yes, I believe they could.

>
> Do we need to consider such cases, e.g. by moving them to the end of the
> structure, or do we only consider the production build for pahole?
>

I'd say we could go either way on this, but personally, if it builds, it builds.
Let's just focus on pahole for now and potentially revisit this later.

> Thanks,
> Qu
>
> >         short int                  log_index;            /*   112     2 */
> >
> >         /* XXX 6 bytes hole, try to pack */
> >
> >         struct page *              pages[16];            /*   120   128 */
> >
> >         /* size: 248, cachelines: 4, members: 14 */
> >         /* sum members: 239, holes: 2, sum holes: 9 */
> >         /* forced alignments: 1 */
> >         /* last cacheline: 56 bytes */
> > } __attribute__((__aligned__(8)));
> >
> > After:
> >
> > struct extent_buffer {
> >         u64                        start;                /*     0     8 */
> >         long unsigned int          len;                  /*     8     8 */
> >         long unsigned int          bflags;               /*    16     8 */
> >         struct btrfs_fs_info *     fs_info;              /*    24     8 */
> >         spinlock_t                 refs_lock;            /*    32     4 */
> >         atomic_t                   refs;                 /*    36     4 */
> >         atomic_t                   io_pages;             /*    40     4 */
> >         int                        read_mirror;          /*    44     4 */
> >         struct callback_head       callback_head __attribute__((__aligned__(8))); /*    48    16 */
> >         /* --- cacheline 1 boundary (64 bytes) --- */
> >         pid_t                      lock_owner;           /*    64     4 */
> >         bool                       lock_recursed;        /*    68     1 */
> >         s8                         log_index;            /*    69     1 */
> >
> >         /* XXX 2 bytes hole, try to pack */
> >
> >         struct rw_semaphore        lock;                 /*    72    40 */
> >         struct page *              pages[16];            /*   112   128 */
> >
> >         /* size: 240, cachelines: 4, members: 14 */
> >         /* sum members: 238, holes: 1, sum holes: 2 */
> >         /* forced alignments: 1 */
> >         /* last cacheline: 48 bytes */
> > } __attribute__((__aligned__(8)));
> >
> > Signed-off-by: David Sterba <dsterba@suse.com>
> > ---
> >  fs/btrfs/extent_io.h | 6 +++---
> >  1 file changed, 3 insertions(+), 3 deletions(-)
> >
> > diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
> > index 5403354de0e1..3c2bf21c54eb 100644
> > --- a/fs/btrfs/extent_io.h
> > +++ b/fs/btrfs/extent_io.h
> > @@ -88,10 +88,10 @@ struct extent_buffer {
> >       struct rcu_head rcu_head;
> >       pid_t lock_owner;
> >       bool lock_recursed;
> > -     struct rw_semaphore lock;
> > -
> >       /* >= 0 if eb belongs to a log tree, -1 otherwise */
> > -     short log_index;
> > +     s8 log_index;
> > +
> > +     struct rw_semaphore lock;
> >
> >       struct page *pages[INLINE_EXTENT_BUFFER_PAGES];
> >  #ifdef CONFIG_BTRFS_DEBUG
> >
>

Best regards,
Amy Parker
(they/them)
David Sterba Nov. 4, 2020, 3:53 p.m. UTC | #4
On Wed, Nov 04, 2020 at 07:44:33AM +0800, Qu Wenruo wrote:
> On 2020/11/4 5:11 AM, David Sterba wrote:
> > After the rwsem replaced the tree lock implementation, the extent buffer
> > got smaller but left some holes behind. By changing the log_index type
> > and reordering the members, we can squeeze the size further to 240 bytes,
> > measured on a release config on x86_64. Log_index spans only 3 values and
> > needs to be signed.
> > 
> > Before:
> > 
> > struct extent_buffer {
> >         u64                        start;                /*     0     8 */
> >         long unsigned int          len;                  /*     8     8 */
> >         long unsigned int          bflags;               /*    16     8 */
> >         struct btrfs_fs_info *     fs_info;              /*    24     8 */
> >         spinlock_t                 refs_lock;            /*    32     4 */
> >         atomic_t                   refs;                 /*    36     4 */
> >         atomic_t                   io_pages;             /*    40     4 */
> >         int                        read_mirror;          /*    44     4 */
> >         struct callback_head       callback_head __attribute__((__aligned__(8))); /*    48    16 */
> >         /* --- cacheline 1 boundary (64 bytes) --- */
> >         pid_t                      lock_owner;           /*    64     4 */
> >         bool                       lock_recursed;        /*    68     1 */
> > 
> >         /* XXX 3 bytes hole, try to pack */
> > 
> >         struct rw_semaphore        lock;                 /*    72    40 */
> 
> An off-topic question: for things like atomic_t/spinlock_t and
> rw_semaphore, wouldn't various DEBUG options change their sizes?

Yes they do. For example, spinlock_t is 4 bytes on a release config and 72
on debug; rw_semaphore is 40 vs 168. Atomic_t is always 4 bytes, it's just
an int.

> Do we need to consider such cases, e.g. by moving them to the end of the
> structure, or do we only consider the production build for pahole?

We should optimize the structure layout and cacheline occupation for the
release build; the debugging options make the layout unpredictable and
affect only development. There are way more deployments without
debugging options enabled anyway.

The resulting size of the structures is also bigger, so this has a
completely different slab allocation pattern and performance
characteristics.

Here's the layout of eb on the debug config I use:

struct extent_buffer {
        u64                        start;                /*     0     8 */
        long unsigned int          len;                  /*     8     8 */
        long unsigned int          bflags;               /*    16     8 */
        struct btrfs_fs_info *     fs_info;              /*    24     8 */
        spinlock_t                 refs_lock;            /*    32    72 */
        /* --- cacheline 1 boundary (64 bytes) was 40 bytes ago --- */
        atomic_t                   refs;                 /*   104     4 */
        atomic_t                   io_pages;             /*   108     4 */
        int                        read_mirror;          /*   112     4 */

        /* XXX 4 bytes hole, try to pack */

        struct callback_head       callback_head __attribute__((__aligned__(8))); /*   120    16 */
        /* --- cacheline 2 boundary (128 bytes) was 8 bytes ago --- */
        pid_t                      lock_owner;           /*   136     4 */
        bool                       lock_recursed;        /*   140     1 */
        s8                         log_index;            /*   141     1 */

        /* XXX 2 bytes hole, try to pack */

        struct rw_semaphore        lock;                 /*   144   168 */
        /* --- cacheline 4 boundary (256 bytes) was 56 bytes ago --- */
        struct page *              pages[16];            /*   312   128 */
        /* --- cacheline 6 boundary (384 bytes) was 56 bytes ago --- */
        struct list_head           leak_list;            /*   440    16 */

        /* size: 456, cachelines: 8, members: 15 */
        /* sum members: 450, holes: 2, sum holes: 6 */
        /* forced alignments: 1, forced holes: 1, sum forced holes: 4 */
        /* last cacheline: 8 bytes */
} __attribute__((__aligned__(8)));
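
The inflation comes from debug-only bookkeeping fields compiled into the
lock types themselves. A hedged userspace sketch of the mechanism (the
struct and field names are hypothetical stand-ins loosely modeled on the
kernel's lock debugging options, not the real definitions):

#include <stdio.h>

/* Stand-in for the dependency-tracking state lockdep attaches. */
struct sketch_dep_map {
        void *key;
        const char *name;
};

struct sketch_spinlock {
        unsigned int slock;             /* the actual lock word */
#ifdef DEBUG_LOCKS
        unsigned int magic;             /* corruption detection */
        unsigned int owner_cpu;         /* which CPU holds the lock */
        void *owner;                    /* which task holds the lock */
        struct sketch_dep_map dep_map;  /* dependency tracking */
#endif
};

int main(void)
{
        printf("sketch_spinlock: %zu bytes\n",
               sizeof(struct sketch_spinlock));
        return 0;
}

Built plain this prints 4; built with -DDEBUG_LOCKS it prints 40 on
x86_64. The real spinlock_t goes from 4 to 72 bytes the same way, just
with more state attached.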
Amy Parker Nov. 4, 2020, 5:42 p.m. UTC | #5
On Wed, Nov 4, 2020 at 7:59 AM David Sterba <dsterba@suse.cz> wrote:
>
> On Wed, Nov 04, 2020 at 07:44:33AM +0800, Qu Wenruo wrote:
> > On 2020/11/4 5:11 AM, David Sterba wrote:
> > > After the rwsem replaced the tree lock implementation, the extent buffer
> > > got smaller but left some holes behind. By changing the log_index type
> > > and reordering the members, we can squeeze the size further to 240 bytes,
> > > measured on a release config on x86_64. Log_index spans only 3 values and
> > > needs to be signed.
> > >
> > > Before:
> > >
> > > struct extent_buffer {
> > >         u64                        start;                /*     0     8 */
> > >         long unsigned int          len;                  /*     8     8 */
> > >         long unsigned int          bflags;               /*    16     8 */
> > >         struct btrfs_fs_info *     fs_info;              /*    24     8 */
> > >         spinlock_t                 refs_lock;            /*    32     4 */
> > >         atomic_t                   refs;                 /*    36     4 */
> > >         atomic_t                   io_pages;             /*    40     4 */
> > >         int                        read_mirror;          /*    44     4 */
> > >         struct callback_head       callback_head __attribute__((__aligned__(8))); /*    48    16 */
> > >         /* --- cacheline 1 boundary (64 bytes) --- */
> > >         pid_t                      lock_owner;           /*    64     4 */
> > >         bool                       lock_recursed;        /*    68     1 */
> > >
> > >         /* XXX 3 bytes hole, try to pack */
> > >
> > >         struct rw_semaphore        lock;                 /*    72    40 */
> >
> > An off-topic question: for things like atomic_t/spinlock_t and
> > rw_semaphore, wouldn't various DEBUG options change their sizes?
>
> Yes they do. For example, spinlock_t is 4 bytes on a release config and 72
> on debug; rw_semaphore is 40 vs 168. Atomic_t is always 4 bytes, it's just
> an int.

These are pretty big differences in byte size. Probably not worth
shifting everything just for debug configs.

>
> > Do we need to consider such cases, e.g. by moving them to the end of the
> > structure, or do we only consider the production build for pahole?
>
> We should optimize the structure layout and cacheline occupation for the
> release build; the debugging options make the layout unpredictable and
> affect only development. There are way more deployments without
> debugging options enabled anyway.

We could always just leave a comment there noting that it's unpredictable
under debug, and that debugging will require a temporary user-end shift.
Optimizing for production is best, yep.

>
> The resulting size of the structures is also bigger, so this has a
> completely different slab allocation pattern and performance
> characteristics.

Yeah, another reason we should just focus on production.

>
> Here's the layout of eb on the debug config I use:
>
> struct extent_buffer {
>         u64                        start;                /*     0     8 */
>         long unsigned int          len;                  /*     8     8 */
>         long unsigned int          bflags;               /*    16     8 */
>         struct btrfs_fs_info *     fs_info;              /*    24     8 */
>         spinlock_t                 refs_lock;            /*    32    72 */
>         /* --- cacheline 1 boundary (64 bytes) was 40 bytes ago --- */
>         atomic_t                   refs;                 /*   104     4 */
>         atomic_t                   io_pages;             /*   108     4 */
>         int                        read_mirror;          /*   112     4 */
>
>         /* XXX 4 bytes hole, try to pack */
>
>         struct callback_head       callback_head __attribute__((__aligned__(8))); /*   120    16 */
>         /* --- cacheline 2 boundary (128 bytes) was 8 bytes ago --- */
>         pid_t                      lock_owner;           /*   136     4 */
>         bool                       lock_recursed;        /*   140     1 */
>         s8                         log_index;            /*   141     1 */
>
>         /* XXX 2 bytes hole, try to pack */
>
>         struct rw_semaphore        lock;                 /*   144   168 */
>         /* --- cacheline 4 boundary (256 bytes) was 56 bytes ago --- */
>         struct page *              pages[16];            /*   312   128 */
>         /* --- cacheline 6 boundary (384 bytes) was 56 bytes ago --- */
>         struct list_head           leak_list;            /*   440    16 */
>
>         /* size: 456, cachelines: 8, members: 15 */
>         /* sum members: 450, holes: 2, sum holes: 6 */
>         /* forced alignments: 1, forced holes: 1, sum forced holes: 4 */
>         /* last cacheline: 8 bytes */
> } __attribute__((__aligned__(8)));

Best regards,
Amy Parker
(they/them)
Josef Bacik Nov. 5, 2020, 6:12 p.m. UTC | #6
On 11/3/20 4:11 PM, David Sterba wrote:
> After the rwsem replaced the tree lock implementation, the extent buffer
> got smaller but left some holes behind. By changing the log_index type
> and reordering the members, we can squeeze the size further to 240 bytes,
> measured on a release config on x86_64. Log_index spans only 3 values and
> needs to be signed.
> 
> Before:
> 
> struct extent_buffer {
>          u64                        start;                /*     0     8 */
>          long unsigned int          len;                  /*     8     8 */
>          long unsigned int          bflags;               /*    16     8 */
>          struct btrfs_fs_info *     fs_info;              /*    24     8 */
>          spinlock_t                 refs_lock;            /*    32     4 */
>          atomic_t                   refs;                 /*    36     4 */
>          atomic_t                   io_pages;             /*    40     4 */
>          int                        read_mirror;          /*    44     4 */
>          struct callback_head       callback_head __attribute__((__aligned__(8))); /*    48    16 */
>          /* --- cacheline 1 boundary (64 bytes) --- */
>          pid_t                      lock_owner;           /*    64     4 */
>          bool                       lock_recursed;        /*    68     1 */
> 
>          /* XXX 3 bytes hole, try to pack */
> 
>          struct rw_semaphore        lock;                 /*    72    40 */
>          short int                  log_index;            /*   112     2 */
> 
>          /* XXX 6 bytes hole, try to pack */
> 
>          struct page *              pages[16];            /*   120   128 */
> 
>          /* size: 248, cachelines: 4, members: 14 */
>          /* sum members: 239, holes: 2, sum holes: 9 */
>          /* forced alignments: 1 */
>          /* last cacheline: 56 bytes */
> } __attribute__((__aligned__(8)));
> 
> After:
> 
> struct extent_buffer {
>          u64                        start;                /*     0     8 */
>          long unsigned int          len;                  /*     8     8 */
>          long unsigned int          bflags;               /*    16     8 */
>          struct btrfs_fs_info *     fs_info;              /*    24     8 */
>          spinlock_t                 refs_lock;            /*    32     4 */
>          atomic_t                   refs;                 /*    36     4 */
>          atomic_t                   io_pages;             /*    40     4 */
>          int                        read_mirror;          /*    44     4 */
>          struct callback_head       callback_head __attribute__((__aligned__(8))); /*    48    16 */
>          /* --- cacheline 1 boundary (64 bytes) --- */
>          pid_t                      lock_owner;           /*    64     4 */
>          bool                       lock_recursed;        /*    68     1 */
>          s8                         log_index;            /*    69     1 */
> 
>          /* XXX 2 bytes hole, try to pack */
> 
>          struct rw_semaphore        lock;                 /*    72    40 */
>          struct page *              pages[16];            /*   112   128 */
> 
>          /* size: 240, cachelines: 4, members: 14 */
>          /* sum members: 238, holes: 1, sum holes: 2 */
>          /* forced alignments: 1 */
>          /* last cacheline: 48 bytes */
> } __attribute__((__aligned__(8)));
> 
> Signed-off-by: David Sterba <dsterba@suse.com>

Reviewed-by: Josef Bacik <josef@toxicpanda.com>

Thanks,

Josef

Patch

diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 5403354de0e1..3c2bf21c54eb 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -88,10 +88,10 @@ struct extent_buffer {
 	struct rcu_head rcu_head;
 	pid_t lock_owner;
 	bool lock_recursed;
-	struct rw_semaphore lock;
-
 	/* >= 0 if eb belongs to a log tree, -1 otherwise */
-	short log_index;
+	s8 log_index;
+
+	struct rw_semaphore lock;
 
 	struct page *pages[INLINE_EXTENT_BUFFER_PAGES];
 #ifdef CONFIG_BTRFS_DEBUG
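
The resulting layout can be re-checked with pahole (e.g. "pahole -C
extent_buffer" on the built object). For illustration only, a hypothetical
compile-time guard that would pin the packed layout on a release x86_64
config, using the numbers from the "After" dump above; debug configs would
trip it, so this is a sketch, not something to merge:

#include <stddef.h>     /* offsetof */

/* Dropped into a btrfs .c file that already includes extent_io.h: */
_Static_assert(sizeof(struct extent_buffer) == 240,
               "extent_buffer packing regressed");
_Static_assert(offsetof(struct extent_buffer, log_index) == 69,
               "log_index no longer fills the hole after lock_recursed");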