[04/12] btrfs: change how delay_iput is tracked in btrfs_delalloc_work
diff mbox

Message ID a4469a31e3b0cc55ebba9c3aee26cd5632743056.1449161602.git.dsterba@suse.com
State New
Headers show

Commit Message

David Sterba Dec. 3, 2015, 4:56 p.m. UTC
The dellaloc work is not frequently used, the delayed status is once set
and read so it looks quite safe to drop the member and store the status
in the lowest bit of the inode pointer.

Combined with the removal of 'wait' we got +2 objects per 4k slab.

Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/ctree.h |  5 ++++-
 fs/btrfs/inode.c | 11 +++++++++--
 2 files changed, 13 insertions(+), 3 deletions(-)

Comments

Liu Bo Dec. 4, 2015, 2:25 a.m. UTC | #1
On Thu, Dec 03, 2015 at 05:56:32PM +0100, David Sterba wrote:
> The dellaloc work is not frequently used, the delayed status is once set
> and read so it looks quite safe to drop the member and store the status
> in the lowest bit of the inode pointer.
> 
> Combined with the removal of 'wait' we got +2 objects per 4k slab.
> 
> Signed-off-by: David Sterba <dsterba@suse.com>
> ---
>  fs/btrfs/ctree.h |  5 ++++-
>  fs/btrfs/inode.c | 11 +++++++++--
>  2 files changed, 13 insertions(+), 3 deletions(-)
> 
> diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
> index a61c53bce162..d5e250a65725 100644
> --- a/fs/btrfs/ctree.h
> +++ b/fs/btrfs/ctree.h
> @@ -3901,8 +3901,11 @@ void btrfs_extent_item_to_extent_map(struct inode *inode,
>  
>  /* inode.c */
>  struct btrfs_delalloc_work {
> +	/*
> +	 * Note: lowest bit of inode tracks if the iput is delayed,
> +	 * do not access the pointer directly.
> +	 */
>  	struct inode *inode;
> -	int delay_iput;
>  	struct completion completion;
>  	struct list_head list;
>  	struct btrfs_work work;
> diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
> index 15b29e879ffc..529a53b80ca0 100644
> --- a/fs/btrfs/inode.c
> +++ b/fs/btrfs/inode.c
> @@ -9436,16 +9436,21 @@ static void btrfs_run_delalloc_work(struct btrfs_work *work)
>  {
>  	struct btrfs_delalloc_work *delalloc_work;
>  	struct inode *inode;
> +	int delay_iput;
>  
>  	delalloc_work = container_of(work, struct btrfs_delalloc_work,
>  				     work);
>  	inode = delalloc_work->inode;
> +	/* Lowest bit of inode pointer tracks the delayed status */
> +	delay_iput = ((unsigned long)inode & 1UL);
> +	inode = (struct inode *)((unsigned long)inode & ~1UL);
> +

To be quite frankly, I don't like this, it's a pointer anyway, error-prone in a debugging view, instead would 'u8 delayed_iput' help?

Thanks,

-liubo

>  	filemap_flush(inode->i_mapping);
>  	if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
>  				&BTRFS_I(inode)->runtime_flags))
>  		filemap_flush(inode->i_mapping);
>  
> -	if (delalloc_work->delay_iput)
> +	if (delay_iput)
>  		btrfs_add_delayed_iput(inode);
>  	else
>  		iput(inode);
> @@ -9464,7 +9469,9 @@ struct btrfs_delalloc_work *btrfs_alloc_delalloc_work(struct inode *inode,
>  	init_completion(&work->completion);
>  	INIT_LIST_HEAD(&work->list);
>  	work->inode = inode;
> -	work->delay_iput = delay_iput;
> +	/* Lowest bit of inode pointer tracks the delayed status */
> +	if (delay_iput)
> +		*((unsigned long *)work->inode) |= 1UL;
>  	WARN_ON_ONCE(!inode);
>  	btrfs_init_work(&work->work, btrfs_flush_delalloc_helper,
>  			btrfs_run_delalloc_work, NULL, NULL);
> -- 
> 2.6.2
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
David Sterba Dec. 4, 2015, 12:36 p.m. UTC | #2
On Thu, Dec 03, 2015 at 06:25:37PM -0800, Liu Bo wrote:
> >  	struct inode *inode;
> > -	int delay_iput;
> >  	struct completion completion;
> >  	struct list_head list;
> >  	struct btrfs_work work;
> > diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
> > index 15b29e879ffc..529a53b80ca0 100644
> > --- a/fs/btrfs/inode.c
> > +++ b/fs/btrfs/inode.c
> > @@ -9436,16 +9436,21 @@ static void btrfs_run_delalloc_work(struct btrfs_work *work)
> >  {
> >  	struct btrfs_delalloc_work *delalloc_work;
> >  	struct inode *inode;
> > +	int delay_iput;
> >  
> >  	delalloc_work = container_of(work, struct btrfs_delalloc_work,
> >  				     work);
> >  	inode = delalloc_work->inode;
> > +	/* Lowest bit of inode pointer tracks the delayed status */
> > +	delay_iput = ((unsigned long)inode & 1UL);
> > +	inode = (struct inode *)((unsigned long)inode & ~1UL);
> > +
> 
> To be quite frankly, I don't like this, it's a pointer anyway,
> error-prone in a debugging view, instead would 'u8 delayed_iput' help?

The point was to shrink the structure. Adding the u8 will grow it by
another 8 bytes, besides the slab objects are aligned to 8 bytes by
default so the overall cost of storing the delayed information is 8
bytes:

struct btrfs_delalloc_work {
        struct inode *             inode;                /*     0     8 */
        struct completion          completion;           /*     8    32 */
        struct list_head           list;                 /*    40    16 */
        struct btrfs_work          work;                 /*    56    88 */
        /* --- cacheline 2 boundary (128 bytes) was 16 bytes ago --- */
        u8                         delay;                /*   144     1 */

        /* size: 152, cachelines: 3, members: 5 */
        /* padding: 7 */
        /* last cacheline: 24 bytes */
};

As the use of the inode pointer is limited, I don't think this would
cause surprises. And it's commented where used which should help during
debugging.

Abusing the low bits of pointers is nothing new, the page cache tags are
implemented that way. This kind of low-level optimizations is IMO acceptable.
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Holger Hoffstätte Dec. 4, 2015, 12:50 p.m. UTC | #3
On 12/04/15 13:36, David Sterba wrote:
[snip]
> As the use of the inode pointer is limited, I don't think this would
> cause surprises. And it's commented where used which should help during
> debugging.

When I read through those bits (mostly pondering portability) I was wondering
whether it might make sense to provide thin wrap/unwrap functions for the tag
bit instead of relying on open code and comments only. Just an idea, not sure
if it's worth the trouble. The code itself is functional and works fine as it
is, I'm running it right now.

-h

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Filipe Manana Dec. 4, 2015, 1:08 p.m. UTC | #4
On Fri, Dec 4, 2015 at 12:36 PM, David Sterba <dsterba@suse.cz> wrote:
> On Thu, Dec 03, 2015 at 06:25:37PM -0800, Liu Bo wrote:
>> >     struct inode *inode;
>> > -   int delay_iput;
>> >     struct completion completion;
>> >     struct list_head list;
>> >     struct btrfs_work work;
>> > diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
>> > index 15b29e879ffc..529a53b80ca0 100644
>> > --- a/fs/btrfs/inode.c
>> > +++ b/fs/btrfs/inode.c
>> > @@ -9436,16 +9436,21 @@ static void btrfs_run_delalloc_work(struct btrfs_work *work)
>> >  {
>> >     struct btrfs_delalloc_work *delalloc_work;
>> >     struct inode *inode;
>> > +   int delay_iput;
>> >
>> >     delalloc_work = container_of(work, struct btrfs_delalloc_work,
>> >                                  work);
>> >     inode = delalloc_work->inode;
>> > +   /* Lowest bit of inode pointer tracks the delayed status */
>> > +   delay_iput = ((unsigned long)inode & 1UL);
>> > +   inode = (struct inode *)((unsigned long)inode & ~1UL);
>> > +
>>
>> To be quite frankly, I don't like this, it's a pointer anyway,
>> error-prone in a debugging view, instead would 'u8 delayed_iput' help?
>
> The point was to shrink the structure. Adding the u8 will grow it by
> another 8 bytes, besides the slab objects are aligned to 8 bytes by
> default so the overall cost of storing the delayed information is 8
> bytes:
>
> struct btrfs_delalloc_work {
>         struct inode *             inode;                /*     0     8 */
>         struct completion          completion;           /*     8    32 */
>         struct list_head           list;                 /*    40    16 */
>         struct btrfs_work          work;                 /*    56    88 */
>         /* --- cacheline 2 boundary (128 bytes) was 16 bytes ago --- */
>         u8                         delay;                /*   144     1 */
>
>         /* size: 152, cachelines: 3, members: 5 */
>         /* padding: 7 */
>         /* last cacheline: 24 bytes */
> };
>
> As the use of the inode pointer is limited, I don't think this would
> cause surprises. And it's commented where used which should help during
> debugging.
>
> Abusing the low bits of pointers is nothing new, the page cache tags are
> implemented that way. This kind of low-level optimizations is IMO acceptable.

Acceptable, but, is it needed? I mean, are we using so much memory
with this structure or does the better packing causes any significant
performance improvement to justify this sort of obscure code? IMO it's
worth only when we know (measured) that it actually positively impacts
workloads (either real ones or even artificial ones from benchmark
tools).

> --
> To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
David Sterba Dec. 7, 2015, 1:52 p.m. UTC | #5
On Fri, Dec 04, 2015 at 01:08:59PM +0000, Filipe Manana wrote:
> On Fri, Dec 4, 2015 at 12:36 PM, David Sterba <dsterba@suse.cz> wrote:
> > On Thu, Dec 03, 2015 at 06:25:37PM -0800, Liu Bo wrote:
> >> >     struct inode *inode;
> >> > -   int delay_iput;
> >> >     struct completion completion;
> >> >     struct list_head list;
> >> >     struct btrfs_work work;
> >> > diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
> >> > index 15b29e879ffc..529a53b80ca0 100644
> >> > --- a/fs/btrfs/inode.c
> >> > +++ b/fs/btrfs/inode.c
> >> > @@ -9436,16 +9436,21 @@ static void btrfs_run_delalloc_work(struct btrfs_work *work)
> >> >  {
> >> >     struct btrfs_delalloc_work *delalloc_work;
> >> >     struct inode *inode;
> >> > +   int delay_iput;
> >> >
> >> >     delalloc_work = container_of(work, struct btrfs_delalloc_work,
> >> >                                  work);
> >> >     inode = delalloc_work->inode;
> >> > +   /* Lowest bit of inode pointer tracks the delayed status */
> >> > +   delay_iput = ((unsigned long)inode & 1UL);
> >> > +   inode = (struct inode *)((unsigned long)inode & ~1UL);
> >> > +
> >>
> >> To be quite frankly, I don't like this, it's a pointer anyway,
> >> error-prone in a debugging view, instead would 'u8 delayed_iput' help?
> >
> > The point was to shrink the structure. Adding the u8 will grow it by
> > another 8 bytes, besides the slab objects are aligned to 8 bytes by
> > default so the overall cost of storing the delayed information is 8
> > bytes:
> >
> > struct btrfs_delalloc_work {
> >         struct inode *             inode;                /*     0     8 */
> >         struct completion          completion;           /*     8    32 */
> >         struct list_head           list;                 /*    40    16 */
> >         struct btrfs_work          work;                 /*    56    88 */
> >         /* --- cacheline 2 boundary (128 bytes) was 16 bytes ago --- */
> >         u8                         delay;                /*   144     1 */
> >
> >         /* size: 152, cachelines: 3, members: 5 */
> >         /* padding: 7 */
> >         /* last cacheline: 24 bytes */
> > };
> >
> > As the use of the inode pointer is limited, I don't think this would
> > cause surprises. And it's commented where used which should help during
> > debugging.
> >
> > Abusing the low bits of pointers is nothing new, the page cache tags are
> > implemented that way. This kind of low-level optimizations is IMO acceptable.
> 
> Acceptable, but, is it needed? I mean, are we using so much memory
> with this structure or does the better packing causes any significant
> performance improvement to justify this sort of obscure code? IMO it's
> worth only when we know (measured) that it actually positively impacts
> workloads (either real ones or even artificial ones from benchmark
> tools).

And it turns out this structure is not critical, seldomly used so the
space savings and bit tricks are not justified. Patch dropped.
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
David Sterba Dec. 7, 2015, 2:23 p.m. UTC | #6
On Fri, Dec 04, 2015 at 01:50:39PM +0100, Holger Hoffstätte wrote:
> On 12/04/15 13:36, David Sterba wrote:
> [snip]
> > As the use of the inode pointer is limited, I don't think this would
> > cause surprises. And it's commented where used which should help during
> > debugging.
> 
> When I read through those bits (mostly pondering portability) I was wondering
> whether it might make sense to provide thin wrap/unwrap functions for the tag
> bit instead of relying on open code and comments only.

I'm not sure if a helper makes sense for a single occurence of each
operation, a comment seems enough to me. Anyway, the patch will be
dropped.
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Patch
diff mbox

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index a61c53bce162..d5e250a65725 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3901,8 +3901,11 @@  void btrfs_extent_item_to_extent_map(struct inode *inode,
 
 /* inode.c */
 struct btrfs_delalloc_work {
+	/*
+	 * Note: lowest bit of inode tracks if the iput is delayed,
+	 * do not access the pointer directly.
+	 */
 	struct inode *inode;
-	int delay_iput;
 	struct completion completion;
 	struct list_head list;
 	struct btrfs_work work;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 15b29e879ffc..529a53b80ca0 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -9436,16 +9436,21 @@  static void btrfs_run_delalloc_work(struct btrfs_work *work)
 {
 	struct btrfs_delalloc_work *delalloc_work;
 	struct inode *inode;
+	int delay_iput;
 
 	delalloc_work = container_of(work, struct btrfs_delalloc_work,
 				     work);
 	inode = delalloc_work->inode;
+	/* Lowest bit of inode pointer tracks the delayed status */
+	delay_iput = ((unsigned long)inode & 1UL);
+	inode = (struct inode *)((unsigned long)inode & ~1UL);
+
 	filemap_flush(inode->i_mapping);
 	if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
 				&BTRFS_I(inode)->runtime_flags))
 		filemap_flush(inode->i_mapping);
 
-	if (delalloc_work->delay_iput)
+	if (delay_iput)
 		btrfs_add_delayed_iput(inode);
 	else
 		iput(inode);
@@ -9464,7 +9469,9 @@  struct btrfs_delalloc_work *btrfs_alloc_delalloc_work(struct inode *inode,
 	init_completion(&work->completion);
 	INIT_LIST_HEAD(&work->list);
 	work->inode = inode;
-	work->delay_iput = delay_iput;
+	/* Lowest bit of inode pointer tracks the delayed status */
+	if (delay_iput)
+		*((unsigned long *)work->inode) |= 1UL;
 	WARN_ON_ONCE(!inode);
 	btrfs_init_work(&work->work, btrfs_flush_delalloc_helper,
 			btrfs_run_delalloc_work, NULL, NULL);