diff mbox series

[1/3] btrfs: zoned: move superblock logging zone location

Message ID 7d02b9117f15101e70d2cd37da05ca93c2fd624d.1614331998.git.naohiro.aota@wdc.com (mailing list archive)
State New, archived
Headers show
Series Fixes for zoned mode | expand

Commit Message

Naohiro Aota Feb. 26, 2021, 9:34 a.m. UTC
This commit moves the location of superblock logging zones basing on the
static address instead of the static zone number.

The following zones are reserved as the circular buffer on zoned btrfs.
  - The primary superblock: zone at LBA 0 and the next zone
  - The first copy: zone at LBA 16G and the next zone
  - The second copy: zone at LBA 256G and the next zone

We disallow zone size larger than 8GB not to overlap the superblock log
zones.

Since the superblock zones overlap, we disallow zone size larger than 8GB.

Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
---
 fs/btrfs/zoned.c | 21 +++++++++++++++------
 1 file changed, 15 insertions(+), 6 deletions(-)

Comments

David Sterba Feb. 26, 2021, 7:11 p.m. UTC | #1
On Fri, Feb 26, 2021 at 06:34:36PM +0900, Naohiro Aota wrote:
> This commit moves the location of superblock logging zones basing on the
> static address instead of the static zone number.
> 
> The following zones are reserved as the circular buffer on zoned btrfs.
>   - The primary superblock: zone at LBA 0 and the next zone
>   - The first copy: zone at LBA 16G and the next zone
>   - The second copy: zone at LBA 256G and the next zone

This contains all the important information but somehow feels too short
given how many mails we've exchanged and all the reasoning why we do
that

> 
> We disallow zone size larger than 8GB not to overlap the superblock log
> zones.
> 
> Since the superblock zones overlap, we disallow zone size larger than 8GB.

or why we chose 8G to be the reasonable upper limit for the zone size.

> Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
> ---
>  fs/btrfs/zoned.c | 21 +++++++++++++++------
>  1 file changed, 15 insertions(+), 6 deletions(-)
> 
> diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
> index 9a5cf153da89..40cb99854844 100644
> --- a/fs/btrfs/zoned.c
> +++ b/fs/btrfs/zoned.c
> @@ -112,10 +112,9 @@ static int sb_write_pointer(struct block_device *bdev, struct blk_zone *zones,
>  
>  /*
>   * The following zones are reserved as the circular buffer on ZONED btrfs.
> - *  - The primary superblock: zones 0 and 1
> - *  - The first copy: zones 16 and 17
> - *  - The second copy: zones 1024 or zone at 256GB which is minimum, and
> - *                     the following one
> + *  - The primary superblock: zone at LBA 0 and the next zone
> + *  - The first copy: zone at LBA 16G and the next zone
> + *  - The second copy: zone at LBA 256G and the next zone
>   */
>  static inline u32 sb_zone_number(int shift, int mirror)
>  {
> @@ -123,8 +122,8 @@ static inline u32 sb_zone_number(int shift, int mirror)
>  
>  	switch (mirror) {
>  	case 0: return 0;
> -	case 1: return 16;
> -	case 2: return min_t(u64, btrfs_sb_offset(mirror) >> shift, 1024);
> +	case 1: return 1 << (const_ilog2(SZ_16G) - shift);
> +	case 2: return 1 << (const_ilog2(SZ_1G) + 8 - shift);

This ilog(SZ_1G) + 8 is confusing, it should have been 256G for clarity,
as it's a constant it'll get expanded at compile time.

>  	}
>  
>  	return 0;
> @@ -300,6 +299,16 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device)
>  		zone_sectors = bdev_zone_sectors(bdev);
>  	}
>  
> +	/* We don't support zone size > 8G that SB log zones overlap. */
> +	if (zone_sectors > (SZ_8G >> SECTOR_SHIFT)) {
> +		btrfs_err_in_rcu(fs_info,
> +				 "zoned: %s: zone size %llu is too large",
> +				 rcu_str_deref(device->name),
> +				 (u64)zone_sectors << SECTOR_SHIFT);
> +		ret = -EINVAL;
> +		goto out;
> +	}
> +
>  	nr_sectors = bdev_nr_sectors(bdev);
>  	/* Check if it's power of 2 (see is_power_of_2) */
>  	ASSERT(zone_sectors != 0 && (zone_sectors & (zone_sectors - 1)) == 0);
> -- 
> 2.30.1
Naohiro Aota March 1, 2021, 4:55 a.m. UTC | #2
On Fri, Feb 26, 2021 at 08:11:30PM +0100, David Sterba wrote:
> On Fri, Feb 26, 2021 at 06:34:36PM +0900, Naohiro Aota wrote:
> > This commit moves the location of superblock logging zones basing on the
> > static address instead of the static zone number.
> > 
> > The following zones are reserved as the circular buffer on zoned btrfs.
> >   - The primary superblock: zone at LBA 0 and the next zone
> >   - The first copy: zone at LBA 16G and the next zone
> >   - The second copy: zone at LBA 256G and the next zone
> 
> This contains all the important information but somehow feels too short
> given how many mails we've exchanged and all the reasoning why we do
> that

Yep, sure. I'll expand the description and repost.

> > 
> > We disallow zone size larger than 8GB not to overlap the superblock log
> > zones.
> > 
> > Since the superblock zones overlap, we disallow zone size larger than 8GB.
> 
> or why we chose 8G to be the reasonable upper limit for the zone size.
> 
> > Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
> > ---
> >  fs/btrfs/zoned.c | 21 +++++++++++++++------
> >  1 file changed, 15 insertions(+), 6 deletions(-)
> > 
> > diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
> > index 9a5cf153da89..40cb99854844 100644
> > --- a/fs/btrfs/zoned.c
> > +++ b/fs/btrfs/zoned.c
> > @@ -112,10 +112,9 @@ static int sb_write_pointer(struct block_device *bdev, struct blk_zone *zones,
> >  
> >  /*
> >   * The following zones are reserved as the circular buffer on ZONED btrfs.
> > - *  - The primary superblock: zones 0 and 1
> > - *  - The first copy: zones 16 and 17
> > - *  - The second copy: zones 1024 or zone at 256GB which is minimum, and
> > - *                     the following one
> > + *  - The primary superblock: zone at LBA 0 and the next zone
> > + *  - The first copy: zone at LBA 16G and the next zone
> > + *  - The second copy: zone at LBA 256G and the next zone
> >   */
> >  static inline u32 sb_zone_number(int shift, int mirror)
> >  {
> > @@ -123,8 +122,8 @@ static inline u32 sb_zone_number(int shift, int mirror)
> >  
> >  	switch (mirror) {
> >  	case 0: return 0;
> > -	case 1: return 16;
> > -	case 2: return min_t(u64, btrfs_sb_offset(mirror) >> shift, 1024);
> > +	case 1: return 1 << (const_ilog2(SZ_16G) - shift);
> > +	case 2: return 1 << (const_ilog2(SZ_1G) + 8 - shift);
> 
> This ilog(SZ_1G) + 8 is confusing, it should have been 256G for clarity,
> as it's a constant it'll get expanded at compile time.

I'd like to use SZ_256G here, but linux/sizes.h does not define
it. I'll define one for us and use it in the next version.

> >  	}
> >  
> >  	return 0;
> > @@ -300,6 +299,16 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device)
> >  		zone_sectors = bdev_zone_sectors(bdev);
> >  	}
> >  
> > +	/* We don't support zone size > 8G that SB log zones overlap. */
> > +	if (zone_sectors > (SZ_8G >> SECTOR_SHIFT)) {
> > +		btrfs_err_in_rcu(fs_info,
> > +				 "zoned: %s: zone size %llu is too large",
> > +				 rcu_str_deref(device->name),
> > +				 (u64)zone_sectors << SECTOR_SHIFT);
> > +		ret = -EINVAL;
> > +		goto out;
> > +	}
> > +
> >  	nr_sectors = bdev_nr_sectors(bdev);
> >  	/* Check if it's power of 2 (see is_power_of_2) */
> >  	ASSERT(zone_sectors != 0 && (zone_sectors & (zone_sectors - 1)) == 0);
> > -- 
> > 2.30.1
Damien Le Moal March 1, 2021, 5:17 a.m. UTC | #3
On 2021/03/01 14:02, Naohiro Aota wrote:
> On Fri, Feb 26, 2021 at 08:11:30PM +0100, David Sterba wrote:
>> On Fri, Feb 26, 2021 at 06:34:36PM +0900, Naohiro Aota wrote:
>>> This commit moves the location of superblock logging zones basing on the
>>> static address instead of the static zone number.
>>>
>>> The following zones are reserved as the circular buffer on zoned btrfs.
>>>   - The primary superblock: zone at LBA 0 and the next zone
>>>   - The first copy: zone at LBA 16G and the next zone
>>>   - The second copy: zone at LBA 256G and the next zone
>>
>> This contains all the important information but somehow feels too short
>> given how many mails we've exchanged and all the reasoning why we do
>> that
> 
> Yep, sure. I'll expand the description and repost.
> 
>>>
>>> We disallow zone size larger than 8GB not to overlap the superblock log
>>> zones.
>>>
>>> Since the superblock zones overlap, we disallow zone size larger than 8GB.
>>
>> or why we chose 8G to be the reasonable upper limit for the zone size.
>>
>>> Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
>>> ---
>>>  fs/btrfs/zoned.c | 21 +++++++++++++++------
>>>  1 file changed, 15 insertions(+), 6 deletions(-)
>>>
>>> diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
>>> index 9a5cf153da89..40cb99854844 100644
>>> --- a/fs/btrfs/zoned.c
>>> +++ b/fs/btrfs/zoned.c
>>> @@ -112,10 +112,9 @@ static int sb_write_pointer(struct block_device *bdev, struct blk_zone *zones,
>>>  
>>>  /*
>>>   * The following zones are reserved as the circular buffer on ZONED btrfs.
>>> - *  - The primary superblock: zones 0 and 1
>>> - *  - The first copy: zones 16 and 17
>>> - *  - The second copy: zones 1024 or zone at 256GB which is minimum, and
>>> - *                     the following one
>>> + *  - The primary superblock: zone at LBA 0 and the next zone
>>> + *  - The first copy: zone at LBA 16G and the next zone
>>> + *  - The second copy: zone at LBA 256G and the next zone
>>>   */
>>>  static inline u32 sb_zone_number(int shift, int mirror)
>>>  {
>>> @@ -123,8 +122,8 @@ static inline u32 sb_zone_number(int shift, int mirror)
>>>  
>>>  	switch (mirror) {
>>>  	case 0: return 0;
>>> -	case 1: return 16;
>>> -	case 2: return min_t(u64, btrfs_sb_offset(mirror) >> shift, 1024);
>>> +	case 1: return 1 << (const_ilog2(SZ_16G) - shift);
>>> +	case 2: return 1 << (const_ilog2(SZ_1G) + 8 - shift);
>>
>> This ilog(SZ_1G) + 8 is confusing, it should have been 256G for clarity,
>> as it's a constant it'll get expanded at compile time.
> 
> I'd like to use SZ_256G here, but linux/sizes.h does not define
> it. I'll define one for us and use it in the next version.

Or just use const_ilog2(256 * SZ_1G)... That is fairly easy to understand :)

I would even go further and add:

#define BTRFS_SB_FIRST_COPY_OFST		(16ULL * SZ_1G)
#define BTRFS_SB_SECOND_COPY_OFST		(256ULL * SZ_1G)

To be clear about what the values represent.
Then you can have:

+	case 1: return 1 << (const_ilog2(BTRFS_SB_FIRST_COPY_OFST) - shift);
+	case 2: return 1 << (const_ilog2(BTRFS_SB_SECOND_COPY_OFST) - shift);

> 
>>>  	}
>>>  
>>>  	return 0;
>>> @@ -300,6 +299,16 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device)
>>>  		zone_sectors = bdev_zone_sectors(bdev);
>>>  	}
>>>  
>>> +	/* We don't support zone size > 8G that SB log zones overlap. */
>>> +	if (zone_sectors > (SZ_8G >> SECTOR_SHIFT)) {
>>> +		btrfs_err_in_rcu(fs_info,
>>> +				 "zoned: %s: zone size %llu is too large",
>>> +				 rcu_str_deref(device->name),
>>> +				 (u64)zone_sectors << SECTOR_SHIFT);
>>> +		ret = -EINVAL;
>>> +		goto out;
>>> +	}
>>> +
>>>  	nr_sectors = bdev_nr_sectors(bdev);
>>>  	/* Check if it's power of 2 (see is_power_of_2) */
>>>  	ASSERT(zone_sectors != 0 && (zone_sectors & (zone_sectors - 1)) == 0);
>>> -- 
>>> 2.30.1
>
David Sterba March 1, 2021, 1:25 p.m. UTC | #4
On Mon, Mar 01, 2021 at 05:17:52AM +0000, Damien Le Moal wrote:
> On 2021/03/01 14:02, Naohiro Aota wrote:
> > On Fri, Feb 26, 2021 at 08:11:30PM +0100, David Sterba wrote:
> >> On Fri, Feb 26, 2021 at 06:34:36PM +0900, Naohiro Aota wrote:
> >>> This commit moves the location of superblock logging zones basing on the
> >>> static address instead of the static zone number.
> >>>
> >>> The following zones are reserved as the circular buffer on zoned btrfs.
> >>>   - The primary superblock: zone at LBA 0 and the next zone
> >>>   - The first copy: zone at LBA 16G and the next zone
> >>>   - The second copy: zone at LBA 256G and the next zone
> >>
> >> This contains all the important information but somehow feels too short
> >> given how many mails we've exchanged and all the reasoning why we do
> >> that
> > 
> > Yep, sure. I'll expand the description and repost.
> > 
> >>>
> >>> We disallow zone size larger than 8GB not to overlap the superblock log
> >>> zones.
> >>>
> >>> Since the superblock zones overlap, we disallow zone size larger than 8GB.
> >>
> >> or why we chose 8G to be the reasonable upper limit for the zone size.
> >>
> >>> Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
> >>> ---
> >>>  fs/btrfs/zoned.c | 21 +++++++++++++++------
> >>>  1 file changed, 15 insertions(+), 6 deletions(-)
> >>>
> >>> diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
> >>> index 9a5cf153da89..40cb99854844 100644
> >>> --- a/fs/btrfs/zoned.c
> >>> +++ b/fs/btrfs/zoned.c
> >>> @@ -112,10 +112,9 @@ static int sb_write_pointer(struct block_device *bdev, struct blk_zone *zones,
> >>>  
> >>>  /*
> >>>   * The following zones are reserved as the circular buffer on ZONED btrfs.
> >>> - *  - The primary superblock: zones 0 and 1
> >>> - *  - The first copy: zones 16 and 17
> >>> - *  - The second copy: zones 1024 or zone at 256GB which is minimum, and
> >>> - *                     the following one
> >>> + *  - The primary superblock: zone at LBA 0 and the next zone
> >>> + *  - The first copy: zone at LBA 16G and the next zone
> >>> + *  - The second copy: zone at LBA 256G and the next zone
> >>>   */
> >>>  static inline u32 sb_zone_number(int shift, int mirror)
> >>>  {
> >>> @@ -123,8 +122,8 @@ static inline u32 sb_zone_number(int shift, int mirror)
> >>>  
> >>>  	switch (mirror) {
> >>>  	case 0: return 0;
> >>> -	case 1: return 16;
> >>> -	case 2: return min_t(u64, btrfs_sb_offset(mirror) >> shift, 1024);
> >>> +	case 1: return 1 << (const_ilog2(SZ_16G) - shift);
> >>> +	case 2: return 1 << (const_ilog2(SZ_1G) + 8 - shift);
> >>
> >> This ilog(SZ_1G) + 8 is confusing, it should have been 256G for clarity,
> >> as it's a constant it'll get expanded at compile time.
> > 
> > I'd like to use SZ_256G here, but linux/sizes.h does not define
> > it. I'll define one for us and use it in the next version.
> 
> Or just use const_ilog2(256 * SZ_1G)... That is fairly easy to understand :)
> 
> I would even go further and add:
> 
> #define BTRFS_SB_FIRST_COPY_OFST		(16ULL * SZ_1G)
> #define BTRFS_SB_SECOND_COPY_OFST		(256ULL * SZ_1G)
> 
> To be clear about what the values represent.
> Then you can have:
> 
> +	case 1: return 1 << (const_ilog2(BTRFS_SB_FIRST_COPY_OFST) - shift);
> +	case 2: return 1 << (const_ilog2(BTRFS_SB_SECOND_COPY_OFST) - shift);

That's even better, so the magic constants are defined in a more visible
place. Maybe the shift can be also defined separately and then just used
here, like we have PAGE_SIZE/PAGE_SHIFT and such.
diff mbox series

Patch

diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index 9a5cf153da89..40cb99854844 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -112,10 +112,9 @@  static int sb_write_pointer(struct block_device *bdev, struct blk_zone *zones,
 
 /*
  * The following zones are reserved as the circular buffer on ZONED btrfs.
- *  - The primary superblock: zones 0 and 1
- *  - The first copy: zones 16 and 17
- *  - The second copy: zones 1024 or zone at 256GB which is minimum, and
- *                     the following one
+ *  - The primary superblock: zone at LBA 0 and the next zone
+ *  - The first copy: zone at LBA 16G and the next zone
+ *  - The second copy: zone at LBA 256G and the next zone
  */
 static inline u32 sb_zone_number(int shift, int mirror)
 {
@@ -123,8 +122,8 @@  static inline u32 sb_zone_number(int shift, int mirror)
 
 	switch (mirror) {
 	case 0: return 0;
-	case 1: return 16;
-	case 2: return min_t(u64, btrfs_sb_offset(mirror) >> shift, 1024);
+	case 1: return 1 << (const_ilog2(SZ_16G) - shift);
+	case 2: return 1 << (const_ilog2(SZ_1G) + 8 - shift);
 	}
 
 	return 0;
@@ -300,6 +299,16 @@  int btrfs_get_dev_zone_info(struct btrfs_device *device)
 		zone_sectors = bdev_zone_sectors(bdev);
 	}
 
+	/* We don't support zone size > 8G that SB log zones overlap. */
+	if (zone_sectors > (SZ_8G >> SECTOR_SHIFT)) {
+		btrfs_err_in_rcu(fs_info,
+				 "zoned: %s: zone size %llu is too large",
+				 rcu_str_deref(device->name),
+				 (u64)zone_sectors << SECTOR_SHIFT);
+		ret = -EINVAL;
+		goto out;
+	}
+
 	nr_sectors = bdev_nr_sectors(bdev);
 	/* Check if it's power of 2 (see is_power_of_2) */
 	ASSERT(zone_sectors != 0 && (zone_sectors & (zone_sectors - 1)) == 0);