diff mbox series

[v10,04/41] btrfs: get zone information of zoned block devices

Message ID cf46f0aef5a214cae8bacb2be231efed5febef5f.1605007036.git.naohiro.aota@wdc.com (mailing list archive)
State New, archived
Headers show
Series btrfs: zoned block device support | expand

Commit Message

Naohiro Aota Nov. 10, 2020, 11:26 a.m. UTC
If a zoned block device is found, get its zone information (number of zones
and zone size) using the new helper function btrfs_get_dev_zone_info().  To
avoid costly run-time zone report commands to test the device zones type
during block allocation, attach the seq_zones bitmap to the device
structure to indicate if a zone is sequential or accepts random writes.
Also attach the empty_zones bitmap to indicate if a zone is empty or not.

This patch also introduces the helper function btrfs_dev_is_sequential() to
test if the zone storing a block is a sequential write required zone and
btrfs_dev_is_empty_zone() to test if the zone is an empty zone.

Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
Reviewed-by: Josef Bacik <josef@toxicpanda.com>
---
 fs/btrfs/Makefile      |   1 +
 fs/btrfs/dev-replace.c |   5 ++
 fs/btrfs/super.c       |   5 ++
 fs/btrfs/volumes.c     |  19 ++++-
 fs/btrfs/volumes.h     |   4 +
 fs/btrfs/zoned.c       | 182 +++++++++++++++++++++++++++++++++++++++++
 fs/btrfs/zoned.h       |  91 +++++++++++++++++++++
 7 files changed, 305 insertions(+), 2 deletions(-)
 create mode 100644 fs/btrfs/zoned.c
 create mode 100644 fs/btrfs/zoned.h

Comments

Anand Jain Nov. 12, 2020, 6:57 a.m. UTC | #1
> diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
> index 8840a4fa81eb..ed55014fd1bd 100644
> --- a/fs/btrfs/super.c
> +++ b/fs/btrfs/super.c
> @@ -2462,6 +2462,11 @@ static void __init btrfs_print_mod_info(void)
>   #endif
>   #ifdef CONFIG_BTRFS_FS_REF_VERIFY
>   			", ref-verify=on"
> +#endif
> +#ifdef CONFIG_BLK_DEV_ZONED
> +			", zoned=yes"
> +#else
> +			", zoned=no"
>   #endif

IMO, we don't need this, as most of the generic kernel will be compiled
with the CONFIG_BLK_DEV_ZONED defined.
For review purpose we may want to know if the mounted device
is a zoned device. So log of zone device and its type may be useful
when we have verified the zoned devices in the open_ctree().

> @@ -374,6 +375,7 @@ void btrfs_free_device(struct btrfs_device *device)
>   	rcu_string_free(device->name);
>   	extent_io_tree_release(&device->alloc_state);
>   	bio_put(device->flush_bio);

> +	btrfs_destroy_dev_zone_info(device);

Free of btrfs_device::zone_info is already happening in the path..

  btrfs_close_one_device()
    btrfs_destroy_dev_zone_info()

  We don't need this..

  btrfs_free_device()
   btrfs_destroy_dev_zone_info()


> @@ -2543,6 +2551,14 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
>   	}
>   	rcu_assign_pointer(device->name, name);
>   
> +	device->fs_info = fs_info;
> +	device->bdev = bdev;
> +
> +	/* Get zone type information of zoned block devices */
> +	ret = btrfs_get_dev_zone_info(device);
> +	if (ret)
> +		goto error_free_device;
> +
>   	trans = btrfs_start_transaction(root, 0);
>   	if (IS_ERR(trans)) {
>   		ret = PTR_ERR(trans);

It should be something like goto error_free_zone from here.


> @@ -2707,6 +2721,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
>   		sb->s_flags |= SB_RDONLY;
>   	if (trans)
>   		btrfs_end_transaction(trans);


error_free_zone:
> +	btrfs_destroy_dev_zone_info(device);
>   error_free_device:
>   	btrfs_free_device(device);
>   error:

  As mentioned we don't need btrfs_destroy_dev_zone_info()
  again in  btrfs_free_device(). Otherwise we end up calling
  btrfs_destroy_dev_zone_info twice here.


Thanks, Anand
Johannes Thumshirn Nov. 12, 2020, 7:35 a.m. UTC | #2
On 12/11/2020 08:00, Anand Jain wrote:
>> diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
>> index 8840a4fa81eb..ed55014fd1bd 100644
>> --- a/fs/btrfs/super.c
>> +++ b/fs/btrfs/super.c
>> @@ -2462,6 +2462,11 @@ static void __init btrfs_print_mod_info(void)
>>   #endif
>>   #ifdef CONFIG_BTRFS_FS_REF_VERIFY
>>   			", ref-verify=on"
>> +#endif
>> +#ifdef CONFIG_BLK_DEV_ZONED
>> +			", zoned=yes"
>> +#else
>> +			", zoned=no"
>>   #endif
> IMO, we don't need this, as most of the generic kernel will be compiled
> with the CONFIG_BLK_DEV_ZONED defined.
> For review purpose we may want to know if the mounted device
> is a zoned device. So log of zone device and its type may be useful
> when we have verified the zoned devices in the open_ctree().
> 

David explicitly asked for this in [1] so we included it.

[1] https://lore.kernel.org/linux-btrfs/20201013155301.GE6756@twin.jikos.cz
Damien Le Moal Nov. 12, 2020, 7:44 a.m. UTC | #3
On 2020/11/12 16:35, Johannes Thumshirn wrote:
> On 12/11/2020 08:00, Anand Jain wrote:
>>> diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
>>> index 8840a4fa81eb..ed55014fd1bd 100644
>>> --- a/fs/btrfs/super.c
>>> +++ b/fs/btrfs/super.c
>>> @@ -2462,6 +2462,11 @@ static void __init btrfs_print_mod_info(void)
>>>   #endif
>>>   #ifdef CONFIG_BTRFS_FS_REF_VERIFY
>>>   			", ref-verify=on"
>>> +#endif
>>> +#ifdef CONFIG_BLK_DEV_ZONED
>>> +			", zoned=yes"
>>> +#else
>>> +			", zoned=no"
>>>   #endif
>> IMO, we don't need this, as most of the generic kernel will be compiled
>> with the CONFIG_BLK_DEV_ZONED defined.
>> For review purpose we may want to know if the mounted device
>> is a zoned device. So log of zone device and its type may be useful
>> when we have verified the zoned devices in the open_ctree().
>>
> 
> David explicitly asked for this in [1] so we included it.
> 
> [1] https://lore.kernel.org/linux-btrfs/20201013155301.GE6756@twin.jikos.cz
> 

And as of now, not all generic kernels are compiled with CONFIG_BLK_DEV_ZONED.
E.g. RHEL and CentOS. That may change in the future, but it should not be
assumed that CONFIG_BLK_DEV_ZONED is always enabled.
Johannes Thumshirn Nov. 12, 2020, 9:39 a.m. UTC | #4
On 12/11/2020 08:00, Anand Jain wrote:
> 
> 
>> diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
>> index 8840a4fa81eb..ed55014fd1bd 100644
>> --- a/fs/btrfs/super.c
>> +++ b/fs/btrfs/super.c
>> @@ -2462,6 +2462,11 @@ static void __init btrfs_print_mod_info(void)
>>   #endif
>>   #ifdef CONFIG_BTRFS_FS_REF_VERIFY
>>   			", ref-verify=on"
>> +#endif
>> +#ifdef CONFIG_BLK_DEV_ZONED
>> +			", zoned=yes"
>> +#else
>> +			", zoned=no"
>>   #endif
> 
> IMO, we don't need this, as most of the generic kernel will be compiled
> with the CONFIG_BLK_DEV_ZONED defined.
> For review purpose we may want to know if the mounted device
> is a zoned device. So log of zone device and its type may be useful
> when we have verified the zoned devices in the open_ctree().
> 
>> @@ -374,6 +375,7 @@ void btrfs_free_device(struct btrfs_device *device)
>>   	rcu_string_free(device->name);
>>   	extent_io_tree_release(&device->alloc_state);
>>   	bio_put(device->flush_bio);
> 
>> +	btrfs_destroy_dev_zone_info(device);
> 
> Free of btrfs_device::zone_info is already happening in the path..
> 
>   btrfs_close_one_device()
>     btrfs_destroy_dev_zone_info()
> 
>   We don't need this..
> 
>   btrfs_free_device()
>    btrfs_destroy_dev_zone_info()
> 
> 
>> @@ -2543,6 +2551,14 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
>>   	}
>>   	rcu_assign_pointer(device->name, name);
>>   
>> +	device->fs_info = fs_info;
>> +	device->bdev = bdev;
>> +
>> +	/* Get zone type information of zoned block devices */
>> +	ret = btrfs_get_dev_zone_info(device);
>> +	if (ret)
>> +		goto error_free_device;
>> +
>>   	trans = btrfs_start_transaction(root, 0);
>>   	if (IS_ERR(trans)) {
>>   		ret = PTR_ERR(trans);
> 
> It should be something like goto error_free_zone from here.
> 
> 
>> @@ -2707,6 +2721,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
>>   		sb->s_flags |= SB_RDONLY;
>>   	if (trans)
>>   		btrfs_end_transaction(trans);
> 
> 
> error_free_zone:
>> +	btrfs_destroy_dev_zone_info(device);
>>   error_free_device:
>>   	btrfs_free_device(device);
>>   error:
> 
>   As mentioned we don't need btrfs_destroy_dev_zone_info()
>   again in  btrfs_free_device(). Otherwise we end up calling
>   btrfs_destroy_dev_zone_info twice here.

Which doesn't do any harm as:
void btrfs_destroy_dev_zone_info(struct btrfs_device *device)
{
        struct btrfs_zoned_device_info *zone_info = device->zone_info;

        if (!zone_info)
                return;

	/* ... */
        device->zone_info = NULL;
}

Not sure what would be the preferred style here
Anand Jain Nov. 12, 2020, 9:44 a.m. UTC | #5
On 12/11/20 3:44 pm, Damien Le Moal wrote:
> On 2020/11/12 16:35, Johannes Thumshirn wrote:
>> On 12/11/2020 08:00, Anand Jain wrote:
>>>> diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
>>>> index 8840a4fa81eb..ed55014fd1bd 100644
>>>> --- a/fs/btrfs/super.c
>>>> +++ b/fs/btrfs/super.c
>>>> @@ -2462,6 +2462,11 @@ static void __init btrfs_print_mod_info(void)
>>>>    #endif
>>>>    #ifdef CONFIG_BTRFS_FS_REF_VERIFY
>>>>    			", ref-verify=on"
>>>> +#endif
>>>> +#ifdef CONFIG_BLK_DEV_ZONED
>>>> +			", zoned=yes"
>>>> +#else
>>>> +			", zoned=no"
>>>>    #endif
>>> IMO, we don't need this, as most of the generic kernel will be compiled
>>> with the CONFIG_BLK_DEV_ZONED defined.
>>> For review purpose we may want to know if the mounted device
>>> is a zoned device. So log of zone device and its type may be useful
>>> when we have verified the zoned devices in the open_ctree().
>>>
>>
>> David explicitly asked for this in [1] so we included it.
>>
>> [1] https://lore.kernel.org/linux-btrfs/20201013155301.GE6756@twin.jikos.cz
>>
> 
> And as of now, not all generic kernels are compiled with CONFIG_BLK_DEV_ZONED.
> E.g. RHEL and CentOS. That may change in the future, but it should not be
> assumed that CONFIG_BLK_DEV_ZONED is always enabled.
> 

Ok. My comment was from the long term perspective. I am fine if you want 
to keep it.
Naohiro Aota Nov. 12, 2020, 12:57 p.m. UTC | #6
On Thu, Nov 12, 2020 at 02:57:42PM +0800, Anand Jain wrote:
>
>
>>diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
>>index 8840a4fa81eb..ed55014fd1bd 100644
>>--- a/fs/btrfs/super.c
>>+++ b/fs/btrfs/super.c
>>@@ -2462,6 +2462,11 @@ static void __init btrfs_print_mod_info(void)
>>  #endif
>>  #ifdef CONFIG_BTRFS_FS_REF_VERIFY
>>  			", ref-verify=on"
>>+#endif
>>+#ifdef CONFIG_BLK_DEV_ZONED
>>+			", zoned=yes"
>>+#else
>>+			", zoned=no"
>>  #endif
>
>IMO, we don't need this, as most of the generic kernel will be compiled
>with the CONFIG_BLK_DEV_ZONED defined.
>For review purpose we may want to know if the mounted device
>is a zoned device. So log of zone device and its type may be useful
>when we have verified the zoned devices in the open_ctree().
>
>>@@ -374,6 +375,7 @@ void btrfs_free_device(struct btrfs_device *device)
>>  	rcu_string_free(device->name);
>>  	extent_io_tree_release(&device->alloc_state);
>>  	bio_put(device->flush_bio);
>
>>+	btrfs_destroy_dev_zone_info(device);
>
>Free of btrfs_device::zone_info is already happening in the path..
>
> btrfs_close_one_device()
>   btrfs_destroy_dev_zone_info()
>
> We don't need this..
>
> btrfs_free_device()
>  btrfs_destroy_dev_zone_info()

Ah, yes, I once had it only in btrfs_free_device() and noticed that it does
not free the device zone info on umount. So, I added one in
btrfs_close_one_device() and forgot to remove the other one. I'll drop it
from btrfs_free_device().

>
>
>>@@ -2543,6 +2551,14 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
>>  	}
>>  	rcu_assign_pointer(device->name, name);
>>+	device->fs_info = fs_info;
>>+	device->bdev = bdev;
>>+
>>+	/* Get zone type information of zoned block devices */
>>+	ret = btrfs_get_dev_zone_info(device);
>>+	if (ret)
>>+		goto error_free_device;
>>+
>>  	trans = btrfs_start_transaction(root, 0);
>>  	if (IS_ERR(trans)) {
>>  		ret = PTR_ERR(trans);
>
>It should be something like goto error_free_zone from here.
>
>
>>@@ -2707,6 +2721,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
>>  		sb->s_flags |= SB_RDONLY;
>>  	if (trans)
>>  		btrfs_end_transaction(trans);
>
>
>error_free_zone:

And, I'll do something like this.

>>+	btrfs_destroy_dev_zone_info(device);
>>  error_free_device:
>>  	btrfs_free_device(device);
>>  error:
>
> As mentioned we don't need btrfs_destroy_dev_zone_info()
> again in  btrfs_free_device(). Otherwise we end up calling
> btrfs_destroy_dev_zone_info twice here.
>
>
>Thanks, Anand
David Sterba Nov. 13, 2020, 9:34 p.m. UTC | #7
On Thu, Nov 12, 2020 at 05:44:11PM +0800, Anand Jain wrote:
> On 12/11/20 3:44 pm, Damien Le Moal wrote:
> > On 2020/11/12 16:35, Johannes Thumshirn wrote:
> >> On 12/11/2020 08:00, Anand Jain wrote:
> >>>> diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
> >>>> index 8840a4fa81eb..ed55014fd1bd 100644
> >>>> --- a/fs/btrfs/super.c
> >>>> +++ b/fs/btrfs/super.c
> >>>> @@ -2462,6 +2462,11 @@ static void __init btrfs_print_mod_info(void)
> >>>>    #endif
> >>>>    #ifdef CONFIG_BTRFS_FS_REF_VERIFY
> >>>>    			", ref-verify=on"
> >>>> +#endif
> >>>> +#ifdef CONFIG_BLK_DEV_ZONED
> >>>> +			", zoned=yes"
> >>>> +#else
> >>>> +			", zoned=no"
> >>>>    #endif
> >>> IMO, we don't need this, as most of the generic kernel will be compiled
> >>> with the CONFIG_BLK_DEV_ZONED defined.
> >>> For review purpose we may want to know if the mounted device
> >>> is a zoned device. So log of zone device and its type may be useful
> >>> when we have verified the zoned devices in the open_ctree().
> >>>
> >>
> >> David explicitly asked for this in [1] so we included it.
> >>
> >> [1] https://lore.kernel.org/linux-btrfs/20201013155301.GE6756@twin.jikos.cz
> >>
> > 
> > And as of now, not all generic kernels are compiled with CONFIG_BLK_DEV_ZONED.
> > E.g. RHEL and CentOS. That may change in the future, but it should not be
> > assumed that CONFIG_BLK_DEV_ZONED is always enabled.
> 
> Ok. My comment was from the long term perspective. I am fine if you want 
> to keep it.

The idea is to let the module announce which conditionally built
features are there according to fs/btrfs/Makefile and Kconfig. Besides
ACLs, which should always be on, and self-tests, which run right after
module load, all the others are there and we should keep the list up to date.
Anand Jain Nov. 18, 2020, 11:17 a.m. UTC | #8
Also, %device->fs_info is not protected. It is better to avoid using
fs_info when we are still at open_fs_devices(). Yeah, the unknown part
can be better. We need to fix it as a whole. For now, you can use
something like...

-------------------------
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index 1223d5b0e411..e857bb304d28 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -130,19 +130,11 @@ int btrfs_get_dev_zone_info(struct btrfs_device 
*device)
          * (device <unknown>) ..."
          */

-       rcu_read_lock();
-       if (device->fs_info)
-               btrfs_info(device->fs_info,
-                       "host-%s zoned block device %s, %u zones of %llu 
bytes",
-                       bdev_zoned_model(bdev) == BLK_ZONED_HM ? 
"managed" : "aware",
-                       rcu_str_deref(device->name), zone_info->nr_zones,
-                       zone_info->zone_size);
-       else
-               pr_info("BTRFS info: host-%s zoned block device %s, %u 
zones of %llu bytes",
-                       bdev_zoned_model(bdev) == BLK_ZONED_HM ? 
"managed" : "aware",
-                       rcu_str_deref(device->name), zone_info->nr_zones,
-                       zone_info->zone_size);
-       rcu_read_unlock();
+       btrfs_info_in_rcu(NULL,
+               "host-%s zoned block device %s, %u zones of %llu bytes",
+               bdev_zoned_model(bdev) == BLK_ZONED_HM ? "managed" : 
"aware",
+               rcu_str_deref(device->name), zone_info->nr_zones,
+               zone_info->zone_size);

         return 0;
  ---------------------------

Thanks, Anand


On 12/11/20 8:57 pm, Naohiro Aota wrote:
> On Thu, Nov 12, 2020 at 02:57:42PM +0800, Anand Jain wrote:
>>
>>
>>> diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
>>> index 8840a4fa81eb..ed55014fd1bd 100644
>>> --- a/fs/btrfs/super.c
>>> +++ b/fs/btrfs/super.c
>>> @@ -2462,6 +2462,11 @@ static void __init btrfs_print_mod_info(void)
>>>  #endif
>>>  #ifdef CONFIG_BTRFS_FS_REF_VERIFY
>>>              ", ref-verify=on"
>>> +#endif
>>> +#ifdef CONFIG_BLK_DEV_ZONED
>>> +            ", zoned=yes"
>>> +#else
>>> +            ", zoned=no"
>>>  #endif
>>
>> IMO, we don't need this, as most of the generic kernel will be compiled
>> with the CONFIG_BLK_DEV_ZONED defined.
>> For review purpose we may want to know if the mounted device
>> is a zoned device. So log of zone device and its type may be useful
>> when we have verified the zoned devices in the open_ctree().
>>
>>> @@ -374,6 +375,7 @@ void btrfs_free_device(struct btrfs_device *device)
>>>      rcu_string_free(device->name);
>>>      extent_io_tree_release(&device->alloc_state);
>>>      bio_put(device->flush_bio);
>>
>>> +    btrfs_destroy_dev_zone_info(device);
>>
>> Free of btrfs_device::zone_info is already happening in the path..
>>
>> btrfs_close_one_device()
>>   btrfs_destroy_dev_zone_info()
>>
>> We don't need this..
>>
>> btrfs_free_device()
>>  btrfs_destroy_dev_zone_info()
> 
> Ah, yes, I once had it only in btrfs_free_device() and noticed that it does
> not free the device zone info on umount. So, I added one in
> btrfs_close_one_device() and forgot to remove the other one. I'll drop it
> from btrfs_free_device().
> 
>>
>>
>>> @@ -2543,6 +2551,14 @@ int btrfs_init_new_device(struct btrfs_fs_info 
>>> *fs_info, const char *device_path
>>>      }
>>>      rcu_assign_pointer(device->name, name);
>>> +    device->fs_info = fs_info;
>>> +    device->bdev = bdev;
>>> +
>>> +    /* Get zone type information of zoned block devices */
>>> +    ret = btrfs_get_dev_zone_info(device);
>>> +    if (ret)
>>> +        goto error_free_device;
>>> +
>>>      trans = btrfs_start_transaction(root, 0);
>>>      if (IS_ERR(trans)) {
>>>          ret = PTR_ERR(trans);
>>
>> It should be something like goto error_free_zone from here.
>>
>>
>>> @@ -2707,6 +2721,7 @@ int btrfs_init_new_device(struct btrfs_fs_info 
>>> *fs_info, const char *device_path
>>>          sb->s_flags |= SB_RDONLY;
>>>      if (trans)
>>>          btrfs_end_transaction(trans);
>>
>>
>> error_free_zone:
> 
> And, I'll do something like this.
> 
>>> +    btrfs_destroy_dev_zone_info(device);
>>>  error_free_device:
>>>      btrfs_free_device(device);
>>>  error:
>>
>> As mentioned we don't need btrfs_destroy_dev_zone_info()
>> again in  btrfs_free_device(). Otherwise we end up calling
>> btrfs_destroy_dev_zone_info twice here.
>>
>>
>> Thanks, Anand
David Sterba Nov. 25, 2020, 9:47 p.m. UTC | #9
On Tue, Nov 10, 2020 at 08:26:07PM +0900, Naohiro Aota wrote:
> +int btrfs_get_dev_zone_info(struct btrfs_device *device)
> +{
> +	struct btrfs_zoned_device_info *zone_info = NULL;
> +	struct block_device *bdev = device->bdev;
> +	sector_t nr_sectors = bdev->bd_part->nr_sects;
> +	sector_t sector = 0;

I'd rather replace the sector_t types with u64. The type is unsigned
long and does not have the same width on 32/64 bit. The typecasts must
be used and if not, bugs happen (and happened).

> +	struct blk_zone *zones = NULL;
> +	unsigned int i, nreported = 0, nr_zones;
> +	unsigned int zone_sectors;
> +	int ret;
> +
> +	if (!bdev_is_zoned(bdev))
> +		return 0;
> +
> +	if (device->zone_info)
> +		return 0;
> +
> +	zone_info = kzalloc(sizeof(*zone_info), GFP_KERNEL);
> +	if (!zone_info)
> +		return -ENOMEM;
> +
> +	zone_sectors = bdev_zone_sectors(bdev);
> +	ASSERT(is_power_of_2(zone_sectors));

As is_power_of_2 works only on longs, this needs to be opencoded as
there's no unsigned long long version.
David Sterba Nov. 25, 2020, 10:07 p.m. UTC | #10
On Wed, Nov 25, 2020 at 10:47:53PM +0100, David Sterba wrote:
> On Tue, Nov 10, 2020 at 08:26:07PM +0900, Naohiro Aota wrote:
> > +int btrfs_get_dev_zone_info(struct btrfs_device *device)
> > +{
> > +	struct btrfs_zoned_device_info *zone_info = NULL;
> > +	struct block_device *bdev = device->bdev;
> > +	sector_t nr_sectors = bdev->bd_part->nr_sects;
> > +	sector_t sector = 0;
> 
> I'd rather replace the sector_t types with u64. The type is unsigned
> long and does not have the same width on 32/64 bit. The typecasts must
> be used and if not, bugs happen (and happened).

Like in the same function a few lines below

   95         /* Get zones type */
   96         while (sector < nr_sectors) {
   97                 nr_zones = BTRFS_REPORT_NR_ZONES;
   98                 ret = btrfs_get_dev_zones(device, sector << SECTOR_SHIFT, zones,
   99                                           &nr_zones);

sector without a type cast to u64
David Sterba Nov. 25, 2020, 10:16 p.m. UTC | #11
On Tue, Nov 10, 2020 at 08:26:07PM +0900, Naohiro Aota wrote:
> +	/* Get zones type */
> +	while (sector < nr_sectors) {
> +		nr_zones = BTRFS_REPORT_NR_ZONES;
> +		ret = btrfs_get_dev_zones(device, sector << SECTOR_SHIFT, zones,
> +					  &nr_zones);
> +		if (ret)
> +			goto out;
> +
> +		for (i = 0; i < nr_zones; i++) {
> +			if (zones[i].type == BLK_ZONE_TYPE_SEQWRITE_REQ)
> +				set_bit(nreported, zone_info->seq_zones);
> +			if (zones[i].cond == BLK_ZONE_COND_EMPTY)
> +				set_bit(nreported, zone_info->empty_zones);

set_bit is atomic and it's not needed as nothing else could be touching
the bitmap, so I'll switch it to plain __set_bit.
Damien Le Moal Nov. 25, 2020, 11:50 p.m. UTC | #12
Hi David,

On Wed, 2020-11-25 at 22:47 +0100, David Sterba wrote:
> On Tue, Nov 10, 2020 at 08:26:07PM +0900, Naohiro Aota wrote:
> > +int btrfs_get_dev_zone_info(struct btrfs_device *device)
> > +{
> > +	struct btrfs_zoned_device_info *zone_info = NULL;
> > +	struct block_device *bdev = device->bdev;
> > +	sector_t nr_sectors = bdev->bd_part->nr_sects;
> > +	sector_t sector = 0;
> 
> I'd rather replace the sector_t types with u64. The type is unsigned
> long and does not have the same width on 32/64 bit. The typecasts must
> be used and if not, bugs happen (and happened).

Since kernel 5.2, sector_t is unconditionally defined as u64 in linux/types.h:

typedef u64 sector_t;

CONFIG_LBDAF does not exist anymore.

I am not against using u64 at all, but using sector_t makes it clear what the
unit is for the values at hand.

> 
> > +	struct blk_zone *zones = NULL;
> > +	unsigned int i, nreported = 0, nr_zones;
> > +	unsigned int zone_sectors;
> > +	int ret;
> > +
> > +	if (!bdev_is_zoned(bdev))
> > +		return 0;
> > +
> > +	if (device->zone_info)
> > +		return 0;
> > +
> > +	zone_info = kzalloc(sizeof(*zone_info), GFP_KERNEL);
> > +	if (!zone_info)
> > +		return -ENOMEM;
> > +
> > +	zone_sectors = bdev_zone_sectors(bdev);
> > +	ASSERT(is_power_of_2(zone_sectors));
> 
> As is_power_of_2 works only on longs, this needs to be opencoded as
> there's no unsigned long long version.
David Sterba Nov. 26, 2020, 2:11 p.m. UTC | #13
On Wed, Nov 25, 2020 at 11:50:39PM +0000, Damien Le Moal wrote:
> Hi David,
> 
> On Wed, 2020-11-25 at 22:47 +0100, David Sterba wrote:
> > On Tue, Nov 10, 2020 at 08:26:07PM +0900, Naohiro Aota wrote:
> > > +int btrfs_get_dev_zone_info(struct btrfs_device *device)
> > > +{
> > > +	struct btrfs_zoned_device_info *zone_info = NULL;
> > > +	struct block_device *bdev = device->bdev;
> > > +	sector_t nr_sectors = bdev->bd_part->nr_sects;
> > > +	sector_t sector = 0;
> > 
> > I'd rather replace the sector_t types with u64. The type is unsigned
> > long and does not have the same width on 32/64 bit. The typecasts must
> > be used and if not, bugs happen (and happened).
> 
> Since kernel 5.2, sector_t is unconditionally defined as u64 in linux/types.h:
> 
> typedef u64 sector_t;
> 
> CONFIG_LBDAF does not exist anymore.

That's great, I was not aware of that.

> I am not against using u64 at all, but using sector_t makes it clear what the
> unit is for the values at hand.

Yeah agreed, I'll switch it back.
Anand Jain Nov. 30, 2020, 11:16 a.m. UTC | #14
The two comments below are fixed in misc-next.

Reviewed-by: Anand Jain <anand.jain@oracle.com>

Thanks.


On 18/11/20 7:17 pm, Anand Jain wrote:
> 
> 
> Also, %device->fs_info is not protected. It is better to avoid using
> fs_info when we are still at open_fs_devices(). Yeah, the unknown part
> can be better. We need to fix it as a whole. For now, you can use
> something like...
> 
> -------------------------
> diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
> index 1223d5b0e411..e857bb304d28 100644
> --- a/fs/btrfs/zoned.c
> +++ b/fs/btrfs/zoned.c
> @@ -130,19 +130,11 @@ int btrfs_get_dev_zone_info(struct btrfs_device 
> *device)
>           * (device <unknown>) ..."
>           */
> 
> -       rcu_read_lock();
> -       if (device->fs_info)
> -               btrfs_info(device->fs_info,
> -                       "host-%s zoned block device %s, %u zones of %llu 
> bytes",
> -                       bdev_zoned_model(bdev) == BLK_ZONED_HM ? 
> "managed" : "aware",
> -                       rcu_str_deref(device->name), zone_info->nr_zones,
> -                       zone_info->zone_size);
> -       else
> -               pr_info("BTRFS info: host-%s zoned block device %s, %u 
> zones of %llu bytes",
> -                       bdev_zoned_model(bdev) == BLK_ZONED_HM ? 
> "managed" : "aware",
> -                       rcu_str_deref(device->name), zone_info->nr_zones,
> -                       zone_info->zone_size);
> -       rcu_read_unlock();
> +       btrfs_info_in_rcu(NULL,
> +               "host-%s zoned block device %s, %u zones of %llu bytes",
> +               bdev_zoned_model(bdev) == BLK_ZONED_HM ? "managed" : 
> "aware",
> +               rcu_str_deref(device->name), zone_info->nr_zones,
> +               zone_info->zone_size);
> 
>          return 0;
>   ---------------------------
> 
> Thanks, Anand
> 
> 



>>>> @@ -374,6 +375,7 @@ void btrfs_free_device(struct btrfs_device *device)
>>>>      rcu_string_free(device->name);
>>>>      extent_io_tree_release(&device->alloc_state);
>>>>      bio_put(device->flush_bio);
>>>
>>>> +    btrfs_destroy_dev_zone_info(device);
>>>
>>> Free of btrfs_device::zone_info is already happening in the path..
>>>
>>> btrfs_close_one_device()
>>>   btrfs_destroy_dev_zone_info()
>>>
>>> We don't need this..
>>>
>>> btrfs_free_device()
>>>  btrfs_destroy_dev_zone_info()
>>
>> Ah, yes, I once had it only in btrfs_free_device() and noticed that it 
>> does
>> not free the device zone info on umount. So, I added one in
>> btrfs_close_one_device() and forgot to remove the other one. I'll drop it
>> from btrfs_free_device().
diff mbox series

Patch

diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile
index e738f6206ea5..0497fdc37f90 100644
--- a/fs/btrfs/Makefile
+++ b/fs/btrfs/Makefile
@@ -16,6 +16,7 @@  btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
 btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o
 btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o
 btrfs-$(CONFIG_BTRFS_FS_REF_VERIFY) += ref-verify.o
+btrfs-$(CONFIG_BLK_DEV_ZONED) += zoned.o
 
 btrfs-$(CONFIG_BTRFS_FS_RUN_SANITY_TESTS) += tests/free-space-tests.o \
 	tests/extent-buffer-tests.o tests/btrfs-tests.o \
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index 20ce1970015f..6f6d77224c2b 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -21,6 +21,7 @@ 
 #include "rcu-string.h"
 #include "dev-replace.h"
 #include "sysfs.h"
+#include "zoned.h"
 
 /*
  * Device replace overview
@@ -291,6 +292,10 @@  static int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
 	set_blocksize(device->bdev, BTRFS_BDEV_BLOCKSIZE);
 	device->fs_devices = fs_info->fs_devices;
 
+	ret = btrfs_get_dev_zone_info(device);
+	if (ret)
+		goto error;
+
 	mutex_lock(&fs_info->fs_devices->device_list_mutex);
 	list_add(&device->dev_list, &fs_info->fs_devices->devices);
 	fs_info->fs_devices->num_devices++;
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 8840a4fa81eb..ed55014fd1bd 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -2462,6 +2462,11 @@  static void __init btrfs_print_mod_info(void)
 #endif
 #ifdef CONFIG_BTRFS_FS_REF_VERIFY
 			", ref-verify=on"
+#endif
+#ifdef CONFIG_BLK_DEV_ZONED
+			", zoned=yes"
+#else
+			", zoned=no"
 #endif
 			;
 	pr_info("Btrfs loaded, crc32c=%s%s\n", crc32c_impl(), options);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 58b9c419a2b6..e787bf89f761 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -31,6 +31,7 @@ 
 #include "space-info.h"
 #include "block-group.h"
 #include "discard.h"
+#include "zoned.h"
 
 const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
 	[BTRFS_RAID_RAID10] = {
@@ -374,6 +375,7 @@  void btrfs_free_device(struct btrfs_device *device)
 	rcu_string_free(device->name);
 	extent_io_tree_release(&device->alloc_state);
 	bio_put(device->flush_bio);
+	btrfs_destroy_dev_zone_info(device);
 	kfree(device);
 }
 
@@ -667,6 +669,11 @@  static int btrfs_open_one_device(struct btrfs_fs_devices *fs_devices,
 	clear_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state);
 	device->mode = flags;
 
+	/* Get zone type information of zoned block devices */
+	ret = btrfs_get_dev_zone_info(device);
+	if (ret != 0)
+		goto error_free_page;
+
 	fs_devices->open_devices++;
 	if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state) &&
 	    device->devid != BTRFS_DEV_REPLACE_DEVID) {
@@ -1143,6 +1150,7 @@  static void btrfs_close_one_device(struct btrfs_device *device)
 		device->bdev = NULL;
 	}
 	clear_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
+	btrfs_destroy_dev_zone_info(device);
 
 	device->fs_info = NULL;
 	atomic_set(&device->dev_stats_ccnt, 0);
@@ -2543,6 +2551,14 @@  int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
 	}
 	rcu_assign_pointer(device->name, name);
 
+	device->fs_info = fs_info;
+	device->bdev = bdev;
+
+	/* Get zone type information of zoned block devices */
+	ret = btrfs_get_dev_zone_info(device);
+	if (ret)
+		goto error_free_device;
+
 	trans = btrfs_start_transaction(root, 0);
 	if (IS_ERR(trans)) {
 		ret = PTR_ERR(trans);
@@ -2559,8 +2575,6 @@  int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
 					 fs_info->sectorsize);
 	device->disk_total_bytes = device->total_bytes;
 	device->commit_total_bytes = device->total_bytes;
-	device->fs_info = fs_info;
-	device->bdev = bdev;
 	set_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state);
 	clear_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state);
 	device->mode = FMODE_EXCL;
@@ -2707,6 +2721,7 @@  int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
 		sb->s_flags |= SB_RDONLY;
 	if (trans)
 		btrfs_end_transaction(trans);
+	btrfs_destroy_dev_zone_info(device);
 error_free_device:
 	btrfs_free_device(device);
 error:
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index bf27ac07d315..9c07b97a2260 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -51,6 +51,8 @@  struct btrfs_io_geometry {
 #define BTRFS_DEV_STATE_REPLACE_TGT	(3)
 #define BTRFS_DEV_STATE_FLUSH_SENT	(4)
 
+struct btrfs_zoned_device_info;
+
 struct btrfs_device {
 	struct list_head dev_list; /* device_list_mutex */
 	struct list_head dev_alloc_list; /* chunk mutex */
@@ -64,6 +66,8 @@  struct btrfs_device {
 
 	struct block_device *bdev;
 
+	struct btrfs_zoned_device_info *zone_info;
+
 	/* the mode sent to blkdev_get */
 	fmode_t mode;
 
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
new file mode 100644
index 000000000000..b7ffe6670d3a
--- /dev/null
+++ b/fs/btrfs/zoned.c
@@ -0,0 +1,182 @@ 
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/slab.h>
+#include <linux/blkdev.h>
+#include "ctree.h"
+#include "volumes.h"
+#include "zoned.h"
+#include "rcu-string.h"
+
+/* Maximum number of zones to report per blkdev_report_zones() call */
+#define BTRFS_REPORT_NR_ZONES   4096
+
+/*
+ * Zone report callback: store the descriptor of the zone with index @idx
+ * into slot @idx of the blk_zone array passed through @data.
+ *
+ * Always returns 0.
+ */
+static int copy_zone_info_cb(struct blk_zone *zone, unsigned int idx,
+			     void *data)
+{
+	struct blk_zone *report = data;
+	struct blk_zone *slot = &report[idx];
+
+	memcpy(slot, zone, sizeof(*slot));
+
+	return 0;
+}
+
+/*
+ * Report up to *@nr_zones zones of @device, starting at byte offset @pos,
+ * into the @zones array.
+ *
+ * On success *@nr_zones is updated to the number of zones actually
+ * reported. A request for zero zones is a no-op that returns 0.
+ *
+ * Returns 0 on success, the negative value returned by
+ * blkdev_report_zones() on failure, or -EIO if no zone was reported.
+ */
+static int btrfs_get_dev_zones(struct btrfs_device *device, u64 pos,
+			       struct blk_zone *zones, unsigned int *nr_zones)
+{
+	const sector_t start = pos >> SECTOR_SHIFT;
+	int nr_reported;
+
+	if (*nr_zones == 0)
+		return 0;
+
+	nr_reported = blkdev_report_zones(device->bdev, start, *nr_zones,
+					  copy_zone_info_cb, zones);
+	if (nr_reported < 0) {
+		btrfs_err_in_rcu(device->fs_info,
+				 "zoned: failed to read zone %llu on %s (devid %llu)",
+				 pos, rcu_str_deref(device->name),
+				 device->devid);
+		return nr_reported;
+	}
+
+	*nr_zones = nr_reported;
+
+	/* A successful report that contains no zone is treated as an I/O error */
+	return nr_reported ? 0 : -EIO;
+}
+
+/*
+ * Read the zone layout of @device and cache it in device->zone_info.
+ *
+ * Nothing is done (and 0 is returned) if the block device is not zoned or
+ * if zone information is already attached to the device. Otherwise the
+ * zone size and zone count are computed, every zone is reported from the
+ * device, and two bitmaps are filled: seq_zones (zone type is sequential
+ * write required) and empty_zones (zone condition is empty).
+ *
+ * Returns 0 on success, -ENOMEM on allocation failure, -EIO if the number
+ * of reported zones does not match the computed zone count, or the error
+ * returned by btrfs_get_dev_zones(). On failure nothing is attached to
+ * the device.
+ */
+int btrfs_get_dev_zone_info(struct btrfs_device *device)
+{
+	struct btrfs_zoned_device_info *zone_info = NULL;
+	struct block_device *bdev = device->bdev;
+	sector_t nr_sectors = bdev->bd_part->nr_sects;
+	sector_t sector = 0;
+	struct blk_zone *zones = NULL;
+	unsigned int i, nreported = 0, nr_zones;
+	unsigned int zone_sectors;
+	int ret;
+
+	if (!bdev_is_zoned(bdev))
+		return 0;
+
+	if (device->zone_info)
+		return 0;
+
+	zone_info = kzalloc(sizeof(*zone_info), GFP_KERNEL);
+	if (!zone_info)
+		return -ENOMEM;
+
+	zone_sectors = bdev_zone_sectors(bdev);
+	ASSERT(is_power_of_2(zone_sectors));
+	zone_info->zone_size = (u64)zone_sectors << SECTOR_SHIFT;
+	zone_info->zone_size_shift = ilog2(zone_info->zone_size);
+	zone_info->nr_zones = nr_sectors >> ilog2(zone_sectors);
+	/* A trailing zone smaller than zone_sectors still counts as a zone */
+	if (!IS_ALIGNED(nr_sectors, zone_sectors))
+		zone_info->nr_zones++;
+
+	zone_info->seq_zones = bitmap_zalloc(zone_info->nr_zones, GFP_KERNEL);
+	if (!zone_info->seq_zones) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	zone_info->empty_zones = bitmap_zalloc(zone_info->nr_zones, GFP_KERNEL);
+	if (!zone_info->empty_zones) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	zones = kcalloc(BTRFS_REPORT_NR_ZONES, sizeof(*zones), GFP_KERNEL);
+	if (!zones) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	/* Get zones type */
+	while (sector < nr_sectors) {
+		nr_zones = BTRFS_REPORT_NR_ZONES;
+		ret = btrfs_get_dev_zones(device, sector << SECTOR_SHIFT, zones,
+					  &nr_zones);
+		if (ret)
+			goto out;
+
+		for (i = 0; i < nr_zones; i++, nreported++) {
+			/*
+			 * The bitmaps only hold zone_info->nr_zones bits. If
+			 * the device reports more zones than computed, keep
+			 * counting but do not write past the bitmaps; the
+			 * mismatch is reported after the loop.
+			 */
+			if (nreported >= zone_info->nr_zones)
+				continue;
+			if (zones[i].type == BLK_ZONE_TYPE_SEQWRITE_REQ)
+				set_bit(nreported, zone_info->seq_zones);
+			if (zones[i].cond == BLK_ZONE_COND_EMPTY)
+				set_bit(nreported, zone_info->empty_zones);
+		}
+		/* Continue right after the last zone of this batch */
+		sector = zones[nr_zones - 1].start + zones[nr_zones - 1].len;
+	}
+
+	if (nreported != zone_info->nr_zones) {
+		btrfs_err_in_rcu(device->fs_info,
+				 "inconsistent number of zones on %s (%u / %u)",
+				 rcu_str_deref(device->name), nreported,
+				 zone_info->nr_zones);
+		ret = -EIO;
+		goto out;
+	}
+
+	kfree(zones);
+
+	device->zone_info = zone_info;
+
+	/*
+	 * This function can be called from open_fs_devices(), which is before
+	 * we set the device->fs_info. In that case use pr_info instead of
+	 * btrfs_info to avoid printing confusing message like "BTRFS info
+	 * (device <unknown>) ...". The pr_info format is explicitly
+	 * newline-terminated.
+	 */
+
+	rcu_read_lock();
+	if (device->fs_info)
+		btrfs_info(device->fs_info,
+			"host-%s zoned block device %s, %u zones of %llu bytes",
+			bdev_zoned_model(bdev) == BLK_ZONED_HM ? "managed" : "aware",
+			rcu_str_deref(device->name), zone_info->nr_zones,
+			zone_info->zone_size);
+	else
+		pr_info("BTRFS info: host-%s zoned block device %s, %u zones of %llu bytes\n",
+			bdev_zoned_model(bdev) == BLK_ZONED_HM ? "managed" : "aware",
+			rcu_str_deref(device->name), zone_info->nr_zones,
+			zone_info->zone_size);
+	rcu_read_unlock();
+
+	return 0;
+
+out:
+	/* zone_info was zero-allocated, so bitmaps not yet allocated are NULL */
+	kfree(zones);
+	bitmap_free(zone_info->empty_zones);
+	bitmap_free(zone_info->seq_zones);
+	kfree(zone_info);
+
+	return ret;
+}
+
+/*
+ * Release the zone information attached to @device, if any, and reset
+ * device->zone_info to NULL.
+ */
+void btrfs_destroy_dev_zone_info(struct btrfs_device *device)
+{
+	struct btrfs_zoned_device_info *zinfo = device->zone_info;
+
+	if (zinfo) {
+		bitmap_free(zinfo->seq_zones);
+		bitmap_free(zinfo->empty_zones);
+		kfree(zinfo);
+		device->zone_info = NULL;
+	}
+}
+
+/*
+ * Report the single zone of @device found at byte offset @pos into @zone.
+ *
+ * Returns 0 on success, the error from btrfs_get_dev_zones() on failure,
+ * or -EIO if no zone was reported.
+ */
+int btrfs_get_dev_zone(struct btrfs_device *device, u64 pos,
+		       struct blk_zone *zone)
+{
+	unsigned int nr_zones = 1;
+	int ret = btrfs_get_dev_zones(device, pos, zone, &nr_zones);
+
+	if (ret)
+		return ret;
+	if (nr_zones == 0)
+		return -EIO;
+
+	return 0;
+}
diff --git a/fs/btrfs/zoned.h b/fs/btrfs/zoned.h
new file mode 100644
index 000000000000..c9e69ff87ab9
--- /dev/null
+++ b/fs/btrfs/zoned.h
@@ -0,0 +1,91 @@ 
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef BTRFS_ZONED_H
+#define BTRFS_ZONED_H
+
+#include <linux/types.h>
+
+struct btrfs_zoned_device_info {
+	/*
+	 * Number of zones, zone size and types of zones if bdev is a
+	 * zoned block device.
+	 *
+	 * zone_size is in bytes and zone_size_shift is its log2, so
+	 * "pos >> zone_size_shift" turns a byte offset into a zone number.
+	 * seq_zones and empty_zones are bitmaps indexed by zone number.
+	 */
+	u64 zone_size;
+	u8  zone_size_shift;
+	u32 nr_zones;
+	unsigned long *seq_zones; /* bit set: sequential write required */
+	unsigned long *empty_zones; /* bit set: zone is empty */
+};
+
+#ifdef CONFIG_BLK_DEV_ZONED
+int btrfs_get_dev_zone(struct btrfs_device *device, u64 pos,
+		       struct blk_zone *zone);
+int btrfs_get_dev_zone_info(struct btrfs_device *device);
+void btrfs_destroy_dev_zone_info(struct btrfs_device *device);
+#else /* CONFIG_BLK_DEV_ZONED not set: stubs with the same signatures */
+static inline int btrfs_get_dev_zone(struct btrfs_device *device, u64 pos,
+				     struct blk_zone *zone)
+{
+	return 0; /* stub: reports success, *zone is not written */
+}
+
+static inline int btrfs_get_dev_zone_info(struct btrfs_device *device)
+{
+	return 0; /* stub: reports success, zone_info is not touched */
+}
+
+static inline void btrfs_destroy_dev_zone_info(struct btrfs_device *device) { }
+
+#endif /* CONFIG_BLK_DEV_ZONED */
+
+/*
+ * Test the cached seq_zones bit of the zone containing byte offset @pos.
+ *
+ * Returns false when no zone information is attached to @device.
+ */
+static inline bool btrfs_dev_is_sequential(struct btrfs_device *device, u64 pos)
+{
+	struct btrfs_zoned_device_info *zinfo = device->zone_info;
+	u64 zone_nr;
+
+	if (!zinfo)
+		return false;
+
+	zone_nr = pos >> zinfo->zone_size_shift;
+
+	return test_bit(zone_nr, zinfo->seq_zones);
+}
+
+/*
+ * Test the cached empty_zones bit of the zone containing byte offset @pos.
+ *
+ * Returns true when no zone information is attached to @device.
+ */
+static inline bool btrfs_dev_is_empty_zone(struct btrfs_device *device, u64 pos)
+{
+	struct btrfs_zoned_device_info *zinfo = device->zone_info;
+	u64 zone_nr;
+
+	if (!zinfo)
+		return true;
+
+	zone_nr = pos >> zinfo->zone_size_shift;
+
+	return test_bit(zone_nr, zinfo->empty_zones);
+}
+
+/*
+ * Set (@set is true) or clear (@set is false) the cached empty_zones bit
+ * of the zone containing byte offset @pos.
+ *
+ * Does nothing when no zone information is attached to @device.
+ */
+static inline void btrfs_dev_set_empty_zone_bit(struct btrfs_device *device,
+						u64 pos, bool set)
+{
+	struct btrfs_zoned_device_info *zinfo = device->zone_info;
+	unsigned int zone_nr;
+
+	if (!zinfo)
+		return;
+
+	zone_nr = pos >> zinfo->zone_size_shift;
+	if (!set) {
+		clear_bit(zone_nr, zinfo->empty_zones);
+		return;
+	}
+
+	set_bit(zone_nr, zinfo->empty_zones);
+}
+
+/* Flag the zone containing byte offset @pos as empty in the cached bitmap. */
+static inline void btrfs_dev_set_zone_empty(struct btrfs_device *device,
+					    u64 pos)
+{
+	const bool empty = true;
+
+	btrfs_dev_set_empty_zone_bit(device, pos, empty);
+}
+
+/* Flag the zone containing byte offset @pos as not empty in the cached bitmap. */
+static inline void btrfs_dev_clear_zone_empty(struct btrfs_device *device,
+					      u64 pos)
+{
+	const bool empty = false;
+
+	btrfs_dev_set_empty_zone_bit(device, pos, empty);
+}
+
+#endif