diff mbox series

[v9,18/41] btrfs: reset zones of unused block groups

Message ID 575e495d534c44aded9e6ae042a9d6bda5c84162.1604065695.git.naohiro.aota@wdc.com (mailing list archive)
State New, archived
Headers show
Series btrfs: zoned block device support | expand

Commit Message

Naohiro Aota Oct. 30, 2020, 1:51 p.m. UTC
For an ZONED volume, a block group maps to a zone of the device. For
deleted unused block groups, the zone of the block group can be reset to
rewind the zone write pointer at the start of the zone.

Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
---
 fs/btrfs/block-group.c |  8 ++++++--
 fs/btrfs/extent-tree.c | 17 ++++++++++++-----
 fs/btrfs/zoned.h       | 16 ++++++++++++++++
 3 files changed, 34 insertions(+), 7 deletions(-)

Comments

Josef Bacik Nov. 3, 2020, 2:34 p.m. UTC | #1
On 10/30/20 9:51 AM, Naohiro Aota wrote:
> For an ZONED volume, a block group maps to a zone of the device. For
> deleted unused block groups, the zone of the block group can be reset to
> rewind the zone write pointer at the start of the zone.
> 
> Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
> ---
>   fs/btrfs/block-group.c |  8 ++++++--
>   fs/btrfs/extent-tree.c | 17 ++++++++++++-----
>   fs/btrfs/zoned.h       | 16 ++++++++++++++++
>   3 files changed, 34 insertions(+), 7 deletions(-)
> 
> diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
> index d67f9cabe5c1..82d556368c85 100644
> --- a/fs/btrfs/block-group.c
> +++ b/fs/btrfs/block-group.c
> @@ -1468,8 +1468,12 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
>   		if (!async_trim_enabled && btrfs_test_opt(fs_info, DISCARD_ASYNC))
>   			goto flip_async;
>   
> -		/* DISCARD can flip during remount */
> -		trimming = btrfs_test_opt(fs_info, DISCARD_SYNC);
> +		/*
> +		 * DISCARD can flip during remount. In ZONED mode, we need
> +		 * to reset sequential required zones.
> +		 */
> +		trimming = btrfs_test_opt(fs_info, DISCARD_SYNC) ||
> +				btrfs_is_zoned(fs_info);
>   
>   		/* Implicit trim during transaction commit. */
>   		if (trimming)
> diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
> index 5e6b4d1712f2..c134746d7417 100644
> --- a/fs/btrfs/extent-tree.c
> +++ b/fs/btrfs/extent-tree.c
> @@ -1331,6 +1331,9 @@ int btrfs_discard_extent(struct btrfs_fs_info *fs_info, u64 bytenr,
>   
>   		stripe = bbio->stripes;
>   		for (i = 0; i < bbio->num_stripes; i++, stripe++) {
> +			struct btrfs_device *dev = stripe->dev;
> +			u64 physical = stripe->physical;
> +			u64 length = stripe->length;
>   			u64 bytes;
>   			struct request_queue *req_q;
>   
> @@ -1338,14 +1341,18 @@ int btrfs_discard_extent(struct btrfs_fs_info *fs_info, u64 bytenr,
>   				ASSERT(btrfs_test_opt(fs_info, DEGRADED));
>   				continue;
>   			}
> +
>   			req_q = bdev_get_queue(stripe->dev->bdev);
> -			if (!blk_queue_discard(req_q))
> +			/* zone reset in ZONED mode */
> +			if (btrfs_can_zone_reset(dev, physical, length))
> +				ret = btrfs_reset_device_zone(dev, physical,
> +							      length, &bytes);
> +			else if (blk_queue_discard(req_q))
> +				ret = btrfs_issue_discard(dev->bdev, physical,
> +							  length, &bytes);
> +			else
>   				continue;
>   
> -			ret = btrfs_issue_discard(stripe->dev->bdev,
> -						  stripe->physical,
> -						  stripe->length,
> -						  &bytes);

The problem is you have

if (btrfs_test_opt(fs_info, DISCARD_SYNC))
	ret = btrfs_discard_extent(fs_info, start,
				   end + 1 - start, NULL);

in btrfs_finish_extent_commit, so even if you add support here, you aren't 
actually discarding anything because the transaction commit won't call discard 
for this range.

You're going to have to rework this logic to allow for discard to be called 
everywhere it checks DISCARD_SYNC, but then you also need to go and make sure 
you don't actually allow discards to happen at non-bg aligned ranges.  Thanks,

Josef
Naohiro Aota Nov. 10, 2020, 10:40 a.m. UTC | #2
On Tue, Nov 03, 2020 at 09:34:19AM -0500, Josef Bacik wrote:
>On 10/30/20 9:51 AM, Naohiro Aota wrote:
>>For an ZONED volume, a block group maps to a zone of the device. For
>>deleted unused block groups, the zone of the block group can be reset to
>>rewind the zone write pointer at the start of the zone.
>>
>>Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
>>---
>>  fs/btrfs/block-group.c |  8 ++++++--
>>  fs/btrfs/extent-tree.c | 17 ++++++++++++-----
>>  fs/btrfs/zoned.h       | 16 ++++++++++++++++
>>  3 files changed, 34 insertions(+), 7 deletions(-)
>>
>>diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
>>index d67f9cabe5c1..82d556368c85 100644
>>--- a/fs/btrfs/block-group.c
>>+++ b/fs/btrfs/block-group.c
>>@@ -1468,8 +1468,12 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
>>  		if (!async_trim_enabled && btrfs_test_opt(fs_info, DISCARD_ASYNC))
>>  			goto flip_async;
>>-		/* DISCARD can flip during remount */
>>-		trimming = btrfs_test_opt(fs_info, DISCARD_SYNC);
>>+		/*
>>+		 * DISCARD can flip during remount. In ZONED mode, we need
>>+		 * to reset sequential required zones.
>>+		 */
>>+		trimming = btrfs_test_opt(fs_info, DISCARD_SYNC) ||
>>+				btrfs_is_zoned(fs_info);
>>  		/* Implicit trim during transaction commit. */
>>  		if (trimming)
>>diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
>>index 5e6b4d1712f2..c134746d7417 100644
>>--- a/fs/btrfs/extent-tree.c
>>+++ b/fs/btrfs/extent-tree.c
>>@@ -1331,6 +1331,9 @@ int btrfs_discard_extent(struct btrfs_fs_info *fs_info, u64 bytenr,
>>  		stripe = bbio->stripes;
>>  		for (i = 0; i < bbio->num_stripes; i++, stripe++) {
>>+			struct btrfs_device *dev = stripe->dev;
>>+			u64 physical = stripe->physical;
>>+			u64 length = stripe->length;
>>  			u64 bytes;
>>  			struct request_queue *req_q;
>>@@ -1338,14 +1341,18 @@ int btrfs_discard_extent(struct btrfs_fs_info *fs_info, u64 bytenr,
>>  				ASSERT(btrfs_test_opt(fs_info, DEGRADED));
>>  				continue;
>>  			}
>>+
>>  			req_q = bdev_get_queue(stripe->dev->bdev);
>>-			if (!blk_queue_discard(req_q))
>>+			/* zone reset in ZONED mode */
>>+			if (btrfs_can_zone_reset(dev, physical, length))
>>+				ret = btrfs_reset_device_zone(dev, physical,
>>+							      length, &bytes);
>>+			else if (blk_queue_discard(req_q))
>>+				ret = btrfs_issue_discard(dev->bdev, physical,
>>+							  length, &bytes);
>>+			else
>>  				continue;
>>-			ret = btrfs_issue_discard(stripe->dev->bdev,
>>-						  stripe->physical,
>>-						  stripe->length,
>>-						  &bytes);
>
>The problem is you have
>
>if (btrfs_test_opt(fs_info, DISCARD_SYNC))
>	ret = btrfs_discard_extent(fs_info, start,
>				   end + 1 - start, NULL);
>
>in btrfs_finish_extent_commit, so even if you add support here, you 
>aren't actually discarding anything because the transaction commit 
>won't call discard for this range.

btrfs_discard_extent() is called for each deleted_bg on the BG area,
regardless of the above pinned_extent's range.

>
>You're going to have to rework this logic to allow for discard to be 
>called everywhere it checks DISCARD_SYNC, but then you also need to go 
>and make sure you don't actually allow discards to happen at non-bg 
>aligned ranges.  Thanks,
>
>Josef

We already ignore btrfs_discard_extent() which is not aligned to zone. So,
I confirmed it worked fine with DISCARD_SYNC.
diff mbox series

Patch

diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index d67f9cabe5c1..82d556368c85 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -1468,8 +1468,12 @@  void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
 		if (!async_trim_enabled && btrfs_test_opt(fs_info, DISCARD_ASYNC))
 			goto flip_async;
 
-		/* DISCARD can flip during remount */
-		trimming = btrfs_test_opt(fs_info, DISCARD_SYNC);
+		/*
+		 * DISCARD can flip during remount. In ZONED mode, we need
+		 * to reset sequential required zones.
+		 */
+		trimming = btrfs_test_opt(fs_info, DISCARD_SYNC) ||
+				btrfs_is_zoned(fs_info);
 
 		/* Implicit trim during transaction commit. */
 		if (trimming)
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 5e6b4d1712f2..c134746d7417 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -1331,6 +1331,9 @@  int btrfs_discard_extent(struct btrfs_fs_info *fs_info, u64 bytenr,
 
 		stripe = bbio->stripes;
 		for (i = 0; i < bbio->num_stripes; i++, stripe++) {
+			struct btrfs_device *dev = stripe->dev;
+			u64 physical = stripe->physical;
+			u64 length = stripe->length;
 			u64 bytes;
 			struct request_queue *req_q;
 
@@ -1338,14 +1341,18 @@  int btrfs_discard_extent(struct btrfs_fs_info *fs_info, u64 bytenr,
 				ASSERT(btrfs_test_opt(fs_info, DEGRADED));
 				continue;
 			}
+
 			req_q = bdev_get_queue(stripe->dev->bdev);
-			if (!blk_queue_discard(req_q))
+			/* zone reset in ZONED mode */
+			if (btrfs_can_zone_reset(dev, physical, length))
+				ret = btrfs_reset_device_zone(dev, physical,
+							      length, &bytes);
+			else if (blk_queue_discard(req_q))
+				ret = btrfs_issue_discard(dev->bdev, physical,
+							  length, &bytes);
+			else
 				continue;
 
-			ret = btrfs_issue_discard(stripe->dev->bdev,
-						  stripe->physical,
-						  stripe->length,
-						  &bytes);
 			if (!ret) {
 				discarded_bytes += bytes;
 			} else if (ret != -EOPNOTSUPP) {
diff --git a/fs/btrfs/zoned.h b/fs/btrfs/zoned.h
index 42b1a4217c6b..d13bc6d70ea4 100644
--- a/fs/btrfs/zoned.h
+++ b/fs/btrfs/zoned.h
@@ -193,4 +193,20 @@  static inline u64 btrfs_zone_align(struct btrfs_device *device, u64 pos)
 	return ALIGN(pos, device->zone_info->zone_size);
 }
 
+static inline bool btrfs_can_zone_reset(struct btrfs_device *device,
+					u64 physical, u64 length)
+{
+	u64 zone_size;
+
+	if (!btrfs_dev_is_sequential(device, physical))
+		return false;
+
+	zone_size = device->zone_info->zone_size;
+	if (!IS_ALIGNED(physical, zone_size) ||
+	    !IS_ALIGNED(length, zone_size))
+		return false;
+
+	return true;
+}
+
 #endif