diff mbox series

[v9,35/41] btrfs: enable relocation in ZONED mode

Message ID 669d00d499b702413a51364b405280798df9c6c3.1604065695.git.naohiro.aota@wdc.com (mailing list archive)
State New, archived
Headers show
Series btrfs: zoned block device support | expand

Commit Message

Naohiro Aota Oct. 30, 2020, 1:51 p.m. UTC
To serialize allocation and submit_bio, we introduced a mutex around them. As
a result, preallocation must be completely disabled to avoid a deadlock.

Since the current relocation process relies on preallocation to move file data
extents, it must be handled in another way. In ZONED mode, we just truncate
the inode to the size that we wanted to pre-allocate. Then, we flush dirty
pages on the file before finishing the relocation process.
run_delalloc_zoned() will handle all the allocation and submit IOs to the
underlying layers.

Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
---
 fs/btrfs/relocation.c | 35 +++++++++++++++++++++++++++++++++--
 1 file changed, 33 insertions(+), 2 deletions(-)

Comments

Josef Bacik Nov. 3, 2020, 8:39 p.m. UTC | #1
On 10/30/20 9:51 AM, Naohiro Aota wrote:
> To serialize allocation and submit_bio, we introduced mutex around them. As
> a result, preallocation must be completely disabled to avoid a deadlock.
> 
> Since current relocation process relies on preallocation to move file data
> extents, it must be handled in another way. In ZONED mode, we just truncate
> the inode to the size that we wanted to pre-allocate. Then, we flush dirty
> pages on the file before finishing relocation process.
> run_delalloc_zoned() will handle all the allocation and submit IOs to the
> underlying layers.
> 
> Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
> ---
>   fs/btrfs/relocation.c | 35 +++++++++++++++++++++++++++++++++--
>   1 file changed, 33 insertions(+), 2 deletions(-)
> 
> diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
> index 3602806d71bd..44b697b881b6 100644
> --- a/fs/btrfs/relocation.c
> +++ b/fs/btrfs/relocation.c
> @@ -2603,6 +2603,32 @@ static noinline_for_stack int prealloc_file_extent_cluster(
>   	if (ret)
>   		return ret;
>   
> +	/*
> +	 * In ZONED mode, we cannot preallocate the file region. Instead, we
> +	 * dirty and fiemap_write the region.
> +	 */
> +
> +	if (btrfs_is_zoned(inode->root->fs_info)) {
> +		struct btrfs_root *root = inode->root;
> +		struct btrfs_trans_handle *trans;
> +
> +		end = cluster->end - offset + 1;
> +		trans = btrfs_start_transaction(root, 1);
> +		if (IS_ERR(trans))
> +			return PTR_ERR(trans);
> +
> +		inode->vfs_inode.i_ctime = current_time(&inode->vfs_inode);
> +		i_size_write(&inode->vfs_inode, end);
> +		ret = btrfs_update_inode(trans, root, &inode->vfs_inode);
> +		if (ret) {
> +			btrfs_abort_transaction(trans, ret);
> +			btrfs_end_transaction(trans);
> +			return ret;
> +		}
> +
> +		return btrfs_end_transaction(trans);
> +	}

The reason we pre-allocate is because we need the extents to be exactly as long
as they are on disk.  If we have a 123MiB extent, we need the extent it goes into
to be exactly 123MiB, otherwise we cannot simply relocate it out of its location.
This doesn't do anything for us.  I'm not even sure we can do relocation for
zoned since we have to maintain the original extent size, but if you can then
it's going to be more complicated than just this.  You'll have to either forgo
WRITE_APPEND for this case, or make sure you can actually write the whole
ordered extent in one go.  Thanks,

Josef
diff mbox series

Patch

diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 3602806d71bd..44b697b881b6 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -2603,6 +2603,32 @@  static noinline_for_stack int prealloc_file_extent_cluster(
 	if (ret)
 		return ret;
 
+	/*
+	 * In ZONED mode, we cannot preallocate the file region. Instead, we
+	 * dirty and fiemap_write the region.
+	 */
+
+	if (btrfs_is_zoned(inode->root->fs_info)) {
+		struct btrfs_root *root = inode->root;
+		struct btrfs_trans_handle *trans;
+
+		end = cluster->end - offset + 1;
+		trans = btrfs_start_transaction(root, 1);
+		if (IS_ERR(trans))
+			return PTR_ERR(trans);
+
+		inode->vfs_inode.i_ctime = current_time(&inode->vfs_inode);
+		i_size_write(&inode->vfs_inode, end);
+		ret = btrfs_update_inode(trans, root, &inode->vfs_inode);
+		if (ret) {
+			btrfs_abort_transaction(trans, ret);
+			btrfs_end_transaction(trans);
+			return ret;
+		}
+
+		return btrfs_end_transaction(trans);
+	}
+
 	inode_lock(&inode->vfs_inode);
 	for (nr = 0; nr < cluster->nr; nr++) {
 		start = cluster->boundary[nr] - offset;
@@ -2799,6 +2825,8 @@  static int relocate_file_extent_cluster(struct inode *inode,
 		}
 	}
 	WARN_ON(nr != cluster->nr);
+	if (btrfs_is_zoned(fs_info) && !ret)
+		ret = btrfs_wait_ordered_range(inode, 0, (u64)-1);
 out:
 	kfree(ra);
 	return ret;
@@ -3434,8 +3462,12 @@  static int __insert_orphan_inode(struct btrfs_trans_handle *trans,
 	struct btrfs_path *path;
 	struct btrfs_inode_item *item;
 	struct extent_buffer *leaf;
+	u64 flags = BTRFS_INODE_NOCOMPRESS | BTRFS_INODE_PREALLOC;
 	int ret;
 
+	if (btrfs_is_zoned(trans->fs_info))
+		flags &= ~BTRFS_INODE_PREALLOC;
+
 	path = btrfs_alloc_path();
 	if (!path)
 		return -ENOMEM;
@@ -3450,8 +3482,7 @@  static int __insert_orphan_inode(struct btrfs_trans_handle *trans,
 	btrfs_set_inode_generation(leaf, item, 1);
 	btrfs_set_inode_size(leaf, item, 0);
 	btrfs_set_inode_mode(leaf, item, S_IFREG | 0600);
-	btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS |
-					  BTRFS_INODE_PREALLOC);
+	btrfs_set_inode_flags(leaf, item, flags);
 	btrfs_mark_buffer_dirty(leaf);
 out:
 	btrfs_free_path(path);