diff mbox series

[v4,30/30] btrfs: allow read-write for 4K sectorsize on 64K page size systems

Message ID 20210531085106.259490-31-wqu@suse.com (mailing list archive)
State New, archived
Headers show
Series btrfs: add data write support for subpage | expand

Commit Message

Qu Wenruo May 31, 2021, 8:51 a.m. UTC
Now that we support data and metadata read-write for subpage, remove
the RO requirement for subpage mounts.

There are some extra limits though:
- For now, subpage RW mount is still considered experimental
  Thus that mount warning will still be there.

- No compression support
  There are still quite a few hard-coded PAGE_SIZE uses, and quite a few
  call sites use extent_clear_unlock_delalloc() to unlock locked_page.
  This will break the subpage helpers.

  For now, on a subpage RW mount, writes are never compressed, regardless
  of which mount options or inode attributes are set.
  Reading compressed data still works without problems.

- No defrag for subpage case
  The defrag support for subpage case will come in later patches, which
  will also rework the defrag workflow.

- No inline extent will be created
  This is mostly due to the fact that filemap_fdatawrite_range() will
  trigger more writeback than the range specified.
  In fallocate calls, this behavior can make us write back a range which
  could be inlined, before we enlarge the isize.

  This is a very special corner case, and even the current btrfs check
  won't report an error on such an inline extent + regular extent layout.
  But considering how much effort has been put into preventing such
  inline + regular extent combinations, I'd prefer to disable inline
  extents completely until we have a good solution.

Signed-off-by: Qu Wenruo <wqu@suse.com>
---
 fs/btrfs/disk-io.c | 13 ++++---------
 fs/btrfs/inode.c   |  3 +++
 fs/btrfs/ioctl.c   |  6 ++++++
 fs/btrfs/super.c   |  7 -------
 fs/btrfs/sysfs.c   |  5 +++++
 5 files changed, 18 insertions(+), 16 deletions(-)

Comments

David Sterba June 2, 2021, 5:37 p.m. UTC | #1
On Mon, May 31, 2021 at 04:51:06PM +0800, Qu Wenruo wrote:
> Since now we support data and metadata read-write for subpage, remove
> the RO requirement for subpage mount.
> 
> There are some extra limits though:
> - For now, subpage RW mount is still considered experimental
>   Thus that mount warning will still be there.
> 
> - No compression support
>   There are still quite some PAGE_SIZE hard coded and quite some call
>   sites use extent_clear_unlock_delalloc() to unlock locked_page.
>   This will screw up subpage helpers
> 
>   Now for subpage RW mount, no matter whatever mount option or inode
>   attr is set, all write will not be compressed.
>   Although reading compressed data has no problem.
> 
> - No defrag for subpage case
>   The defrag support for subpage case will come in later patches, which
>   will also rework the defrag workflow.
> 
> - No inline extent will be created
>   This is mostly due to the fact that filemap_fdatawrite_range() will
>   trigger more write than the range specified.
>   In fallocate calls, this behavior can make us to writeback which can
>   be inlined, before we enlarge the isize.
> 
>   This is a very special corner case, and even current btrfs check won't
>   report error on such inline extent + regular extent.
>   But considering how much effort has been put to prevent such inline +
>   regular, I'd prefer to cut off inline extent completely until we have
>   a good solution.

I think the limitations are acceptable. Inline extents can be turned off
by mount option too, defrag is optional, compression has a fallback.

> 
> Signed-off-by: Qu Wenruo <wqu@suse.com>
> ---
>  fs/btrfs/disk-io.c | 13 ++++---------
>  fs/btrfs/inode.c   |  3 +++
>  fs/btrfs/ioctl.c   |  6 ++++++
>  fs/btrfs/super.c   |  7 -------
>  fs/btrfs/sysfs.c   |  5 +++++
>  5 files changed, 18 insertions(+), 16 deletions(-)
> 
> diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
> index 2dd48f4bec8f..7c17cb7cf4fe 100644
> --- a/fs/btrfs/disk-io.c
> +++ b/fs/btrfs/disk-io.c
> @@ -3396,15 +3396,10 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
>  		goto fail_alloc;
>  	}
>  
> -	/* For 4K sector size support, it's only read-only */
> -	if (PAGE_SIZE == SZ_64K && sectorsize == SZ_4K) {
> -		if (!sb_rdonly(sb) || btrfs_super_log_root(disk_super)) {
> -			btrfs_err(fs_info,
> -	"subpage sectorsize %u only supported read-only for page size %lu",
> -				sectorsize, PAGE_SIZE);
> -			err = -EINVAL;
> -			goto fail_alloc;
> -		}
> +	if (sectorsize != PAGE_SIZE) {
> +		btrfs_warn(fs_info,
> +	"read-write for sector size %u with page size %lu is experimental",
> +			   sectorsize, PAGE_SIZE);
>  	}
>  	if (sectorsize != PAGE_SIZE) {
>  		if (btrfs_super_incompat_flags(fs_info->super_copy) &
> diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
> index 85f465328ab2..27d56a77aa5f 100644
> --- a/fs/btrfs/inode.c
> +++ b/fs/btrfs/inode.c
> @@ -490,6 +490,9 @@ static noinline int add_async_extent(struct async_chunk *cow,
>   */
>  static inline bool inode_can_compress(struct btrfs_inode *inode)
>  {
> +	/* Subpage doesn't support compress yet */
> +	if (inode->root->fs_info->sectorsize < PAGE_SIZE)
> +		return false;
>  	if (inode->flags & BTRFS_INODE_NODATACOW ||
>  	    inode->flags & BTRFS_INODE_NODATASUM)
>  		return false;
> diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
> index 49cfa9772c1b..11adf4670c55 100644
> --- a/fs/btrfs/ioctl.c
> +++ b/fs/btrfs/ioctl.c
> @@ -3028,6 +3028,12 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp)
>  		goto out;
>  	}
>  
> +	/* Subpage defrag will be supported in later commits */
> +	if (root->fs_info->sectorsize < PAGE_SIZE) {
> +		ret = -ENOTTY;

This should be -EOPNOTSUPP, that's what we've used eg. in zoned mode for
features that are not implemented yet but will be (fitrim), while for
nocow it's -EPERM as it's entirely incompatible.

ENOTTY would mean there's no such ioctl, which would be confusing
because the ioctl is otherwise valid for btrfs.
diff mbox series

Patch

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 2dd48f4bec8f..7c17cb7cf4fe 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -3396,15 +3396,10 @@  int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
 		goto fail_alloc;
 	}
 
-	/* For 4K sector size support, it's only read-only */
-	if (PAGE_SIZE == SZ_64K && sectorsize == SZ_4K) {
-		if (!sb_rdonly(sb) || btrfs_super_log_root(disk_super)) {
-			btrfs_err(fs_info,
-	"subpage sectorsize %u only supported read-only for page size %lu",
-				sectorsize, PAGE_SIZE);
-			err = -EINVAL;
-			goto fail_alloc;
-		}
+	if (sectorsize != PAGE_SIZE) {
+		btrfs_warn(fs_info,
+	"read-write for sector size %u with page size %lu is experimental",
+			   sectorsize, PAGE_SIZE);
 	}
 	if (sectorsize != PAGE_SIZE) {
 		if (btrfs_super_incompat_flags(fs_info->super_copy) &
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 85f465328ab2..27d56a77aa5f 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -490,6 +490,9 @@  static noinline int add_async_extent(struct async_chunk *cow,
  */
 static inline bool inode_can_compress(struct btrfs_inode *inode)
 {
+	/* Subpage doesn't support compress yet */
+	if (inode->root->fs_info->sectorsize < PAGE_SIZE)
+		return false;
 	if (inode->flags & BTRFS_INODE_NODATACOW ||
 	    inode->flags & BTRFS_INODE_NODATASUM)
 		return false;
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 49cfa9772c1b..11adf4670c55 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -3028,6 +3028,12 @@  static int btrfs_ioctl_defrag(struct file *file, void __user *argp)
 		goto out;
 	}
 
+	/* Subpage defrag will be supported in later commits */
+	if (root->fs_info->sectorsize < PAGE_SIZE) {
+		ret = -ENOTTY;
+		goto out;
+	}
+
 	switch (inode->i_mode & S_IFMT) {
 	case S_IFDIR:
 		if (!capable(CAP_SYS_ADMIN)) {
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 4a396c1147f1..b18d268abfbb 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -2053,13 +2053,6 @@  static int btrfs_remount(struct super_block *sb, int *flags, char *data)
 			ret = -EINVAL;
 			goto restore;
 		}
-		if (fs_info->sectorsize < PAGE_SIZE) {
-			btrfs_warn(fs_info,
-	"read-write mount is not yet allowed for sectorsize %u page size %lu",
-				   fs_info->sectorsize, PAGE_SIZE);
-			ret = -EINVAL;
-			goto restore;
-		}
 
 		/*
 		 * NOTE: when remounting with a change that does writes, don't
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index 436ac7b4b334..752461a79364 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -366,6 +366,11 @@  static ssize_t supported_sectorsizes_show(struct kobject *kobj,
 {
 	ssize_t ret = 0;
 
+	/* 4K sector size is also supported with 64K page size */
+	if (PAGE_SIZE == SZ_64K)
+		ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%u ",
+				 SZ_4K);
+
 	/* Only sectorsize == PAGE_SIZE is now supported */
 	ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%lu\n", PAGE_SIZE);