diff mbox series

[3/9] btrfs: track original extent subvol in a new inline ref

Message ID 7a4b78e240d2f26eb3d7be82d4c0b8ddaa409519.1683075170.git.boris@bur.io (mailing list archive)
State New, archived
Headers show
Series btrfs: simple quotas | expand

Commit Message

Boris Burkov May 3, 2023, 12:59 a.m. UTC
In order to implement simple quota groups, we need to be able to
associate a data extent with the subvolume that created it. Once you
account for reflink, this information cannot be recovered without
explicitly storing it. Options for storing it are:
- a new key/item
- a new extent inline ref item

The former is backwards compatible but wastes space; the latter is
incompat, but is space-efficient and reuses the existing inline ref
machinery, while only abusing it a tiny amount -- specifically, the new
item is not a ref, per se.

Signed-off-by: Boris Burkov <boris@bur.io>
---
 fs/btrfs/accessors.h            |  4 +++
 fs/btrfs/backref.c              |  3 ++
 fs/btrfs/extent-tree.c          | 50 +++++++++++++++++++++++++--------
 fs/btrfs/print-tree.c           | 12 ++++++++
 fs/btrfs/ref-verify.c           |  3 ++
 fs/btrfs/tree-checker.c         |  3 ++
 include/uapi/linux/btrfs_tree.h |  6 ++++
 7 files changed, 70 insertions(+), 11 deletions(-)

Comments

Qu Wenruo May 3, 2023, 3:17 a.m. UTC | #1
On 2023/5/3 08:59, Boris Burkov wrote:
> In order to implement simple quota groups, we need to be able to
> associate a data extent with the subvolume that created it. Once you
> account for reflink, this information cannot be recovered without
> explicitly storing it. Options for storing it are:
> - a new key/item
> - a new extent inline ref item
> 
> The former is backwards compatible, but wastes space, the latter is
> incompat, but is efficient in space and reuses the existing inline ref
> machinery, while only abusing it a tiny amount -- specifically, the new
> item is not a ref, per-se.

Even if we introduce new extent tree items, we can still mark the fs compat_ro.

As long as we don't do any writes, we can still read the fs without any
compatibility problems, and enabling/disabling the feature should be
handled by btrfstune/mkfs anyway.

Thanks,
Qu
> 
> Signed-off-by: Boris Burkov <boris@bur.io>
> ---
>   fs/btrfs/accessors.h            |  4 +++
>   fs/btrfs/backref.c              |  3 ++
>   fs/btrfs/extent-tree.c          | 50 +++++++++++++++++++++++++--------
>   fs/btrfs/print-tree.c           | 12 ++++++++
>   fs/btrfs/ref-verify.c           |  3 ++
>   fs/btrfs/tree-checker.c         |  3 ++
>   include/uapi/linux/btrfs_tree.h |  6 ++++
>   7 files changed, 70 insertions(+), 11 deletions(-)
> 
> diff --git a/fs/btrfs/accessors.h b/fs/btrfs/accessors.h
> index ceadfc5d6c66..aab61312e4e8 100644
> --- a/fs/btrfs/accessors.h
> +++ b/fs/btrfs/accessors.h
> @@ -350,6 +350,8 @@ BTRFS_SETGET_FUNCS(extent_data_ref_count, struct btrfs_extent_data_ref, count, 3
>   
>   BTRFS_SETGET_FUNCS(shared_data_ref_count, struct btrfs_shared_data_ref, count, 32);
>   
> +BTRFS_SETGET_FUNCS(extent_owner_ref_root_id, struct btrfs_extent_owner_ref, root_id, 64);
> +
>   BTRFS_SETGET_FUNCS(extent_inline_ref_type, struct btrfs_extent_inline_ref,
>   		   type, 8);
>   BTRFS_SETGET_FUNCS(extent_inline_ref_offset, struct btrfs_extent_inline_ref,
> @@ -366,6 +368,8 @@ static inline u32 btrfs_extent_inline_ref_size(int type)
>   	if (type == BTRFS_EXTENT_DATA_REF_KEY)
>   		return sizeof(struct btrfs_extent_data_ref) +
>   		       offsetof(struct btrfs_extent_inline_ref, offset);
> +	if (type == BTRFS_EXTENT_OWNER_REF_KEY)
> +		return sizeof(struct btrfs_extent_inline_ref);
>   	return 0;
>   }
>   
> diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
> index e54f0884802a..8cd8ed6c572f 100644
> --- a/fs/btrfs/backref.c
> +++ b/fs/btrfs/backref.c
> @@ -1128,6 +1128,9 @@ static int add_inline_refs(struct btrfs_backref_walk_ctx *ctx,
>   						       count, sc, GFP_NOFS);
>   			break;
>   		}
> +		case BTRFS_EXTENT_OWNER_REF_KEY:
> +			WARN_ON(!btrfs_fs_incompat(ctx->fs_info, SIMPLE_QUOTA));
> +			break;
>   		default:
>   			WARN_ON(1);
>   		}
> diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
> index 5cd289de4e92..b9a2f1e355b7 100644
> --- a/fs/btrfs/extent-tree.c
> +++ b/fs/btrfs/extent-tree.c
> @@ -363,9 +363,13 @@ int btrfs_get_extent_inline_ref_type(const struct extent_buffer *eb,
>   				     struct btrfs_extent_inline_ref *iref,
>   				     enum btrfs_inline_ref_type is_data)
>   {
> +	struct btrfs_fs_info *fs_info = eb->fs_info;
>   	int type = btrfs_extent_inline_ref_type(eb, iref);
>   	u64 offset = btrfs_extent_inline_ref_offset(eb, iref);
>   
> +	if (type == BTRFS_EXTENT_OWNER_REF_KEY && btrfs_fs_incompat(fs_info, SIMPLE_QUOTA))
> +		return type;
> +
>   	if (type == BTRFS_TREE_BLOCK_REF_KEY ||
>   	    type == BTRFS_SHARED_BLOCK_REF_KEY ||
>   	    type == BTRFS_SHARED_DATA_REF_KEY ||
> @@ -374,26 +378,25 @@ int btrfs_get_extent_inline_ref_type(const struct extent_buffer *eb,
>   			if (type == BTRFS_TREE_BLOCK_REF_KEY)
>   				return type;
>   			if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
> -				ASSERT(eb->fs_info);
> +				ASSERT(fs_info);
>   				/*
>   				 * Every shared one has parent tree block,
>   				 * which must be aligned to sector size.
>   				 */
> -				if (offset &&
> -				    IS_ALIGNED(offset, eb->fs_info->sectorsize))
> +				if (offset && IS_ALIGNED(offset, fs_info->sectorsize))
>   					return type;
>   			}
>   		} else if (is_data == BTRFS_REF_TYPE_DATA) {
>   			if (type == BTRFS_EXTENT_DATA_REF_KEY)
>   				return type;
>   			if (type == BTRFS_SHARED_DATA_REF_KEY) {
> -				ASSERT(eb->fs_info);
> +				ASSERT(fs_info);
>   				/*
>   				 * Every shared one has parent tree block,
>   				 * which must be aligned to sector size.
>   				 */
>   				if (offset &&
> -				    IS_ALIGNED(offset, eb->fs_info->sectorsize))
> +				    IS_ALIGNED(offset, fs_info->sectorsize))
>   					return type;
>   			}
>   		} else {
> @@ -403,7 +406,7 @@ int btrfs_get_extent_inline_ref_type(const struct extent_buffer *eb,
>   	}
>   
>   	btrfs_print_leaf((struct extent_buffer *)eb);
> -	btrfs_err(eb->fs_info,
> +	btrfs_err(fs_info,
>   		  "eb %llu iref 0x%lx invalid extent inline ref type %d",
>   		  eb->start, (unsigned long)iref, type);
>   	WARN_ON(1);
> @@ -912,6 +915,11 @@ int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
>   		}
>   		iref = (struct btrfs_extent_inline_ref *)ptr;
>   		type = btrfs_get_extent_inline_ref_type(leaf, iref, needed);
> +		if (type == BTRFS_EXTENT_OWNER_REF_KEY) {
> +			WARN_ON(!btrfs_fs_incompat(fs_info, SIMPLE_QUOTA));
> +			ptr += btrfs_extent_inline_ref_size(type);
> +			continue;
> +		}
>   		if (type == BTRFS_REF_TYPE_INVALID) {
>   			err = -EUCLEAN;
>   			goto out;
> @@ -1708,6 +1716,8 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
>   		 node->type == BTRFS_SHARED_DATA_REF_KEY)
>   		ret = run_delayed_data_ref(trans, node, extent_op,
>   					   insert_reserved);
> +	else if (node->type == BTRFS_EXTENT_OWNER_REF_KEY)
> +		ret = 0;
>   	else
>   		BUG();
>   	if (ret && insert_reserved)
> @@ -2275,6 +2285,7 @@ static noinline int check_committed_ref(struct btrfs_root *root,
>   	struct btrfs_extent_item *ei;
>   	struct btrfs_key key;
>   	u32 item_size;
> +	u32 expected_size;
>   	int type;
>   	int ret;
>   
> @@ -2301,10 +2312,17 @@ static noinline int check_committed_ref(struct btrfs_root *root,
>   	ret = 1;
>   	item_size = btrfs_item_size(leaf, path->slots[0]);
>   	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
> +	expected_size = sizeof(*ei) + btrfs_extent_inline_ref_size(BTRFS_EXTENT_DATA_REF_KEY);
> +
> +	iref = (struct btrfs_extent_inline_ref *)(ei + 1);
> +	type = btrfs_get_extent_inline_ref_type(leaf, iref, BTRFS_REF_TYPE_DATA);
> +	if (btrfs_fs_incompat(fs_info, SIMPLE_QUOTA) && type == BTRFS_EXTENT_OWNER_REF_KEY) {
> +		expected_size += btrfs_extent_inline_ref_size(BTRFS_EXTENT_OWNER_REF_KEY);
> +		iref = (struct btrfs_extent_inline_ref *)(iref + 1);
> +	}
>   
>   	/* If extent item has more than 1 inline ref then it's shared */
> -	if (item_size != sizeof(*ei) +
> -	    btrfs_extent_inline_ref_size(BTRFS_EXTENT_DATA_REF_KEY))
> +	if (item_size != expected_size)
>   		goto out;
>   
>   	/*
> @@ -2316,8 +2334,6 @@ static noinline int check_committed_ref(struct btrfs_root *root,
>   	     btrfs_root_last_snapshot(&root->root_item)))
>   		goto out;
>   
> -	iref = (struct btrfs_extent_inline_ref *)(ei + 1);
> -
>   	/* If this extent has SHARED_DATA_REF then it's shared */
>   	type = btrfs_get_extent_inline_ref_type(leaf, iref, BTRFS_REF_TYPE_DATA);
>   	if (type != BTRFS_EXTENT_DATA_REF_KEY)
> @@ -4572,6 +4588,7 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
>   	struct btrfs_root *extent_root;
>   	int ret;
>   	struct btrfs_extent_item *extent_item;
> +	struct btrfs_extent_owner_ref *oref;
>   	struct btrfs_extent_inline_ref *iref;
>   	struct btrfs_path *path;
>   	struct extent_buffer *leaf;
> @@ -4583,7 +4600,10 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
>   	else
>   		type = BTRFS_EXTENT_DATA_REF_KEY;
>   
> -	size = sizeof(*extent_item) + btrfs_extent_inline_ref_size(type);
> +	size = sizeof(*extent_item);
> +	if (btrfs_qgroup_mode(fs_info) == BTRFS_QGROUP_MODE_SIMPLE)
> +		size += btrfs_extent_inline_ref_size(BTRFS_EXTENT_OWNER_REF_KEY);
> +	size += btrfs_extent_inline_ref_size(type);
>   
>   	path = btrfs_alloc_path();
>   	if (!path)
> @@ -4604,8 +4624,16 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
>   	btrfs_set_extent_flags(leaf, extent_item,
>   			       flags | BTRFS_EXTENT_FLAG_DATA);
>   
> +
>   	iref = (struct btrfs_extent_inline_ref *)(extent_item + 1);
> +	if (btrfs_fs_incompat(fs_info, SIMPLE_QUOTA)) {
> +		btrfs_set_extent_inline_ref_type(leaf, iref, BTRFS_EXTENT_OWNER_REF_KEY);
> +		oref = (struct btrfs_extent_owner_ref *)(&iref->offset);
> +		btrfs_set_extent_owner_ref_root_id(leaf, oref, root_objectid);
> +		iref = (struct btrfs_extent_inline_ref *)(oref + 1);
> +	}
>   	btrfs_set_extent_inline_ref_type(leaf, iref, type);
> +
>   	if (parent > 0) {
>   		struct btrfs_shared_data_ref *ref;
>   		ref = (struct btrfs_shared_data_ref *)(iref + 1);
> diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c
> index b93c96213304..1114cd915bd8 100644
> --- a/fs/btrfs/print-tree.c
> +++ b/fs/btrfs/print-tree.c
> @@ -80,12 +80,20 @@ static void print_extent_data_ref(struct extent_buffer *eb,
>   	       btrfs_extent_data_ref_count(eb, ref));
>   }
>   
> +static void print_extent_owner_ref(struct extent_buffer *eb,
> +				   struct btrfs_extent_owner_ref *ref)
> +{
> +	WARN_ON(!btrfs_fs_incompat(eb->fs_info, SIMPLE_QUOTA));
> +	pr_cont("extent data owner root %llu\n", btrfs_extent_owner_ref_root_id(eb, ref));
> +}
> +
>   static void print_extent_item(struct extent_buffer *eb, int slot, int type)
>   {
>   	struct btrfs_extent_item *ei;
>   	struct btrfs_extent_inline_ref *iref;
>   	struct btrfs_extent_data_ref *dref;
>   	struct btrfs_shared_data_ref *sref;
> +	struct btrfs_extent_owner_ref *oref;
>   	struct btrfs_disk_key key;
>   	unsigned long end;
>   	unsigned long ptr;
> @@ -159,6 +167,10 @@ static void print_extent_item(struct extent_buffer *eb, int slot, int type)
>   			"\t\t\t(parent %llu not aligned to sectorsize %u)\n",
>   				     offset, eb->fs_info->sectorsize);
>   			break;
> +		case BTRFS_EXTENT_OWNER_REF_KEY:
> +			oref = (struct btrfs_extent_owner_ref *)(&iref->offset);
> +			print_extent_owner_ref(eb, oref);
> +			break;
>   		default:
>   			pr_cont("(extent %llu has INVALID ref type %d)\n",
>   				  eb->start, type);
> diff --git a/fs/btrfs/ref-verify.c b/fs/btrfs/ref-verify.c
> index 95d28497de7c..9edc87eaff1f 100644
> --- a/fs/btrfs/ref-verify.c
> +++ b/fs/btrfs/ref-verify.c
> @@ -485,6 +485,9 @@ static int process_extent_item(struct btrfs_fs_info *fs_info,
>   			ret = add_shared_data_ref(fs_info, offset, count,
>   						  key->objectid, key->offset);
>   			break;
> +		case BTRFS_EXTENT_OWNER_REF_KEY:
> +			WARN_ON(!btrfs_fs_incompat(fs_info, SIMPLE_QUOTA));
> +			break;
>   		default:
>   			btrfs_err(fs_info, "invalid key type in iref");
>   			ret = -EINVAL;
> diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c
> index e2b54793bf0c..27d4230a38a8 100644
> --- a/fs/btrfs/tree-checker.c
> +++ b/fs/btrfs/tree-checker.c
> @@ -1451,6 +1451,9 @@ static int check_extent_item(struct extent_buffer *leaf,
>   			}
>   			inline_refs += btrfs_shared_data_ref_count(leaf, sref);
>   			break;
> +		case BTRFS_EXTENT_OWNER_REF_KEY:
> +			WARN_ON(!btrfs_fs_incompat(fs_info, SIMPLE_QUOTA));
> +			break;
>   		default:
>   			extent_err(leaf, slot, "unknown inline ref type: %u",
>   				   inline_type);
> diff --git a/include/uapi/linux/btrfs_tree.h b/include/uapi/linux/btrfs_tree.h
> index ab38d0f411fa..424c7f342712 100644
> --- a/include/uapi/linux/btrfs_tree.h
> +++ b/include/uapi/linux/btrfs_tree.h
> @@ -226,6 +226,8 @@
>   
>   #define BTRFS_SHARED_DATA_REF_KEY	184
>   
> +#define BTRFS_EXTENT_OWNER_REF_KEY	190
> +
>   /*
>    * block groups give us hints into the extent allocation trees.  Which
>    * blocks are free etc etc
> @@ -783,6 +785,10 @@ struct btrfs_shared_data_ref {
>   	__le32 count;
>   } __attribute__ ((__packed__));
>   
> +struct btrfs_extent_owner_ref {
> +	u64 root_id;
> +} __attribute__ ((__packed__));
> +
>   struct btrfs_extent_inline_ref {
>   	__u8 type;
>   	__le64 offset;
Boris Burkov May 4, 2023, 4:17 p.m. UTC | #2
On Wed, May 03, 2023 at 11:17:12AM +0800, Qu Wenruo wrote:
> 
> 
> On 2023/5/3 08:59, Boris Burkov wrote:
> > In order to implement simple quota groups, we need to be able to
> > associate a data extent with the subvolume that created it. Once you
> > account for reflink, this information cannot be recovered without
> > explicitly storing it. Options for storing it are:
> > - a new key/item
> > - a new extent inline ref item
> > 
> > The former is backwards compatible, but wastes space, the latter is
> > incompat, but is efficient in space and reuses the existing inline ref
> > machinery, while only abusing it a tiny amount -- specifically, the new
> > item is not a ref, per-se.
> 
> Even we introduce new extent tree items, we can still mark the fs compat_ro.
> 
> As long as we don't do any writes, we can still read the fs without any
> compatibility problem, and the enable/disable should be addressed by
> btrfstune/mkfs anyway.

Unfortunately, I don't believe compat_ro is possible with this design.
Because of how inline ref items are implemented, there is a lot of code
that makes assumptions about the extent item size, and about the size of
each inline ref item based on its type. The best example of code that
definitely breaks, rather than maybe just warning, is check_extent_item()
in tree-checker.c.

With a new unparseable ref item inserted in the sequence of refs, that
code will either overflow or detect padding, because the size calculation
for the unknown type (btrfs_extent_inline_ref_size()) comes up 0, etc.
Perhaps there is a clever way to trick it, but I have not seen it yet.

I was able to make it compat_ro by introducing an entirely new item for
the owner ref, but that comes with a per-extent disk usage cost that
is fairly steep for storing just a single u64.

> 
> Thanks,
> Qu
> > 
> > Signed-off-by: Boris Burkov <boris@bur.io>
> > ---
> >   fs/btrfs/accessors.h            |  4 +++
> >   fs/btrfs/backref.c              |  3 ++
> >   fs/btrfs/extent-tree.c          | 50 +++++++++++++++++++++++++--------
> >   fs/btrfs/print-tree.c           | 12 ++++++++
> >   fs/btrfs/ref-verify.c           |  3 ++
> >   fs/btrfs/tree-checker.c         |  3 ++
> >   include/uapi/linux/btrfs_tree.h |  6 ++++
> >   7 files changed, 70 insertions(+), 11 deletions(-)
> > 
> > diff --git a/fs/btrfs/accessors.h b/fs/btrfs/accessors.h
> > index ceadfc5d6c66..aab61312e4e8 100644
> > --- a/fs/btrfs/accessors.h
> > +++ b/fs/btrfs/accessors.h
> > @@ -350,6 +350,8 @@ BTRFS_SETGET_FUNCS(extent_data_ref_count, struct btrfs_extent_data_ref, count, 3
> >   BTRFS_SETGET_FUNCS(shared_data_ref_count, struct btrfs_shared_data_ref, count, 32);
> > +BTRFS_SETGET_FUNCS(extent_owner_ref_root_id, struct btrfs_extent_owner_ref, root_id, 64);
> > +
> >   BTRFS_SETGET_FUNCS(extent_inline_ref_type, struct btrfs_extent_inline_ref,
> >   		   type, 8);
> >   BTRFS_SETGET_FUNCS(extent_inline_ref_offset, struct btrfs_extent_inline_ref,
> > @@ -366,6 +368,8 @@ static inline u32 btrfs_extent_inline_ref_size(int type)
> >   	if (type == BTRFS_EXTENT_DATA_REF_KEY)
> >   		return sizeof(struct btrfs_extent_data_ref) +
> >   		       offsetof(struct btrfs_extent_inline_ref, offset);
> > +	if (type == BTRFS_EXTENT_OWNER_REF_KEY)
> > +		return sizeof(struct btrfs_extent_inline_ref);
> >   	return 0;
> >   }
> > diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
> > index e54f0884802a..8cd8ed6c572f 100644
> > --- a/fs/btrfs/backref.c
> > +++ b/fs/btrfs/backref.c
> > @@ -1128,6 +1128,9 @@ static int add_inline_refs(struct btrfs_backref_walk_ctx *ctx,
> >   						       count, sc, GFP_NOFS);
> >   			break;
> >   		}
> > +		case BTRFS_EXTENT_OWNER_REF_KEY:
> > +			WARN_ON(!btrfs_fs_incompat(ctx->fs_info, SIMPLE_QUOTA));
> > +			break;
> >   		default:
> >   			WARN_ON(1);
> >   		}
> > diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
> > index 5cd289de4e92..b9a2f1e355b7 100644
> > --- a/fs/btrfs/extent-tree.c
> > +++ b/fs/btrfs/extent-tree.c
> > @@ -363,9 +363,13 @@ int btrfs_get_extent_inline_ref_type(const struct extent_buffer *eb,
> >   				     struct btrfs_extent_inline_ref *iref,
> >   				     enum btrfs_inline_ref_type is_data)
> >   {
> > +	struct btrfs_fs_info *fs_info = eb->fs_info;
> >   	int type = btrfs_extent_inline_ref_type(eb, iref);
> >   	u64 offset = btrfs_extent_inline_ref_offset(eb, iref);
> > +	if (type == BTRFS_EXTENT_OWNER_REF_KEY && btrfs_fs_incompat(fs_info, SIMPLE_QUOTA))
> > +		return type;
> > +
> >   	if (type == BTRFS_TREE_BLOCK_REF_KEY ||
> >   	    type == BTRFS_SHARED_BLOCK_REF_KEY ||
> >   	    type == BTRFS_SHARED_DATA_REF_KEY ||
> > @@ -374,26 +378,25 @@ int btrfs_get_extent_inline_ref_type(const struct extent_buffer *eb,
> >   			if (type == BTRFS_TREE_BLOCK_REF_KEY)
> >   				return type;
> >   			if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
> > -				ASSERT(eb->fs_info);
> > +				ASSERT(fs_info);
> >   				/*
> >   				 * Every shared one has parent tree block,
> >   				 * which must be aligned to sector size.
> >   				 */
> > -				if (offset &&
> > -				    IS_ALIGNED(offset, eb->fs_info->sectorsize))
> > +				if (offset && IS_ALIGNED(offset, fs_info->sectorsize))
> >   					return type;
> >   			}
> >   		} else if (is_data == BTRFS_REF_TYPE_DATA) {
> >   			if (type == BTRFS_EXTENT_DATA_REF_KEY)
> >   				return type;
> >   			if (type == BTRFS_SHARED_DATA_REF_KEY) {
> > -				ASSERT(eb->fs_info);
> > +				ASSERT(fs_info);
> >   				/*
> >   				 * Every shared one has parent tree block,
> >   				 * which must be aligned to sector size.
> >   				 */
> >   				if (offset &&
> > -				    IS_ALIGNED(offset, eb->fs_info->sectorsize))
> > +				    IS_ALIGNED(offset, fs_info->sectorsize))
> >   					return type;
> >   			}
> >   		} else {
> > @@ -403,7 +406,7 @@ int btrfs_get_extent_inline_ref_type(const struct extent_buffer *eb,
> >   	}
> >   	btrfs_print_leaf((struct extent_buffer *)eb);
> > -	btrfs_err(eb->fs_info,
> > +	btrfs_err(fs_info,
> >   		  "eb %llu iref 0x%lx invalid extent inline ref type %d",
> >   		  eb->start, (unsigned long)iref, type);
> >   	WARN_ON(1);
> > @@ -912,6 +915,11 @@ int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
> >   		}
> >   		iref = (struct btrfs_extent_inline_ref *)ptr;
> >   		type = btrfs_get_extent_inline_ref_type(leaf, iref, needed);
> > +		if (type == BTRFS_EXTENT_OWNER_REF_KEY) {
> > +			WARN_ON(!btrfs_fs_incompat(fs_info, SIMPLE_QUOTA));
> > +			ptr += btrfs_extent_inline_ref_size(type);
> > +			continue;
> > +		}
> >   		if (type == BTRFS_REF_TYPE_INVALID) {
> >   			err = -EUCLEAN;
> >   			goto out;
> > @@ -1708,6 +1716,8 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
> >   		 node->type == BTRFS_SHARED_DATA_REF_KEY)
> >   		ret = run_delayed_data_ref(trans, node, extent_op,
> >   					   insert_reserved);
> > +	else if (node->type == BTRFS_EXTENT_OWNER_REF_KEY)
> > +		ret = 0;
> >   	else
> >   		BUG();
> >   	if (ret && insert_reserved)
> > @@ -2275,6 +2285,7 @@ static noinline int check_committed_ref(struct btrfs_root *root,
> >   	struct btrfs_extent_item *ei;
> >   	struct btrfs_key key;
> >   	u32 item_size;
> > +	u32 expected_size;
> >   	int type;
> >   	int ret;
> > @@ -2301,10 +2312,17 @@ static noinline int check_committed_ref(struct btrfs_root *root,
> >   	ret = 1;
> >   	item_size = btrfs_item_size(leaf, path->slots[0]);
> >   	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
> > +	expected_size = sizeof(*ei) + btrfs_extent_inline_ref_size(BTRFS_EXTENT_DATA_REF_KEY);
> > +
> > +	iref = (struct btrfs_extent_inline_ref *)(ei + 1);
> > +	type = btrfs_get_extent_inline_ref_type(leaf, iref, BTRFS_REF_TYPE_DATA);
> > +	if (btrfs_fs_incompat(fs_info, SIMPLE_QUOTA) && type == BTRFS_EXTENT_OWNER_REF_KEY) {
> > +		expected_size += btrfs_extent_inline_ref_size(BTRFS_EXTENT_OWNER_REF_KEY);
> > +		iref = (struct btrfs_extent_inline_ref *)(iref + 1);
> > +	}
> >   	/* If extent item has more than 1 inline ref then it's shared */
> > -	if (item_size != sizeof(*ei) +
> > -	    btrfs_extent_inline_ref_size(BTRFS_EXTENT_DATA_REF_KEY))
> > +	if (item_size != expected_size)
> >   		goto out;
> >   	/*
> > @@ -2316,8 +2334,6 @@ static noinline int check_committed_ref(struct btrfs_root *root,
> >   	     btrfs_root_last_snapshot(&root->root_item)))
> >   		goto out;
> > -	iref = (struct btrfs_extent_inline_ref *)(ei + 1);
> > -
> >   	/* If this extent has SHARED_DATA_REF then it's shared */
> >   	type = btrfs_get_extent_inline_ref_type(leaf, iref, BTRFS_REF_TYPE_DATA);
> >   	if (type != BTRFS_EXTENT_DATA_REF_KEY)
> > @@ -4572,6 +4588,7 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
> >   	struct btrfs_root *extent_root;
> >   	int ret;
> >   	struct btrfs_extent_item *extent_item;
> > +	struct btrfs_extent_owner_ref *oref;
> >   	struct btrfs_extent_inline_ref *iref;
> >   	struct btrfs_path *path;
> >   	struct extent_buffer *leaf;
> > @@ -4583,7 +4600,10 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
> >   	else
> >   		type = BTRFS_EXTENT_DATA_REF_KEY;
> > -	size = sizeof(*extent_item) + btrfs_extent_inline_ref_size(type);
> > +	size = sizeof(*extent_item);
> > +	if (btrfs_qgroup_mode(fs_info) == BTRFS_QGROUP_MODE_SIMPLE)
> > +		size += btrfs_extent_inline_ref_size(BTRFS_EXTENT_OWNER_REF_KEY);
> > +	size += btrfs_extent_inline_ref_size(type);
> >   	path = btrfs_alloc_path();
> >   	if (!path)
> > @@ -4604,8 +4624,16 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
> >   	btrfs_set_extent_flags(leaf, extent_item,
> >   			       flags | BTRFS_EXTENT_FLAG_DATA);
> > +
> >   	iref = (struct btrfs_extent_inline_ref *)(extent_item + 1);
> > +	if (btrfs_fs_incompat(fs_info, SIMPLE_QUOTA)) {
> > +		btrfs_set_extent_inline_ref_type(leaf, iref, BTRFS_EXTENT_OWNER_REF_KEY);
> > +		oref = (struct btrfs_extent_owner_ref *)(&iref->offset);
> > +		btrfs_set_extent_owner_ref_root_id(leaf, oref, root_objectid);
> > +		iref = (struct btrfs_extent_inline_ref *)(oref + 1);
> > +	}
> >   	btrfs_set_extent_inline_ref_type(leaf, iref, type);
> > +
> >   	if (parent > 0) {
> >   		struct btrfs_shared_data_ref *ref;
> >   		ref = (struct btrfs_shared_data_ref *)(iref + 1);
> > diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c
> > index b93c96213304..1114cd915bd8 100644
> > --- a/fs/btrfs/print-tree.c
> > +++ b/fs/btrfs/print-tree.c
> > @@ -80,12 +80,20 @@ static void print_extent_data_ref(struct extent_buffer *eb,
> >   	       btrfs_extent_data_ref_count(eb, ref));
> >   }
> > +static void print_extent_owner_ref(struct extent_buffer *eb,
> > +				   struct btrfs_extent_owner_ref *ref)
> > +{
> > +	WARN_ON(!btrfs_fs_incompat(eb->fs_info, SIMPLE_QUOTA));
> > +	pr_cont("extent data owner root %llu\n", btrfs_extent_owner_ref_root_id(eb, ref));
> > +}
> > +
> >   static void print_extent_item(struct extent_buffer *eb, int slot, int type)
> >   {
> >   	struct btrfs_extent_item *ei;
> >   	struct btrfs_extent_inline_ref *iref;
> >   	struct btrfs_extent_data_ref *dref;
> >   	struct btrfs_shared_data_ref *sref;
> > +	struct btrfs_extent_owner_ref *oref;
> >   	struct btrfs_disk_key key;
> >   	unsigned long end;
> >   	unsigned long ptr;
> > @@ -159,6 +167,10 @@ static void print_extent_item(struct extent_buffer *eb, int slot, int type)
> >   			"\t\t\t(parent %llu not aligned to sectorsize %u)\n",
> >   				     offset, eb->fs_info->sectorsize);
> >   			break;
> > +		case BTRFS_EXTENT_OWNER_REF_KEY:
> > +			oref = (struct btrfs_extent_owner_ref *)(&iref->offset);
> > +			print_extent_owner_ref(eb, oref);
> > +			break;
> >   		default:
> >   			pr_cont("(extent %llu has INVALID ref type %d)\n",
> >   				  eb->start, type);
> > diff --git a/fs/btrfs/ref-verify.c b/fs/btrfs/ref-verify.c
> > index 95d28497de7c..9edc87eaff1f 100644
> > --- a/fs/btrfs/ref-verify.c
> > +++ b/fs/btrfs/ref-verify.c
> > @@ -485,6 +485,9 @@ static int process_extent_item(struct btrfs_fs_info *fs_info,
> >   			ret = add_shared_data_ref(fs_info, offset, count,
> >   						  key->objectid, key->offset);
> >   			break;
> > +		case BTRFS_EXTENT_OWNER_REF_KEY:
> > +			WARN_ON(!btrfs_fs_incompat(fs_info, SIMPLE_QUOTA));
> > +			break;
> >   		default:
> >   			btrfs_err(fs_info, "invalid key type in iref");
> >   			ret = -EINVAL;
> > diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c
> > index e2b54793bf0c..27d4230a38a8 100644
> > --- a/fs/btrfs/tree-checker.c
> > +++ b/fs/btrfs/tree-checker.c
> > @@ -1451,6 +1451,9 @@ static int check_extent_item(struct extent_buffer *leaf,
> >   			}
> >   			inline_refs += btrfs_shared_data_ref_count(leaf, sref);
> >   			break;
> > +		case BTRFS_EXTENT_OWNER_REF_KEY:
> > +			WARN_ON(!btrfs_fs_incompat(fs_info, SIMPLE_QUOTA));
> > +			break;
> >   		default:
> >   			extent_err(leaf, slot, "unknown inline ref type: %u",
> >   				   inline_type);
> > diff --git a/include/uapi/linux/btrfs_tree.h b/include/uapi/linux/btrfs_tree.h
> > index ab38d0f411fa..424c7f342712 100644
> > --- a/include/uapi/linux/btrfs_tree.h
> > +++ b/include/uapi/linux/btrfs_tree.h
> > @@ -226,6 +226,8 @@
> >   #define BTRFS_SHARED_DATA_REF_KEY	184
> > +#define BTRFS_EXTENT_OWNER_REF_KEY	190
> > +
> >   /*
> >    * block groups give us hints into the extent allocation trees.  Which
> >    * blocks are free etc etc
> > @@ -783,6 +785,10 @@ struct btrfs_shared_data_ref {
> >   	__le32 count;
> >   } __attribute__ ((__packed__));
> > +struct btrfs_extent_owner_ref {
> > +	u64 root_id;
> > +} __attribute__ ((__packed__));
> > +
> >   struct btrfs_extent_inline_ref {
> >   	__u8 type;
> >   	__le64 offset;
>
Qu Wenruo May 4, 2023, 9:49 p.m. UTC | #3
On 2023/5/5 00:17, Boris Burkov wrote:
> On Wed, May 03, 2023 at 11:17:12AM +0800, Qu Wenruo wrote:
>>
>>
>> On 2023/5/3 08:59, Boris Burkov wrote:
>>> In order to implement simple quota groups, we need to be able to
>>> associate a data extent with the subvolume that created it. Once you
>>> account for reflink, this information cannot be recovered without
>>> explicitly storing it. Options for storing it are:
>>> - a new key/item
>>> - a new extent inline ref item
>>>
>>> The former is backwards compatible, but wastes space, the latter is
>>> incompat, but is efficient in space and reuses the existing inline ref
>>> machinery, while only abusing it a tiny amount -- specifically, the new
>>> item is not a ref, per-se.
>>
>> Even we introduce new extent tree items, we can still mark the fs compat_ro.
>>
>> As long as we don't do any writes, we can still read the fs without any
>> compatibility problem, and the enable/disable should be addressed by
>> btrfstune/mkfs anyway.
> 
> Unfortunately, I don't believe compat_ro is possible with this design.
> Because of how inline ref items are implemented, there is a lot of code
> that makes assumptions about the extent item size, and the inline ref
> item size based on their type. The best example that definitely breaks
> things rather than maybe just warning is check_extent in tree-checker.c

IIRC, if it's compat_ro, an older kernel would refuse to read the block
group items.

If we expand that behavior to reject the whole extent tree, it can stay
compat_ro, although you may need to do extra backports.

> 
> With a new unparseable ref item inserted in the sequence of refs, that
> code will either overflow or detect padding. The size calculation comes
> up 0, etc. Perhaps there is a clever way to trick it, but I have not
> seen it yet.
> 
> I was able to make it compat_ro by introducing an entirely new item for
> the owner ref, but that comes with a per extent disk usage tradeoff that
> is fairly steep for storing just a single u64.

If it's only to glue the original ref to an extent, I guess a new key
without an item would be enough, although that's still quite expensive.

> 
>>
>> Thanks,
>> Qu
>>>
>>> Signed-off-by: Boris Burkov <boris@bur.io>
>>> ---
>>>    fs/btrfs/accessors.h            |  4 +++
>>>    fs/btrfs/backref.c              |  3 ++
>>>    fs/btrfs/extent-tree.c          | 50 +++++++++++++++++++++++++--------
>>>    fs/btrfs/print-tree.c           | 12 ++++++++
>>>    fs/btrfs/ref-verify.c           |  3 ++
>>>    fs/btrfs/tree-checker.c         |  3 ++
>>>    include/uapi/linux/btrfs_tree.h |  6 ++++
>>>    7 files changed, 70 insertions(+), 11 deletions(-)
>>>
>>> diff --git a/fs/btrfs/accessors.h b/fs/btrfs/accessors.h
>>> index ceadfc5d6c66..aab61312e4e8 100644
>>> --- a/fs/btrfs/accessors.h
>>> +++ b/fs/btrfs/accessors.h
>>> @@ -350,6 +350,8 @@ BTRFS_SETGET_FUNCS(extent_data_ref_count, struct btrfs_extent_data_ref, count, 3
>>>    BTRFS_SETGET_FUNCS(shared_data_ref_count, struct btrfs_shared_data_ref, count, 32);
>>> +BTRFS_SETGET_FUNCS(extent_owner_ref_root_id, struct btrfs_extent_owner_ref, root_id, 64);
>>> +
>>>    BTRFS_SETGET_FUNCS(extent_inline_ref_type, struct btrfs_extent_inline_ref,
>>>    		   type, 8);
>>>    BTRFS_SETGET_FUNCS(extent_inline_ref_offset, struct btrfs_extent_inline_ref,
>>> @@ -366,6 +368,8 @@ static inline u32 btrfs_extent_inline_ref_size(int type)
>>>    	if (type == BTRFS_EXTENT_DATA_REF_KEY)
>>>    		return sizeof(struct btrfs_extent_data_ref) +
>>>    		       offsetof(struct btrfs_extent_inline_ref, offset);
>>> +	if (type == BTRFS_EXTENT_OWNER_REF_KEY)
>>> +		return sizeof(struct btrfs_extent_inline_ref);
>>>    	return 0;
>>>    }
>>> diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
>>> index e54f0884802a..8cd8ed6c572f 100644
>>> --- a/fs/btrfs/backref.c
>>> +++ b/fs/btrfs/backref.c
>>> @@ -1128,6 +1128,9 @@ static int add_inline_refs(struct btrfs_backref_walk_ctx *ctx,
>>>    						       count, sc, GFP_NOFS);
>>>    			break;
>>>    		}
>>> +		case BTRFS_EXTENT_OWNER_REF_KEY:
>>> +			WARN_ON(!btrfs_fs_incompat(ctx->fs_info, SIMPLE_QUOTA));
>>> +			break;
>>>    		default:
>>>    			WARN_ON(1);
>>>    		}
>>> diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
>>> index 5cd289de4e92..b9a2f1e355b7 100644
>>> --- a/fs/btrfs/extent-tree.c
>>> +++ b/fs/btrfs/extent-tree.c
>>> @@ -363,9 +363,13 @@ int btrfs_get_extent_inline_ref_type(const struct extent_buffer *eb,
>>>    				     struct btrfs_extent_inline_ref *iref,
>>>    				     enum btrfs_inline_ref_type is_data)
>>>    {
>>> +	struct btrfs_fs_info *fs_info = eb->fs_info;
>>>    	int type = btrfs_extent_inline_ref_type(eb, iref);
>>>    	u64 offset = btrfs_extent_inline_ref_offset(eb, iref);
>>> +	if (type == BTRFS_EXTENT_OWNER_REF_KEY && btrfs_fs_incompat(fs_info, SIMPLE_QUOTA))
>>> +		return type;
>>> +
>>>    	if (type == BTRFS_TREE_BLOCK_REF_KEY ||
>>>    	    type == BTRFS_SHARED_BLOCK_REF_KEY ||
>>>    	    type == BTRFS_SHARED_DATA_REF_KEY ||
>>> @@ -374,26 +378,25 @@ int btrfs_get_extent_inline_ref_type(const struct extent_buffer *eb,
>>>    			if (type == BTRFS_TREE_BLOCK_REF_KEY)
>>>    				return type;
>>>    			if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
>>> -				ASSERT(eb->fs_info);
>>> +				ASSERT(fs_info);
>>>    				/*
>>>    				 * Every shared one has parent tree block,
>>>    				 * which must be aligned to sector size.
>>>    				 */
>>> -				if (offset &&
>>> -				    IS_ALIGNED(offset, eb->fs_info->sectorsize))
>>> +				if (offset && IS_ALIGNED(offset, fs_info->sectorsize))
>>>    					return type;
>>>    			}
>>>    		} else if (is_data == BTRFS_REF_TYPE_DATA) {
>>>    			if (type == BTRFS_EXTENT_DATA_REF_KEY)
>>>    				return type;
>>>    			if (type == BTRFS_SHARED_DATA_REF_KEY) {
>>> -				ASSERT(eb->fs_info);
>>> +				ASSERT(fs_info);
>>>    				/*
>>>    				 * Every shared one has parent tree block,
>>>    				 * which must be aligned to sector size.
>>>    				 */
>>>    				if (offset &&
>>> -				    IS_ALIGNED(offset, eb->fs_info->sectorsize))
>>> +				    IS_ALIGNED(offset, fs_info->sectorsize))
>>>    					return type;
>>>    			}
>>>    		} else {
>>> @@ -403,7 +406,7 @@ int btrfs_get_extent_inline_ref_type(const struct extent_buffer *eb,
>>>    	}
>>>    	btrfs_print_leaf((struct extent_buffer *)eb);
>>> -	btrfs_err(eb->fs_info,
>>> +	btrfs_err(fs_info,
>>>    		  "eb %llu iref 0x%lx invalid extent inline ref type %d",
>>>    		  eb->start, (unsigned long)iref, type);
>>>    	WARN_ON(1);
>>> @@ -912,6 +915,11 @@ int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
>>>    		}
>>>    		iref = (struct btrfs_extent_inline_ref *)ptr;
>>>    		type = btrfs_get_extent_inline_ref_type(leaf, iref, needed);
>>> +		if (type == BTRFS_EXTENT_OWNER_REF_KEY) {
>>> +			WARN_ON(!btrfs_fs_incompat(fs_info, SIMPLE_QUOTA));
>>> +			ptr += btrfs_extent_inline_ref_size(type);
>>> +			continue;
>>> +		}
>>>    		if (type == BTRFS_REF_TYPE_INVALID) {
>>>    			err = -EUCLEAN;
>>>    			goto out;
>>> @@ -1708,6 +1716,8 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
>>>    		 node->type == BTRFS_SHARED_DATA_REF_KEY)
>>>    		ret = run_delayed_data_ref(trans, node, extent_op,
>>>    					   insert_reserved);
>>> +	else if (node->type == BTRFS_EXTENT_OWNER_REF_KEY)
>>> +		ret = 0;
>>>    	else
>>>    		BUG();
>>>    	if (ret && insert_reserved)
>>> @@ -2275,6 +2285,7 @@ static noinline int check_committed_ref(struct btrfs_root *root,
>>>    	struct btrfs_extent_item *ei;
>>>    	struct btrfs_key key;
>>>    	u32 item_size;
>>> +	u32 expected_size;
>>>    	int type;
>>>    	int ret;
>>> @@ -2301,10 +2312,17 @@ static noinline int check_committed_ref(struct btrfs_root *root,
>>>    	ret = 1;
>>>    	item_size = btrfs_item_size(leaf, path->slots[0]);
>>>    	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
>>> +	expected_size = sizeof(*ei) + btrfs_extent_inline_ref_size(BTRFS_EXTENT_DATA_REF_KEY);
>>> +
>>> +	iref = (struct btrfs_extent_inline_ref *)(ei + 1);
>>> +	type = btrfs_get_extent_inline_ref_type(leaf, iref, BTRFS_REF_TYPE_DATA);
>>> +	if (btrfs_fs_incompat(fs_info, SIMPLE_QUOTA) && type == BTRFS_EXTENT_OWNER_REF_KEY) {
>>> +		expected_size += btrfs_extent_inline_ref_size(BTRFS_EXTENT_OWNER_REF_KEY);
>>> +		iref = (struct btrfs_extent_inline_ref *)(iref + 1);
>>> +	}
>>>    	/* If extent item has more than 1 inline ref then it's shared */
>>> -	if (item_size != sizeof(*ei) +
>>> -	    btrfs_extent_inline_ref_size(BTRFS_EXTENT_DATA_REF_KEY))
>>> +	if (item_size != expected_size)
>>>    		goto out;
>>>    	/*
>>> @@ -2316,8 +2334,6 @@ static noinline int check_committed_ref(struct btrfs_root *root,
>>>    	     btrfs_root_last_snapshot(&root->root_item)))
>>>    		goto out;
>>> -	iref = (struct btrfs_extent_inline_ref *)(ei + 1);
>>> -
>>>    	/* If this extent has SHARED_DATA_REF then it's shared */
>>>    	type = btrfs_get_extent_inline_ref_type(leaf, iref, BTRFS_REF_TYPE_DATA);
>>>    	if (type != BTRFS_EXTENT_DATA_REF_KEY)
>>> @@ -4572,6 +4588,7 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
>>>    	struct btrfs_root *extent_root;
>>>    	int ret;
>>>    	struct btrfs_extent_item *extent_item;
>>> +	struct btrfs_extent_owner_ref *oref;
>>>    	struct btrfs_extent_inline_ref *iref;
>>>    	struct btrfs_path *path;
>>>    	struct extent_buffer *leaf;
>>> @@ -4583,7 +4600,10 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
>>>    	else
>>>    		type = BTRFS_EXTENT_DATA_REF_KEY;
>>> -	size = sizeof(*extent_item) + btrfs_extent_inline_ref_size(type);
>>> +	size = sizeof(*extent_item);
>>> +	if (btrfs_qgroup_mode(fs_info) == BTRFS_QGROUP_MODE_SIMPLE)
>>> +		size += btrfs_extent_inline_ref_size(BTRFS_EXTENT_OWNER_REF_KEY);
>>> +	size += btrfs_extent_inline_ref_size(type);
>>>    	path = btrfs_alloc_path();
>>>    	if (!path)
>>> @@ -4604,8 +4624,16 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
>>>    	btrfs_set_extent_flags(leaf, extent_item,
>>>    			       flags | BTRFS_EXTENT_FLAG_DATA);
>>> +
>>>    	iref = (struct btrfs_extent_inline_ref *)(extent_item + 1);
>>> +	if (btrfs_fs_incompat(fs_info, SIMPLE_QUOTA)) {
>>> +		btrfs_set_extent_inline_ref_type(leaf, iref, BTRFS_EXTENT_OWNER_REF_KEY);
>>> +		oref = (struct btrfs_extent_owner_ref *)(&iref->offset);
>>> +		btrfs_set_extent_owner_ref_root_id(leaf, oref, root_objectid);
>>> +		iref = (struct btrfs_extent_inline_ref *)(oref + 1);
>>> +	}
>>>    	btrfs_set_extent_inline_ref_type(leaf, iref, type);
>>> +
>>>    	if (parent > 0) {
>>>    		struct btrfs_shared_data_ref *ref;
>>>    		ref = (struct btrfs_shared_data_ref *)(iref + 1);
>>> diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c
>>> index b93c96213304..1114cd915bd8 100644
>>> --- a/fs/btrfs/print-tree.c
>>> +++ b/fs/btrfs/print-tree.c
>>> @@ -80,12 +80,20 @@ static void print_extent_data_ref(struct extent_buffer *eb,
>>>    	       btrfs_extent_data_ref_count(eb, ref));
>>>    }
>>> +static void print_extent_owner_ref(struct extent_buffer *eb,
>>> +				   struct btrfs_extent_owner_ref *ref)
>>> +{
>>> +	WARN_ON(!btrfs_fs_incompat(eb->fs_info, SIMPLE_QUOTA));
>>> +	pr_cont("extent data owner root %llu\n", btrfs_extent_owner_ref_root_id(eb, ref));
>>> +}
>>> +
>>>    static void print_extent_item(struct extent_buffer *eb, int slot, int type)
>>>    {
>>>    	struct btrfs_extent_item *ei;
>>>    	struct btrfs_extent_inline_ref *iref;
>>>    	struct btrfs_extent_data_ref *dref;
>>>    	struct btrfs_shared_data_ref *sref;
>>> +	struct btrfs_extent_owner_ref *oref;
>>>    	struct btrfs_disk_key key;
>>>    	unsigned long end;
>>>    	unsigned long ptr;
>>> @@ -159,6 +167,10 @@ static void print_extent_item(struct extent_buffer *eb, int slot, int type)
>>>    			"\t\t\t(parent %llu not aligned to sectorsize %u)\n",
>>>    				     offset, eb->fs_info->sectorsize);
>>>    			break;
>>> +		case BTRFS_EXTENT_OWNER_REF_KEY:
>>> +			oref = (struct btrfs_extent_owner_ref *)(&iref->offset);
>>> +			print_extent_owner_ref(eb, oref);
>>> +			break;
>>>    		default:
>>>    			pr_cont("(extent %llu has INVALID ref type %d)\n",
>>>    				  eb->start, type);
>>> diff --git a/fs/btrfs/ref-verify.c b/fs/btrfs/ref-verify.c
>>> index 95d28497de7c..9edc87eaff1f 100644
>>> --- a/fs/btrfs/ref-verify.c
>>> +++ b/fs/btrfs/ref-verify.c
>>> @@ -485,6 +485,9 @@ static int process_extent_item(struct btrfs_fs_info *fs_info,
>>>    			ret = add_shared_data_ref(fs_info, offset, count,
>>>    						  key->objectid, key->offset);
>>>    			break;
>>> +		case BTRFS_EXTENT_OWNER_REF_KEY:
>>> +			WARN_ON(!btrfs_fs_incompat(fs_info, SIMPLE_QUOTA));
>>> +			break;
>>>    		default:
>>>    			btrfs_err(fs_info, "invalid key type in iref");
>>>    			ret = -EINVAL;
>>> diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c
>>> index e2b54793bf0c..27d4230a38a8 100644
>>> --- a/fs/btrfs/tree-checker.c
>>> +++ b/fs/btrfs/tree-checker.c
>>> @@ -1451,6 +1451,9 @@ static int check_extent_item(struct extent_buffer *leaf,
>>>    			}
>>>    			inline_refs += btrfs_shared_data_ref_count(leaf, sref);
>>>    			break;
>>> +		case BTRFS_EXTENT_OWNER_REF_KEY:
>>> +			WARN_ON(!btrfs_fs_incompat(fs_info, SIMPLE_QUOTA));
>>> +			break;
>>>    		default:
>>>    			extent_err(leaf, slot, "unknown inline ref type: %u",
>>>    				   inline_type);
>>> diff --git a/include/uapi/linux/btrfs_tree.h b/include/uapi/linux/btrfs_tree.h
>>> index ab38d0f411fa..424c7f342712 100644
>>> --- a/include/uapi/linux/btrfs_tree.h
>>> +++ b/include/uapi/linux/btrfs_tree.h
>>> @@ -226,6 +226,8 @@
>>>    #define BTRFS_SHARED_DATA_REF_KEY	184
>>> +#define BTRFS_EXTENT_OWNER_REF_KEY	190
>>> +
>>>    /*
>>>     * block groups give us hints into the extent allocation trees.  Which
>>>     * blocks are free etc etc
>>> @@ -783,6 +785,10 @@ struct btrfs_shared_data_ref {
>>>    	__le32 count;
>>>    } __attribute__ ((__packed__));
>>> +struct btrfs_extent_owner_ref {
>>> +	u64 root_id;
>>> +} __attribute__ ((__packed__));
>>> +
>>>    struct btrfs_extent_inline_ref {
>>>    	__u8 type;
>>>    	__le64 offset;
>>
David Sterba May 9, 2023, 11:58 p.m. UTC | #4
On Fri, May 05, 2023 at 05:49:00AM +0800, Qu Wenruo wrote:
> On 2023/5/5 00:17, Boris Burkov wrote:
> > On Wed, May 03, 2023 at 11:17:12AM +0800, Qu Wenruo wrote:
> >> On 2023/5/3 08:59, Boris Burkov wrote:
> >>> In order to implement simple quota groups, we need to be able to
> >>> associate a data extent with the subvolume that created it. Once you
> >>> account for reflink, this information cannot be recovered without
> >>> explicitly storing it. Options for storing it are:
> >>> - a new key/item
> >>> - a new extent inline ref item
> >>>
> >>> The former is backwards compatible, but wastes space, the latter is
> >>> incompat, but is efficient in space and reuses the existing inline ref
> >>> machinery, while only abusing it a tiny amount -- specifically, the new
> >>> item is not a ref, per-se.
> >>
> >> Even we introduce new extent tree items, we can still mark the fs compat_ro.
> >>
> >> As long as we don't do any writes, we can still read the fs without any
> >> compatibility problem, and the enable/disable should be addressed by
> >> btrfstune/mkfs anyway.
> > 
> > Unfortunately, I don't believe compat_ro is possible with this design.
> > Because of how inline ref items are implemented, there is a lot of code
> > that makes assumptions about the extent item size, and the inline ref
> > item size based on their type. The best example that definitely breaks
> > things rather than maybe just warning is check_extent in tree-checker.c
> 
> IIRC if it's compat_ro, older kernel would reject the block group items 
> read.
> 
> If we expand that behavior to reject the whole extent tree, it can stay 
> compat_ro.
> Although you may need to do extra backports.
> 
> > 
> > With a new unparseable ref item inserted in the sequence of refs, that
> > code will either overflow or detect padding. The size calculation comes
> > up 0, etc. Perhaps there is a clever way to trick it, but I have not
> > seen it yet.
> > 
> > I was able to make it compat_ro by introducing an entirely new item for
> > the owner ref, but that comes with a per extent disk usage tradeoff that
> > is fairly steep for storing just a single u64.
> 
> If it's only to glue the original ref to an extent, I guess a new key 
> without an item would be enough.
> Although that's still quite expensive.

I consider allocating a new key a high cost; it's worth it for a new
feature like verity or encryption where we require fine-grained
tracking of some new information. The number space is 255 values wide
and there are some ranges that are relatively ordered so the placement
in the logical b-tree space is good. We still have enough free values
but the gaps get smaller each time so I'd rather consider other options
first.

One drawback with features defined by keys is that it can't be easily
seen from superblock if the feature is present or not. Like extended
refs, no holes, lzo/zstd compressed extents. We always need to add the
compat bit. In case we would add a new key just to store little data and
still need to add the incompat bit it's time to think again if we could
get away with just the incompat bit. With some loss of backward
compatibility.

Right now I don't know what would be the best way forward but I'm
leaning more towards less backward compatibility and saving space in
structures. We get new incompat features "regularly" and people move to
newer kernels eventually after some period where we have time to iron
out bugs and explore the use case.

The simple quotas should fill the gap that qgroups can't so it makes
sense and people have been asking for something like that.
David Sterba May 10, 2023, 4:57 p.m. UTC | #5
On Wed, May 03, 2023 at 11:17:12AM +0800, Qu Wenruo wrote:
> On 2023/5/3 08:59, Boris Burkov wrote:
> > In order to implement simple quota groups, we need to be able to
> > associate a data extent with the subvolume that created it. Once you
> > account for reflink, this information cannot be recovered without
> > explicitly storing it. Options for storing it are:
> > - a new key/item
> > - a new extent inline ref item
> > 
> > The former is backwards compatible, but wastes space, the latter is
> > incompat, but is efficient in space and reuses the existing inline ref
> > machinery, while only abusing it a tiny amount -- specifically, the new
> > item is not a ref, per-se.
> 
> Even we introduce new extent tree items, we can still mark the fs compat_ro.
> 
> As long as we don't do any writes, we can still read the fs without any 
> compatibility problem, and the enable/disable should be addressed by 
> btrfstune/mkfs anyway.

There was a discussion today about how the simple quotas should be enabled.
We have 3 ways, ioctl, mkfs and btrfstune. Currently the qgroups can be
enabled by an ioctl and newly at mkfs time.

For squotas I'd do the same, for interface parity and because the quotas
are a feature that allows that, it's an accounting layer on top of the
extent structures. Other mkfs features are once and for the whole
filesystem lifetime.

You suggest avoiding the ioctl, which I'd understand as a way to avoid all the
problems with races and deadlocks that we have been fixing. Fortunately
the quota enable ioctl is extensible so we can add the squota
enable/disable commands and build on top of the whole quota
infrastructure we already have.

In addition the mkfs enabling should work too, like for qgroups. I think
we should support the use case when the need to start accounting data
comes later than mkfs and unmounting the filesystem is not feasible.

This also follows the existing usage of the generic quotas that can be
enabled or disabled as needed.
Qu Wenruo May 11, 2023, 12:07 a.m. UTC | #6
On 2023/5/11 00:57, David Sterba wrote:
> On Wed, May 03, 2023 at 11:17:12AM +0800, Qu Wenruo wrote:
>> On 2023/5/3 08:59, Boris Burkov wrote:
>>> In order to implement simple quota groups, we need to be able to
>>> associate a data extent with the subvolume that created it. Once you
>>> account for reflink, this information cannot be recovered without
>>> explicitly storing it. Options for storing it are:
>>> - a new key/item
>>> - a new extent inline ref item
>>>
>>> The former is backwards compatible, but wastes space, the latter is
>>> incompat, but is efficient in space and reuses the existing inline ref
>>> machinery, while only abusing it a tiny amount -- specifically, the new
>>> item is not a ref, per-se.
>>
>> Even we introduce new extent tree items, we can still mark the fs compat_ro.
>>
>> As long as we don't do any writes, we can still read the fs without any
>> compatibility problem, and the enable/disable should be addressed by
>> btrfstune/mkfs anyway.
>
> There a was a discussion today how the simple quotas should be enabled.
> We have 3 ways, ioctl, mkfs and btrfstune. Currently the qgroups can be
> enabled by an ioctl and newly at mkfs time.
>
> For squotas I'd do the same, for interface parity and because the quotas
> are a feature that allows that, it's an accounting layer on top of the
> extent structures. Other mkfs features are once and for the whole
> filesystem lifetime.

OK, ioctl is still better than mount option, so it's acceptable to me.

The other concern is, would this be compat_ro or incompat?

I want to ensure the extent tree change can still be compat_ro, which may
require us to make sure unsupported compat_ro flags would not cause any
extent tree read.

We have already skipped bg items search, but we still read the extent
tree root.

Thanks,
Qu
>
> You suggest to avoid doing ioctl, which I'd understand to avoid all the
> problems with races and deadlocks that we have been fixing. Fortunatelly
> the quota enable ioctl is extensible so we can add the squota
> enable/disable commands and built on top of the whole quota
> infrastructure we already have.
>
> In addition the mkfs enabling should work too, like for qgroups. I think
> we should support the use case when the need to start accounting data
> comes later than mkfs and unmounting the filesystem is not feasible.
>
> This also follows the existing usage of the generic quotas that can be
> enabled or disabled as needed.
Qu Wenruo May 13, 2023, 6:31 a.m. UTC | #7
On 2023/5/11 00:57, David Sterba wrote:
> On Wed, May 03, 2023 at 11:17:12AM +0800, Qu Wenruo wrote:
>> On 2023/5/3 08:59, Boris Burkov wrote:
>>> In order to implement simple quota groups, we need to be able to
>>> associate a data extent with the subvolume that created it. Once you
>>> account for reflink, this information cannot be recovered without
>>> explicitly storing it. Options for storing it are:
>>> - a new key/item
>>> - a new extent inline ref item
>>>
>>> The former is backwards compatible, but wastes space, the latter is
>>> incompat, but is efficient in space and reuses the existing inline ref
>>> machinery, while only abusing it a tiny amount -- specifically, the new
>>> item is not a ref, per-se.
>>
>> Even we introduce new extent tree items, we can still mark the fs compat_ro.
>>
>> As long as we don't do any writes, we can still read the fs without any
>> compatibility problem, and the enable/disable should be addressed by
>> btrfstune/mkfs anyway.
>
> There a was a discussion today how the simple quotas should be enabled.
> We have 3 ways, ioctl, mkfs and btrfstune. Currently the qgroups can be
> enabled by an ioctl and newly at mkfs time.
>
> For squotas I'd do the same, for interface parity and because the quotas
> are a feature that allows that, it's an accounting layer on top of the
> extent structures. Other mkfs features are once and for the whole
> filesystem lifetime.
>
> You suggest to avoid doing ioctl, which I'd understand to avoid all the
> problems with races and deadlocks that we have been fixing. Fortunatelly
> the quota enable ioctl is extensible so we can add the squota
> enable/disable commands and built on top of the whole quota
> infrastructure we already have.
>
> In addition the mkfs enabling should work too, like for qgroups. I think
> we should support the use case when the need to start accounting data
> comes later than mkfs and unmounting the filesystem is not feasible.
>
> This also follows the existing usage of the generic quotas that can be
> enabled or disabled as needed.

BTW, if we go with the ioctl method, there may be more trade-offs to make
between a dedicated tree and storing the data inside the extent tree:

- Scan progress
   If we go with the regular extent tree, we need to update quite a large
   part (if not the whole) of the extent tree, for both enable and disable.

   If we go with a dedicated tree, it's at least smaller than the extent tree.
   For the item space inefficiency, we can pack several <bytenr, owner>
   pairs into the leaf items, a little like how we handle csum items.

- Subvolume deletion (without any snapshot)
   For regular extent tree, it's less a concern, as we need to delete
   related extents anyway.

   But for a dedicated tree, if the subvolume is large enough, we may update
   the whole dedicated tree in just one transaction.
   (although it should still be smaller than the extent tree).

- Compatibility with extent tree v2
   For regular extent tree, it's pretty straight forward.
   But for dedicated tree, should we split the tree?

Thanks,
Qu
diff mbox series

Patch

diff --git a/fs/btrfs/accessors.h b/fs/btrfs/accessors.h
index ceadfc5d6c66..aab61312e4e8 100644
--- a/fs/btrfs/accessors.h
+++ b/fs/btrfs/accessors.h
@@ -350,6 +350,8 @@  BTRFS_SETGET_FUNCS(extent_data_ref_count, struct btrfs_extent_data_ref, count, 3
 
 BTRFS_SETGET_FUNCS(shared_data_ref_count, struct btrfs_shared_data_ref, count, 32);
 
+BTRFS_SETGET_FUNCS(extent_owner_ref_root_id, struct btrfs_extent_owner_ref, root_id, 64);
+
 BTRFS_SETGET_FUNCS(extent_inline_ref_type, struct btrfs_extent_inline_ref,
 		   type, 8);
 BTRFS_SETGET_FUNCS(extent_inline_ref_offset, struct btrfs_extent_inline_ref,
@@ -366,6 +368,8 @@  static inline u32 btrfs_extent_inline_ref_size(int type)
 	if (type == BTRFS_EXTENT_DATA_REF_KEY)
 		return sizeof(struct btrfs_extent_data_ref) +
 		       offsetof(struct btrfs_extent_inline_ref, offset);
+	if (type == BTRFS_EXTENT_OWNER_REF_KEY)
+		return sizeof(struct btrfs_extent_inline_ref);
 	return 0;
 }
 
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index e54f0884802a..8cd8ed6c572f 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -1128,6 +1128,9 @@  static int add_inline_refs(struct btrfs_backref_walk_ctx *ctx,
 						       count, sc, GFP_NOFS);
 			break;
 		}
+		case BTRFS_EXTENT_OWNER_REF_KEY:
+			WARN_ON(!btrfs_fs_incompat(ctx->fs_info, SIMPLE_QUOTA));
+			break;
 		default:
 			WARN_ON(1);
 		}
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 5cd289de4e92..b9a2f1e355b7 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -363,9 +363,13 @@  int btrfs_get_extent_inline_ref_type(const struct extent_buffer *eb,
 				     struct btrfs_extent_inline_ref *iref,
 				     enum btrfs_inline_ref_type is_data)
 {
+	struct btrfs_fs_info *fs_info = eb->fs_info;
 	int type = btrfs_extent_inline_ref_type(eb, iref);
 	u64 offset = btrfs_extent_inline_ref_offset(eb, iref);
 
+	if (type == BTRFS_EXTENT_OWNER_REF_KEY && btrfs_fs_incompat(fs_info, SIMPLE_QUOTA))
+		return type;
+
 	if (type == BTRFS_TREE_BLOCK_REF_KEY ||
 	    type == BTRFS_SHARED_BLOCK_REF_KEY ||
 	    type == BTRFS_SHARED_DATA_REF_KEY ||
@@ -374,26 +378,25 @@  int btrfs_get_extent_inline_ref_type(const struct extent_buffer *eb,
 			if (type == BTRFS_TREE_BLOCK_REF_KEY)
 				return type;
 			if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
-				ASSERT(eb->fs_info);
+				ASSERT(fs_info);
 				/*
 				 * Every shared one has parent tree block,
 				 * which must be aligned to sector size.
 				 */
-				if (offset &&
-				    IS_ALIGNED(offset, eb->fs_info->sectorsize))
+				if (offset && IS_ALIGNED(offset, fs_info->sectorsize))
 					return type;
 			}
 		} else if (is_data == BTRFS_REF_TYPE_DATA) {
 			if (type == BTRFS_EXTENT_DATA_REF_KEY)
 				return type;
 			if (type == BTRFS_SHARED_DATA_REF_KEY) {
-				ASSERT(eb->fs_info);
+				ASSERT(fs_info);
 				/*
 				 * Every shared one has parent tree block,
 				 * which must be aligned to sector size.
 				 */
 				if (offset &&
-				    IS_ALIGNED(offset, eb->fs_info->sectorsize))
+				    IS_ALIGNED(offset, fs_info->sectorsize))
 					return type;
 			}
 		} else {
@@ -403,7 +406,7 @@  int btrfs_get_extent_inline_ref_type(const struct extent_buffer *eb,
 	}
 
 	btrfs_print_leaf((struct extent_buffer *)eb);
-	btrfs_err(eb->fs_info,
+	btrfs_err(fs_info,
 		  "eb %llu iref 0x%lx invalid extent inline ref type %d",
 		  eb->start, (unsigned long)iref, type);
 	WARN_ON(1);
@@ -912,6 +915,11 @@  int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
 		}
 		iref = (struct btrfs_extent_inline_ref *)ptr;
 		type = btrfs_get_extent_inline_ref_type(leaf, iref, needed);
+		if (type == BTRFS_EXTENT_OWNER_REF_KEY) {
+			WARN_ON(!btrfs_fs_incompat(fs_info, SIMPLE_QUOTA));
+			ptr += btrfs_extent_inline_ref_size(type);
+			continue;
+		}
 		if (type == BTRFS_REF_TYPE_INVALID) {
 			err = -EUCLEAN;
 			goto out;
@@ -1708,6 +1716,8 @@  static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
 		 node->type == BTRFS_SHARED_DATA_REF_KEY)
 		ret = run_delayed_data_ref(trans, node, extent_op,
 					   insert_reserved);
+	else if (node->type == BTRFS_EXTENT_OWNER_REF_KEY)
+		ret = 0;
 	else
 		BUG();
 	if (ret && insert_reserved)
@@ -2275,6 +2285,7 @@  static noinline int check_committed_ref(struct btrfs_root *root,
 	struct btrfs_extent_item *ei;
 	struct btrfs_key key;
 	u32 item_size;
+	u32 expected_size;
 	int type;
 	int ret;
 
@@ -2301,10 +2312,17 @@  static noinline int check_committed_ref(struct btrfs_root *root,
 	ret = 1;
 	item_size = btrfs_item_size(leaf, path->slots[0]);
 	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
+	expected_size = sizeof(*ei) + btrfs_extent_inline_ref_size(BTRFS_EXTENT_DATA_REF_KEY);
+
+	iref = (struct btrfs_extent_inline_ref *)(ei + 1);
+	type = btrfs_get_extent_inline_ref_type(leaf, iref, BTRFS_REF_TYPE_DATA);
+	if (btrfs_fs_incompat(fs_info, SIMPLE_QUOTA) && type == BTRFS_EXTENT_OWNER_REF_KEY) {
+		expected_size += btrfs_extent_inline_ref_size(BTRFS_EXTENT_OWNER_REF_KEY);
+		iref = (struct btrfs_extent_inline_ref *)(iref + 1);
+	}
 
 	/* If extent item has more than 1 inline ref then it's shared */
-	if (item_size != sizeof(*ei) +
-	    btrfs_extent_inline_ref_size(BTRFS_EXTENT_DATA_REF_KEY))
+	if (item_size != expected_size)
 		goto out;
 
 	/*
@@ -2316,8 +2334,6 @@  static noinline int check_committed_ref(struct btrfs_root *root,
 	     btrfs_root_last_snapshot(&root->root_item)))
 		goto out;
 
-	iref = (struct btrfs_extent_inline_ref *)(ei + 1);
-
 	/* If this extent has SHARED_DATA_REF then it's shared */
 	type = btrfs_get_extent_inline_ref_type(leaf, iref, BTRFS_REF_TYPE_DATA);
 	if (type != BTRFS_EXTENT_DATA_REF_KEY)
@@ -4572,6 +4588,7 @@  static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
 	struct btrfs_root *extent_root;
 	int ret;
 	struct btrfs_extent_item *extent_item;
+	struct btrfs_extent_owner_ref *oref;
 	struct btrfs_extent_inline_ref *iref;
 	struct btrfs_path *path;
 	struct extent_buffer *leaf;
@@ -4583,7 +4600,10 @@  static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
 	else
 		type = BTRFS_EXTENT_DATA_REF_KEY;
 
-	size = sizeof(*extent_item) + btrfs_extent_inline_ref_size(type);
+	size = sizeof(*extent_item);
+	if (btrfs_qgroup_mode(fs_info) == BTRFS_QGROUP_MODE_SIMPLE)
+		size += btrfs_extent_inline_ref_size(BTRFS_EXTENT_OWNER_REF_KEY);
+	size += btrfs_extent_inline_ref_size(type);
 
 	path = btrfs_alloc_path();
 	if (!path)
@@ -4604,8 +4624,16 @@  static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
 	btrfs_set_extent_flags(leaf, extent_item,
 			       flags | BTRFS_EXTENT_FLAG_DATA);
 
+
 	iref = (struct btrfs_extent_inline_ref *)(extent_item + 1);
+	if (btrfs_fs_incompat(fs_info, SIMPLE_QUOTA)) {
+		btrfs_set_extent_inline_ref_type(leaf, iref, BTRFS_EXTENT_OWNER_REF_KEY);
+		oref = (struct btrfs_extent_owner_ref *)(&iref->offset);
+		btrfs_set_extent_owner_ref_root_id(leaf, oref, root_objectid);
+		iref = (struct btrfs_extent_inline_ref *)(oref + 1);
+	}
 	btrfs_set_extent_inline_ref_type(leaf, iref, type);
+
 	if (parent > 0) {
 		struct btrfs_shared_data_ref *ref;
 		ref = (struct btrfs_shared_data_ref *)(iref + 1);
diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c
index b93c96213304..1114cd915bd8 100644
--- a/fs/btrfs/print-tree.c
+++ b/fs/btrfs/print-tree.c
@@ -80,12 +80,20 @@  static void print_extent_data_ref(struct extent_buffer *eb,
 	       btrfs_extent_data_ref_count(eb, ref));
 }
 
+static void print_extent_owner_ref(struct extent_buffer *eb,
+				   struct btrfs_extent_owner_ref *ref)
+{
+	WARN_ON(!btrfs_fs_incompat(eb->fs_info, SIMPLE_QUOTA));
+	pr_cont("extent data owner root %llu\n", btrfs_extent_owner_ref_root_id(eb, ref));
+}
+
 static void print_extent_item(struct extent_buffer *eb, int slot, int type)
 {
 	struct btrfs_extent_item *ei;
 	struct btrfs_extent_inline_ref *iref;
 	struct btrfs_extent_data_ref *dref;
 	struct btrfs_shared_data_ref *sref;
+	struct btrfs_extent_owner_ref *oref;
 	struct btrfs_disk_key key;
 	unsigned long end;
 	unsigned long ptr;
@@ -159,6 +167,10 @@  static void print_extent_item(struct extent_buffer *eb, int slot, int type)
 			"\t\t\t(parent %llu not aligned to sectorsize %u)\n",
 				     offset, eb->fs_info->sectorsize);
 			break;
+		case BTRFS_EXTENT_OWNER_REF_KEY:
+			oref = (struct btrfs_extent_owner_ref *)(&iref->offset);
+			print_extent_owner_ref(eb, oref);
+			break;
 		default:
 			pr_cont("(extent %llu has INVALID ref type %d)\n",
 				  eb->start, type);
diff --git a/fs/btrfs/ref-verify.c b/fs/btrfs/ref-verify.c
index 95d28497de7c..9edc87eaff1f 100644
--- a/fs/btrfs/ref-verify.c
+++ b/fs/btrfs/ref-verify.c
@@ -485,6 +485,9 @@  static int process_extent_item(struct btrfs_fs_info *fs_info,
 			ret = add_shared_data_ref(fs_info, offset, count,
 						  key->objectid, key->offset);
 			break;
+		case BTRFS_EXTENT_OWNER_REF_KEY:
+			WARN_ON(!btrfs_fs_incompat(fs_info, SIMPLE_QUOTA));
+			break;
 		default:
 			btrfs_err(fs_info, "invalid key type in iref");
 			ret = -EINVAL;
diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c
index e2b54793bf0c..27d4230a38a8 100644
--- a/fs/btrfs/tree-checker.c
+++ b/fs/btrfs/tree-checker.c
@@ -1451,6 +1451,9 @@  static int check_extent_item(struct extent_buffer *leaf,
 			}
 			inline_refs += btrfs_shared_data_ref_count(leaf, sref);
 			break;
+		case BTRFS_EXTENT_OWNER_REF_KEY:
+			WARN_ON(!btrfs_fs_incompat(fs_info, SIMPLE_QUOTA));
+			break;
 		default:
 			extent_err(leaf, slot, "unknown inline ref type: %u",
 				   inline_type);
diff --git a/include/uapi/linux/btrfs_tree.h b/include/uapi/linux/btrfs_tree.h
index ab38d0f411fa..424c7f342712 100644
--- a/include/uapi/linux/btrfs_tree.h
+++ b/include/uapi/linux/btrfs_tree.h
@@ -226,6 +226,8 @@ 
 
 #define BTRFS_SHARED_DATA_REF_KEY	184
 
+#define BTRFS_EXTENT_OWNER_REF_KEY	190
+
 /*
  * block groups give us hints into the extent allocation trees.  Which
  * blocks are free etc etc
@@ -783,6 +785,10 @@  struct btrfs_shared_data_ref {
 	__le32 count;
 } __attribute__ ((__packed__));
 
+struct btrfs_extent_owner_ref {
+	u64 root_id;
+} __attribute__ ((__packed__));
+
 struct btrfs_extent_inline_ref {
 	__u8 type;
 	__le64 offset;