diff mbox series

[14/24] xfs: support caching rtgroup metadata inodes

Message ID 172437087487.59588.6672080001636292983.stgit@frogsfrogsfrogs (mailing list archive)
State New
Headers show
Series [01/24] xfs: clean up the ISVALID macro in xfs_bmap_adjacent | expand

Commit Message

Darrick J. Wong Aug. 23, 2024, 12:18 a.m. UTC
From: Darrick J. Wong <djwong@kernel.org>

Create the necessary per-rtgroup infrastructure that we need to load
metadata inodes into memory.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
---
 fs/xfs/libxfs/xfs_rtgroup.c |  182 +++++++++++++++++++++++++++++++++++++++++++
 fs/xfs/libxfs/xfs_rtgroup.h |   28 +++++++
 fs/xfs/xfs_mount.h          |    1 
 fs/xfs/xfs_rtalloc.c        |   48 +++++++++++
 4 files changed, 258 insertions(+), 1 deletion(-)

Comments

Christoph Hellwig Aug. 23, 2024, 5:02 a.m. UTC | #1
Looks good:

Reviewed-by: Christoph Hellwig <hch@lst.de>
Dave Chinner Aug. 26, 2024, 1:41 a.m. UTC | #2
On Thu, Aug 22, 2024 at 05:18:18PM -0700, Darrick J. Wong wrote:
> From: Darrick J. Wong <djwong@kernel.org>
> 
> Create the necessary per-rtgroup infrastructure that we need to load
> metadata inodes into memory.
> 
> Signed-off-by: Darrick J. Wong <djwong@kernel.org>
> ---
>  fs/xfs/libxfs/xfs_rtgroup.c |  182 +++++++++++++++++++++++++++++++++++++++++++
>  fs/xfs/libxfs/xfs_rtgroup.h |   28 +++++++
>  fs/xfs/xfs_mount.h          |    1 
>  fs/xfs/xfs_rtalloc.c        |   48 +++++++++++
>  4 files changed, 258 insertions(+), 1 deletion(-)
> 
> 
> diff --git a/fs/xfs/libxfs/xfs_rtgroup.c b/fs/xfs/libxfs/xfs_rtgroup.c
> index ae6d67c673b1a..50e4a56d749f0 100644
> --- a/fs/xfs/libxfs/xfs_rtgroup.c
> +++ b/fs/xfs/libxfs/xfs_rtgroup.c
> @@ -30,6 +30,8 @@
>  #include "xfs_icache.h"
>  #include "xfs_rtgroup.h"
>  #include "xfs_rtbitmap.h"
> +#include "xfs_metafile.h"
> +#include "xfs_metadir.h"
>  
>  /*
>   * Passive reference counting access wrappers to the rtgroup structures.  If
> @@ -295,3 +297,183 @@ xfs_rtginode_lockdep_setup(
>  #else
>  #define xfs_rtginode_lockdep_setup(ip, rgno, type)	do { } while (0)
>  #endif /* CONFIG_PROVE_LOCKING */
> +
> +struct xfs_rtginode_ops {
> +	const char		*name;	/* short name */
> +
> +	enum xfs_metafile_type	metafile_type;
> +
> +	/* Does the fs have this feature? */
> +	bool			(*enabled)(struct xfs_mount *mp);
> +
> +	/* Create this rtgroup metadata inode and initialize it. */
> +	int			(*create)(struct xfs_rtgroup *rtg,
> +					  struct xfs_inode *ip,
> +					  struct xfs_trans *tp,
> +					  bool init);
> +};

What's all this for?

AFAICT, loading the inodes into the rtgs requires a call to
xfs_metadir_load() when initialising the rtg (either at mount or
lazily on the first access to the rtg). Hence I'm not really sure
what this complexity is needed for, and the commit message is not
very informative....


> +static const struct xfs_rtginode_ops xfs_rtginode_ops[XFS_RTGI_MAX] = {
> +};
> +
> +/* Return the shortname of this rtgroup inode. */
> +const char *
> +xfs_rtginode_name(
> +	enum xfs_rtg_inodes	type)
> +{
> +	return xfs_rtginode_ops[type].name;
> +}
> +
> +/* Should this rtgroup inode be present? */
> +bool
> +xfs_rtginode_enabled(
> +	struct xfs_rtgroup	*rtg,
> +	enum xfs_rtg_inodes	type)
> +{
> +	const struct xfs_rtginode_ops *ops = &xfs_rtginode_ops[type];
> +
> +	if (!ops->enabled)
> +		return true;
> +	return ops->enabled(rtg->rtg_mount);
> +}
> +
> +/* Load and existing rtgroup inode into the rtgroup structure. */
> +int
> +xfs_rtginode_load(
> +	struct xfs_rtgroup	*rtg,
> +	enum xfs_rtg_inodes	type,
> +	struct xfs_trans	*tp)
> +{
> +	struct xfs_mount	*mp = tp->t_mountp;
> +	const char		*path;
> +	struct xfs_inode	*ip;
> +	const struct xfs_rtginode_ops *ops = &xfs_rtginode_ops[type];
> +	int			error;
> +
> +	if (!xfs_rtginode_enabled(rtg, type))
> +		return 0;
> +
> +	if (!mp->m_rtdirip)
> +		return -EFSCORRUPTED;
> +
> +	path = xfs_rtginode_path(rtg->rtg_rgno, type);
> +	if (!path)
> +		return -ENOMEM;
> +	error = xfs_metadir_load(tp, mp->m_rtdirip, path, ops->metafile_type,
> +			&ip);
> +	kfree(path);
> +
> +	if (error)
> +		return error;
> +
> +	if (XFS_IS_CORRUPT(mp, ip->i_df.if_format != XFS_DINODE_FMT_EXTENTS &&
> +			       ip->i_df.if_format != XFS_DINODE_FMT_BTREE)) {
> +		xfs_irele(ip);
> +		return -EFSCORRUPTED;
> +	}

We don't support LOCAL format for any type of regular file inodes,
so I'm a little confused as to why this wouldn't be caught by the
verifier on inode read? i.e.  What problem is this trying to catch,
and why doesn't the inode verifier catch it for us?

> +	if (XFS_IS_CORRUPT(mp, ip->i_projid != rtg->rtg_rgno)) {
> +		xfs_irele(ip);
> +		return -EFSCORRUPTED;
> +	}
> +
> +	xfs_rtginode_lockdep_setup(ip, rtg->rtg_rgno, type);
> +	rtg->rtg_inodes[type] = ip;
> +	return 0;
> +}
> +
> +/* Release an rtgroup metadata inode. */
> +void
> +xfs_rtginode_irele(
> +	struct xfs_inode	**ipp)
> +{
> +	if (*ipp)
> +		xfs_irele(*ipp);
> +	*ipp = NULL;
> +}
> +
> +/* Add a metadata inode for a realtime rmap btree. */
> +int
> +xfs_rtginode_create(
> +	struct xfs_rtgroup		*rtg,
> +	enum xfs_rtg_inodes		type,
> +	bool				init)

This doesn't seem to belong in this patchset...

....

> +/* Create the parent directory for all rtgroup inodes and load it. */
> +int
> +xfs_rtginode_mkdir_parent(
> +	struct xfs_mount	*mp)

Or this...

-Dave.
Darrick J. Wong Aug. 26, 2024, 6:37 p.m. UTC | #3
On Mon, Aug 26, 2024 at 11:41:19AM +1000, Dave Chinner wrote:
> On Thu, Aug 22, 2024 at 05:18:18PM -0700, Darrick J. Wong wrote:
> > From: Darrick J. Wong <djwong@kernel.org>
> > 
> > Create the necessary per-rtgroup infrastructure that we need to load
> > metadata inodes into memory.
> > 
> > Signed-off-by: Darrick J. Wong <djwong@kernel.org>
> > ---
> >  fs/xfs/libxfs/xfs_rtgroup.c |  182 +++++++++++++++++++++++++++++++++++++++++++
> >  fs/xfs/libxfs/xfs_rtgroup.h |   28 +++++++
> >  fs/xfs/xfs_mount.h          |    1 
> >  fs/xfs/xfs_rtalloc.c        |   48 +++++++++++
> >  4 files changed, 258 insertions(+), 1 deletion(-)
> > 
> > 
> > diff --git a/fs/xfs/libxfs/xfs_rtgroup.c b/fs/xfs/libxfs/xfs_rtgroup.c
> > index ae6d67c673b1a..50e4a56d749f0 100644
> > --- a/fs/xfs/libxfs/xfs_rtgroup.c
> > +++ b/fs/xfs/libxfs/xfs_rtgroup.c
> > @@ -30,6 +30,8 @@
> >  #include "xfs_icache.h"
> >  #include "xfs_rtgroup.h"
> >  #include "xfs_rtbitmap.h"
> > +#include "xfs_metafile.h"
> > +#include "xfs_metadir.h"
> >  
> >  /*
> >   * Passive reference counting access wrappers to the rtgroup structures.  If
> > @@ -295,3 +297,183 @@ xfs_rtginode_lockdep_setup(
> >  #else
> >  #define xfs_rtginode_lockdep_setup(ip, rgno, type)	do { } while (0)
> >  #endif /* CONFIG_PROVE_LOCKING */
> > +
> > +struct xfs_rtginode_ops {
> > +	const char		*name;	/* short name */
> > +
> > +	enum xfs_metafile_type	metafile_type;
> > +
> > +	/* Does the fs have this feature? */
> > +	bool			(*enabled)(struct xfs_mount *mp);
> > +
> > +	/* Create this rtgroup metadata inode and initialize it. */
> > +	int			(*create)(struct xfs_rtgroup *rtg,
> > +					  struct xfs_inode *ip,
> > +					  struct xfs_trans *tp,
> > +					  bool init);
> > +};
> 
> What's all this for?
> 
> AFAICT, loading the inodes into the rtgs requires a call to
> xfs_metadir_load() when initialising the rtg (either at mount or
> lazily on the first access to the rtg). Hence I'm not really sure
> what this complexity is needed for, and the commit message is not
> very informative....

Yes, the creation and mkdir code in here is really to support growfs,
mkfs, and repair.  How about I change the commit message to:

"Create the necessary per-rtgroup infrastructure that we need to load
metadata inodes into memory and to create directory trees on the fly.
Loading is needed by the mounting process.  Creation is needed by
growfs, mkfs, and repair."

> > +static const struct xfs_rtginode_ops xfs_rtginode_ops[XFS_RTGI_MAX] = {
> > +};
> > +
> > +/* Return the shortname of this rtgroup inode. */
> > +const char *
> > +xfs_rtginode_name(
> > +	enum xfs_rtg_inodes	type)
> > +{
> > +	return xfs_rtginode_ops[type].name;
> > +}
> > +
> > +/* Should this rtgroup inode be present? */
> > +bool
> > +xfs_rtginode_enabled(
> > +	struct xfs_rtgroup	*rtg,
> > +	enum xfs_rtg_inodes	type)
> > +{
> > +	const struct xfs_rtginode_ops *ops = &xfs_rtginode_ops[type];
> > +
> > +	if (!ops->enabled)
> > +		return true;
> > +	return ops->enabled(rtg->rtg_mount);
> > +}
> > +
> > +/* Load and existing rtgroup inode into the rtgroup structure. */
> > +int
> > +xfs_rtginode_load(
> > +	struct xfs_rtgroup	*rtg,
> > +	enum xfs_rtg_inodes	type,
> > +	struct xfs_trans	*tp)
> > +{
> > +	struct xfs_mount	*mp = tp->t_mountp;
> > +	const char		*path;
> > +	struct xfs_inode	*ip;
> > +	const struct xfs_rtginode_ops *ops = &xfs_rtginode_ops[type];
> > +	int			error;
> > +
> > +	if (!xfs_rtginode_enabled(rtg, type))
> > +		return 0;
> > +
> > +	if (!mp->m_rtdirip)
> > +		return -EFSCORRUPTED;
> > +
> > +	path = xfs_rtginode_path(rtg->rtg_rgno, type);
> > +	if (!path)
> > +		return -ENOMEM;
> > +	error = xfs_metadir_load(tp, mp->m_rtdirip, path, ops->metafile_type,
> > +			&ip);
> > +	kfree(path);
> > +
> > +	if (error)
> > +		return error;
> > +
> > +	if (XFS_IS_CORRUPT(mp, ip->i_df.if_format != XFS_DINODE_FMT_EXTENTS &&
> > +			       ip->i_df.if_format != XFS_DINODE_FMT_BTREE)) {
> > +		xfs_irele(ip);
> > +		return -EFSCORRUPTED;
> > +	}
> 
> We don't support LOCAL format for any type of regular file inodes,
> > so I'm a little confused as to why this wouldn't be caught by the
> verifier on inode read? i.e.  What problem is this trying to catch,
> and why doesn't the inode verifier catch it for us?

This is really more of a placeholder for more refactorings coming down
the line for the rtrmap patchset, which will create a new
XFS_DINODE_FMT_RMAP.  At that time we'll need to check that an inode
that we are loading to be the rmap btree actually has that set.

> > +	if (XFS_IS_CORRUPT(mp, ip->i_projid != rtg->rtg_rgno)) {
> > +		xfs_irele(ip);
> > +		return -EFSCORRUPTED;
> > +	}
> > +
> > +	xfs_rtginode_lockdep_setup(ip, rtg->rtg_rgno, type);
> > +	rtg->rtg_inodes[type] = ip;
> > +	return 0;
> > +}
> > +
> > +/* Release an rtgroup metadata inode. */
> > +void
> > +xfs_rtginode_irele(
> > +	struct xfs_inode	**ipp)
> > +{
> > +	if (*ipp)
> > +		xfs_irele(*ipp);
> > +	*ipp = NULL;
> > +}
> > +
> > +/* Add a metadata inode for a realtime rmap btree. */
> > +int
> > +xfs_rtginode_create(
> > +	struct xfs_rtgroup		*rtg,
> > +	enum xfs_rtg_inodes		type,
> > +	bool				init)
> 
> This doesn't seem to belong in this patchset...
> 
> ....
> 
> > +/* Create the parent directory for all rtgroup inodes and load it. */
> > +int
> > +xfs_rtginode_mkdir_parent(
> > +	struct xfs_mount	*mp)
> 
> Or this...
> 
> -Dave.
> 
> -- 
> Dave Chinner
> david@fromorbit.com
>
Dave Chinner Aug. 27, 2024, 1:05 a.m. UTC | #4
On Mon, Aug 26, 2024 at 11:37:34AM -0700, Darrick J. Wong wrote:
> On Mon, Aug 26, 2024 at 11:41:19AM +1000, Dave Chinner wrote:
> > On Thu, Aug 22, 2024 at 05:18:18PM -0700, Darrick J. Wong wrote:
> > > From: Darrick J. Wong <djwong@kernel.org>
> > > 
> > > Create the necessary per-rtgroup infrastructure that we need to load
> > > metadata inodes into memory.
> > > 
> > > Signed-off-by: Darrick J. Wong <djwong@kernel.org>
> > > ---
> > >  fs/xfs/libxfs/xfs_rtgroup.c |  182 +++++++++++++++++++++++++++++++++++++++++++
> > >  fs/xfs/libxfs/xfs_rtgroup.h |   28 +++++++
> > >  fs/xfs/xfs_mount.h          |    1 
> > >  fs/xfs/xfs_rtalloc.c        |   48 +++++++++++
> > >  4 files changed, 258 insertions(+), 1 deletion(-)
> > > 
> > > 
> > > diff --git a/fs/xfs/libxfs/xfs_rtgroup.c b/fs/xfs/libxfs/xfs_rtgroup.c
> > > index ae6d67c673b1a..50e4a56d749f0 100644
> > > --- a/fs/xfs/libxfs/xfs_rtgroup.c
> > > +++ b/fs/xfs/libxfs/xfs_rtgroup.c
> > > @@ -30,6 +30,8 @@
> > >  #include "xfs_icache.h"
> > >  #include "xfs_rtgroup.h"
> > >  #include "xfs_rtbitmap.h"
> > > +#include "xfs_metafile.h"
> > > +#include "xfs_metadir.h"
> > >  
> > >  /*
> > >   * Passive reference counting access wrappers to the rtgroup structures.  If
> > > @@ -295,3 +297,183 @@ xfs_rtginode_lockdep_setup(
> > >  #else
> > >  #define xfs_rtginode_lockdep_setup(ip, rgno, type)	do { } while (0)
> > >  #endif /* CONFIG_PROVE_LOCKING */
> > > +
> > > +struct xfs_rtginode_ops {
> > > +	const char		*name;	/* short name */
> > > +
> > > +	enum xfs_metafile_type	metafile_type;
> > > +
> > > +	/* Does the fs have this feature? */
> > > +	bool			(*enabled)(struct xfs_mount *mp);
> > > +
> > > +	/* Create this rtgroup metadata inode and initialize it. */
> > > +	int			(*create)(struct xfs_rtgroup *rtg,
> > > +					  struct xfs_inode *ip,
> > > +					  struct xfs_trans *tp,
> > > +					  bool init);
> > > +};
> > 
> > What's all this for?
> > 
> > AFAICT, loading the inodes into the rtgs requires a call to
> > xfs_metadir_load() when initialising the rtg (either at mount or
> > lazily on the first access to the rtg). Hence I'm not really sure
> > what this complexity is needed for, and the commit message is not
> > very informative....
> 
> Yes, the creation and mkdir code in here is really to support growfs,
> mkfs, and repair.  How about I change the commit message to:
> 
> "Create the necessary per-rtgroup infrastructure that we need to load
> metadata inodes into memory and to create directory trees on the fly.
> Loading is needed by the mounting process.  Creation is needed by
> growfs, mkfs, and repair."

IMO it would have been nicer to add this with the patch that
adds growfs support for rtgs. That way the initial inode loading
would be much easier to understand and review, and the rest of it
would have enough context to be able to review it sanely. There
isn't enough context in this patch to determine if the creation code
is sane or works correctly....


> > > +	path = xfs_rtginode_path(rtg->rtg_rgno, type);
> > > +	if (!path)
> > > +		return -ENOMEM;
> > > +	error = xfs_metadir_load(tp, mp->m_rtdirip, path, ops->metafile_type,
> > > +			&ip);
> > > +	kfree(path);
> > > +
> > > +	if (error)
> > > +		return error;
> > > +
> > > +	if (XFS_IS_CORRUPT(mp, ip->i_df.if_format != XFS_DINODE_FMT_EXTENTS &&
> > > +			       ip->i_df.if_format != XFS_DINODE_FMT_BTREE)) {
> > > +		xfs_irele(ip);
> > > +		return -EFSCORRUPTED;
> > > +	}
> > 
> > We don't support LOCAL format for any type of regular file inodes,
> > > so I'm a little confused as to why this wouldn't be caught by the
> > verifier on inode read? i.e.  What problem is this trying to catch,
> > and why doesn't the inode verifier catch it for us?
> 
> This is really more of a placeholder for more refactorings coming down
> the line for the rtrmap patchset, which will create a new
> XFS_DINODE_FMT_RMAP.  At that time we'll need to check that an inode
> that we are loading to be the rmap btree actually has that set.

Ok, can you leave a comment to indicate this so I don't have to
remember why this code exists?

-Dave.
Darrick J. Wong Aug. 27, 2024, 2:01 a.m. UTC | #5
On Tue, Aug 27, 2024 at 11:05:53AM +1000, Dave Chinner wrote:
> On Mon, Aug 26, 2024 at 11:37:34AM -0700, Darrick J. Wong wrote:
> > On Mon, Aug 26, 2024 at 11:41:19AM +1000, Dave Chinner wrote:
> > > On Thu, Aug 22, 2024 at 05:18:18PM -0700, Darrick J. Wong wrote:
> > > > From: Darrick J. Wong <djwong@kernel.org>
> > > > 
> > > > Create the necessary per-rtgroup infrastructure that we need to load
> > > > metadata inodes into memory.
> > > > 
> > > > Signed-off-by: Darrick J. Wong <djwong@kernel.org>
> > > > ---
> > > >  fs/xfs/libxfs/xfs_rtgroup.c |  182 +++++++++++++++++++++++++++++++++++++++++++
> > > >  fs/xfs/libxfs/xfs_rtgroup.h |   28 +++++++
> > > >  fs/xfs/xfs_mount.h          |    1 
> > > >  fs/xfs/xfs_rtalloc.c        |   48 +++++++++++
> > > >  4 files changed, 258 insertions(+), 1 deletion(-)
> > > > 
> > > > 
> > > > diff --git a/fs/xfs/libxfs/xfs_rtgroup.c b/fs/xfs/libxfs/xfs_rtgroup.c
> > > > index ae6d67c673b1a..50e4a56d749f0 100644
> > > > --- a/fs/xfs/libxfs/xfs_rtgroup.c
> > > > +++ b/fs/xfs/libxfs/xfs_rtgroup.c
> > > > @@ -30,6 +30,8 @@
> > > >  #include "xfs_icache.h"
> > > >  #include "xfs_rtgroup.h"
> > > >  #include "xfs_rtbitmap.h"
> > > > +#include "xfs_metafile.h"
> > > > +#include "xfs_metadir.h"
> > > >  
> > > >  /*
> > > >   * Passive reference counting access wrappers to the rtgroup structures.  If
> > > > @@ -295,3 +297,183 @@ xfs_rtginode_lockdep_setup(
> > > >  #else
> > > >  #define xfs_rtginode_lockdep_setup(ip, rgno, type)	do { } while (0)
> > > >  #endif /* CONFIG_PROVE_LOCKING */
> > > > +
> > > > +struct xfs_rtginode_ops {
> > > > +	const char		*name;	/* short name */
> > > > +
> > > > +	enum xfs_metafile_type	metafile_type;
> > > > +
> > > > +	/* Does the fs have this feature? */
> > > > +	bool			(*enabled)(struct xfs_mount *mp);
> > > > +
> > > > +	/* Create this rtgroup metadata inode and initialize it. */
> > > > +	int			(*create)(struct xfs_rtgroup *rtg,
> > > > +					  struct xfs_inode *ip,
> > > > +					  struct xfs_trans *tp,
> > > > +					  bool init);
> > > > +};
> > > 
> > > What's all this for?
> > > 
> > > AFAICT, loading the inodes into the rtgs requires a call to
> > > xfs_metadir_load() when initialising the rtg (either at mount or
> > > lazily on the first access to the rtg). Hence I'm not really sure
> > > what this complexity is needed for, and the commit message is not
> > > very informative....
> > 
> > Yes, the creation and mkdir code in here is really to support growfs,
> > mkfs, and repair.  How about I change the commit message to:
> > 
> > "Create the necessary per-rtgroup infrastructure that we need to load
> > metadata inodes into memory and to create directory trees on the fly.
> > Loading is needed by the mounting process.  Creation is needed by
> > growfs, mkfs, and repair."
> 
> IMO it would have been nicer to add this with the patch that
> adds growfs support for rtgs. That way the initial inode loading
> would be much easier to understand and review, and the rest of it
> would have enough context to be able to review it sanely. There
> isn't enough context in this patch to determine if the creation code
> is sane or works correctly....

<nod> I think that's doable.  I also want to change the name to
->init_inode because that's the only thing it can really do at the point
that we're creating inodes in growfs.

> 
> > > > +	path = xfs_rtginode_path(rtg->rtg_rgno, type);
> > > > +	if (!path)
> > > > +		return -ENOMEM;
> > > > +	error = xfs_metadir_load(tp, mp->m_rtdirip, path, ops->metafile_type,
> > > > +			&ip);
> > > > +	kfree(path);
> > > > +
> > > > +	if (error)
> > > > +		return error;
> > > > +
> > > > +	if (XFS_IS_CORRUPT(mp, ip->i_df.if_format != XFS_DINODE_FMT_EXTENTS &&
> > > > +			       ip->i_df.if_format != XFS_DINODE_FMT_BTREE)) {
> > > > +		xfs_irele(ip);
> > > > +		return -EFSCORRUPTED;
> > > > +	}
> > > 
> > > We don't support LOCAL format for any type of regular file inodes,
> > > > so I'm a little confused as to why this wouldn't be caught by the
> > > verifier on inode read? i.e.  What problem is this trying to catch,
> > > and why doesn't the inode verifier catch it for us?
> > 
> > This is really more of a placeholder for more refactorings coming down
> > the line for the rtrmap patchset, which will create a new
> > XFS_DINODE_FMT_RMAP.  At that time we'll need to check that an inode
> > that we are loading to be the rmap btree actually has that set.
> 
> Ok, can you leave a comment to indicate this so I don't have to
> remember why this code exists?

Will do.

--D

> -Dave.
> -- 
> Dave Chinner
> david@fromorbit.com
diff mbox series

Patch

diff --git a/fs/xfs/libxfs/xfs_rtgroup.c b/fs/xfs/libxfs/xfs_rtgroup.c
index ae6d67c673b1a..50e4a56d749f0 100644
--- a/fs/xfs/libxfs/xfs_rtgroup.c
+++ b/fs/xfs/libxfs/xfs_rtgroup.c
@@ -30,6 +30,8 @@ 
 #include "xfs_icache.h"
 #include "xfs_rtgroup.h"
 #include "xfs_rtbitmap.h"
+#include "xfs_metafile.h"
+#include "xfs_metadir.h"
 
 /*
  * Passive reference counting access wrappers to the rtgroup structures.  If
@@ -295,3 +297,183 @@  xfs_rtginode_lockdep_setup(
 #else
 #define xfs_rtginode_lockdep_setup(ip, rgno, type)	do { } while (0)
 #endif /* CONFIG_PROVE_LOCKING */
+
+struct xfs_rtginode_ops {
+	const char		*name;	/* short name, the "<name>" in "<rgno>.<name>" */
+
+	enum xfs_metafile_type	metafile_type;	/* type handed to the metadir code */
+
+	/* Does the fs have this feature?  A NULL hook means always enabled. */
+	bool			(*enabled)(struct xfs_mount *mp);
+
+	/* Initialize a newly created rtgroup metadata inode within @tp. */
+	int			(*create)(struct xfs_rtgroup *rtg,
+					  struct xfs_inode *ip,
+					  struct xfs_trans *tp,
+					  bool init);
+};
+
+static const struct xfs_rtginode_ops xfs_rtginode_ops[XFS_RTGI_MAX] = {
+};	/* empty for now: this patch defines no rtgroup inode types */
+
+/* Return the short name of this rtgroup inode type, as used in its path. */
+const char *
+xfs_rtginode_name(
+	enum xfs_rtg_inodes	type)
+{
+	return xfs_rtginode_ops[type].name;
+}
+
+/* Should this rtgroup inode be present?  Types with no ->enabled always are. */
+bool
+xfs_rtginode_enabled(
+	struct xfs_rtgroup	*rtg,
+	enum xfs_rtg_inodes	type)
+{
+	const struct xfs_rtginode_ops *ops = &xfs_rtginode_ops[type];
+
+	if (!ops->enabled)
+		return true;	/* no feature test registered */
+	return ops->enabled(rtg->rtg_mount);
+}
+
+/* Load an existing rtgroup inode into the rtgroup structure. */
+int
+xfs_rtginode_load(
+	struct xfs_rtgroup	*rtg,
+	enum xfs_rtg_inodes	type,
+	struct xfs_trans	*tp)
+{
+	struct xfs_mount	*mp = tp->t_mountp;
+	const char		*path;
+	struct xfs_inode	*ip;
+	const struct xfs_rtginode_ops *ops = &xfs_rtginode_ops[type];
+	int			error;
+
+	if (!xfs_rtginode_enabled(rtg, type))
+		return 0;	/* nothing to load for a disabled type */
+
+	if (!mp->m_rtdirip)
+		return -EFSCORRUPTED;	/* no rtgroups parent directory */
+
+	path = xfs_rtginode_path(rtg->rtg_rgno, type);
+	if (!path)
+		return -ENOMEM;
+	error = xfs_metadir_load(tp, mp->m_rtdirip, path, ops->metafile_type,
+			&ip);
+	kfree(path);	/* only needed for the lookup */
+
+	if (error)
+		return error;
+
+	if (XFS_IS_CORRUPT(mp, ip->i_df.if_format != XFS_DINODE_FMT_EXTENTS &&
+			       ip->i_df.if_format != XFS_DINODE_FMT_BTREE)) {
+		xfs_irele(ip);
+		return -EFSCORRUPTED;
+	}	/* placeholder: the rtrmap series will extend this format check */
+
+	if (XFS_IS_CORRUPT(mp, ip->i_projid != rtg->rtg_rgno)) {
+		xfs_irele(ip);
+		return -EFSCORRUPTED;
+	}	/* i_projid must match the group number stored at creation */
+
+	xfs_rtginode_lockdep_setup(ip, rtg->rtg_rgno, type);
+	rtg->rtg_inodes[type] = ip;
+	return 0;
+}
+
+/* Release an rtgroup metadata inode, if any, and clear the caller's pointer. */
+void
+xfs_rtginode_irele(
+	struct xfs_inode	**ipp)
+{
+	if (*ipp)
+		xfs_irele(*ipp);
+	*ipp = NULL;
+}
+
+/* Create a metadata inode of the given type for this rtgroup and cache it. */
+int
+xfs_rtginode_create(
+	struct xfs_rtgroup		*rtg,
+	enum xfs_rtg_inodes		type,
+	bool				init)
+{
+	const struct xfs_rtginode_ops	*ops = &xfs_rtginode_ops[type];
+	struct xfs_mount		*mp = rtg->rtg_mount;
+	struct xfs_metadir_update	upd = {
+		.dp			= mp->m_rtdirip,
+		.metafile_type		= ops->metafile_type,
+	};
+	int				error;
+
+	if (!xfs_rtginode_enabled(rtg, type))
+		return 0;	/* nothing to create for a disabled type */
+
+	if (!mp->m_rtdirip)
+		return -EFSCORRUPTED;	/* no rtgroups parent directory */
+
+	upd.path = xfs_rtginode_path(rtg->rtg_rgno, type);
+	if (!upd.path)
+		return -ENOMEM;
+
+	error = xfs_metadir_start_create(&upd);
+	if (error)
+		goto out_path;
+
+	error = xfs_metadir_create(&upd, S_IFREG);
+	if (error)
+		goto out_cancel;	/* was "return error": leaked upd.path */
+
+	xfs_rtginode_lockdep_setup(upd.ip, rtg->rtg_rgno, type);
+
+	upd.ip->i_projid = rtg->rtg_rgno;	/* checked again by the loader */
+	error = ops->create(rtg, upd.ip, upd.tp, init);
+	if (error)
+		goto out_cancel;
+
+	error = xfs_metadir_commit(&upd);
+	if (error)
+		goto out_path;
+
+	kfree(upd.path);
+	xfs_finish_inode_setup(upd.ip);
+	rtg->rtg_inodes[type] = upd.ip;
+	return 0;
+
+out_cancel:
+	xfs_metadir_cancel(&upd, error);
+	/* Have to finish setting up the inode to ensure it's deleted. */
+	if (upd.ip) {
+		xfs_finish_inode_setup(upd.ip);
+		xfs_irele(upd.ip);
+	}
+out_path:
+	kfree(upd.path);
+	return error;
+}
+
+/* Create the "rtgroups" parent directory of all rtgroup inodes and load it. */
+int
+xfs_rtginode_mkdir_parent(
+	struct xfs_mount	*mp)
+{
+	if (!mp->m_metadirip)
+		return -EFSCORRUPTED;	/* no metadata directory to create it in */
+
+	return xfs_metadir_mkdir(mp->m_metadirip, "rtgroups", &mp->m_rtdirip);
+}
+
+/* Load the "rtgroups" parent directory of all rtgroup inodes into the mount. */
+int
+xfs_rtginode_load_parent(
+	struct xfs_trans	*tp)
+{
+	struct xfs_mount	*mp = tp->t_mountp;
+
+	if (!mp->m_metadirip)
+		return -EFSCORRUPTED;	/* no metadata directory to look in */
+
+	return xfs_metadir_load(tp, mp->m_metadirip, "rtgroups",
+			XFS_METAFILE_DIR, &mp->m_rtdirip);
+}
diff --git a/fs/xfs/libxfs/xfs_rtgroup.h b/fs/xfs/libxfs/xfs_rtgroup.h
index d2eb2cd5775dd..b5c769211b4bb 100644
--- a/fs/xfs/libxfs/xfs_rtgroup.h
+++ b/fs/xfs/libxfs/xfs_rtgroup.h
@@ -9,6 +9,14 @@ 
 struct xfs_mount;
 struct xfs_trans;
 
+enum xfs_rtg_inodes {
+	XFS_RTGI_MAX,	/* number of rtgroup inode types; sizes the ops table */
+};
+
+#ifdef MAX_LOCKDEP_SUBCLASSES
+static_assert(XFS_RTGI_MAX <= MAX_LOCKDEP_SUBCLASSES);
+#endif
+
 /*
  * Realtime group incore structure, similar to the per-AG structure.
  */
@@ -19,6 +27,9 @@  struct xfs_rtgroup {
 	atomic_t		rtg_active_ref;	/* active reference count */
 	wait_queue_head_t	rtg_active_wq;/* woken active_ref falls to zero */
 
+	/* per-rtgroup metadata inodes */
+	struct xfs_inode	*rtg_inodes[1 /* hack */];
+
 	/* Number of blocks in this group */
 	xfs_rtxnum_t		rtg_extents;
 
@@ -218,6 +229,23 @@  void xfs_rtgroup_lock(struct xfs_rtgroup *rtg, unsigned int rtglock_flags);
 void xfs_rtgroup_unlock(struct xfs_rtgroup *rtg, unsigned int rtglock_flags);
 void xfs_rtgroup_trans_join(struct xfs_trans *tp, struct xfs_rtgroup *rtg,
 		unsigned int rtglock_flags);
+
+int xfs_rtginode_mkdir_parent(struct xfs_mount *mp);
+int xfs_rtginode_load_parent(struct xfs_trans *tp);
+
+const char *xfs_rtginode_name(enum xfs_rtg_inodes type);
+bool xfs_rtginode_enabled(struct xfs_rtgroup *rtg, enum xfs_rtg_inodes type);
+int xfs_rtginode_load(struct xfs_rtgroup *rtg, enum xfs_rtg_inodes type,
+		struct xfs_trans *tp);
+int xfs_rtginode_create(struct xfs_rtgroup *rtg, enum xfs_rtg_inodes type,
+		bool init);
+void xfs_rtginode_irele(struct xfs_inode **ipp);
+
+static inline const char *xfs_rtginode_path(xfs_rgnumber_t rgno,
+		enum xfs_rtg_inodes type)
+{	/* "<rgno>.<name>" from kasprintf(); NULL on failure, callers kfree() it */
+	return kasprintf(GFP_KERNEL, "%u.%s", rgno, xfs_rtginode_name(type));
+}
 #else
 # define xfs_rtgroup_extents(mp, rgno)		(0)
 # define xfs_rtgroup_lock(rtg, gf)		((void)0)
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index f69da6802e8c1..73959c26075a5 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -94,6 +94,7 @@  typedef struct xfs_mount {
 	struct xfs_inode	*m_rsumip;	/* pointer to summary inode */
 	struct xfs_inode	*m_rootip;	/* pointer to root directory */
 	struct xfs_inode	*m_metadirip;	/* ptr to metadata directory */
+	struct xfs_inode	*m_rtdirip;	/* ptr to realtime metadir */
 	struct xfs_quotainfo	*m_quotainfo;	/* disk quota information */
 	struct xfs_buftarg	*m_ddev_targp;	/* data device */
 	struct xfs_buftarg	*m_logdev_targp;/* log device */
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index 59898117f817d..dcdb726ebe4a0 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -28,6 +28,7 @@ 
 #include "xfs_da_format.h"
 #include "xfs_metafile.h"
 #include "xfs_rtgroup.h"
+#include "xfs_error.h"
 
 /*
  * Return whether there are any free extents in the size range given
@@ -652,6 +653,16 @@  xfs_rtallocate_extent_size(
 	return -ENOSPC;
 }
 
+static void
+xfs_rtunmount_rtg(
+	struct xfs_rtgroup	*rtg)
+{
+	int			i;
+
+	for (i = 0; i < XFS_RTGI_MAX; i++)	/* drop every cached inode */
+		xfs_rtginode_irele(&rtg->rtg_inodes[i]);
+}
+
 static int
 xfs_alloc_rsum_cache(
 	struct xfs_mount	*mp,
@@ -1127,6 +1138,18 @@  xfs_rtmount_iread_extents(
 	return error;
 }
 
+static void
+xfs_rtgroup_unmount_inodes(
+	struct xfs_mount	*mp)
+{
+	struct xfs_rtgroup	*rtg;
+	xfs_rgnumber_t		rgno;
+
+	for_each_rtgroup(mp, rgno, rtg)		/* per-group inodes first... */
+		xfs_rtunmount_rtg(rtg);
+	xfs_rtginode_irele(&mp->m_rtdirip);	/* ...then their parent dir */
+}
+
 /*
  * Get the bitmap and summary inodes and the summary cache into the mount
  * structure at mount time.
@@ -1139,6 +1162,7 @@  xfs_rtmount_inodes(
 	struct xfs_sb		*sbp = &mp->m_sb;
 	struct xfs_rtgroup	*rtg;
 	xfs_rgnumber_t		rgno;
+	unsigned int		i;
 	int			error;
 
 	error = xfs_trans_alloc_empty(mp, &tp);
@@ -1169,15 +1193,34 @@  xfs_rtmount_inodes(
 	if (error)
 		goto out_rele_summary;
 
-	for_each_rtgroup(mp, rgno, rtg)
+	if (xfs_has_rtgroups(mp) && mp->m_sb.sb_rgcount > 0) {
+		error = xfs_rtginode_load_parent(tp);
+		if (error)
+			goto out_rele_rtdir;
+	}
+
+	for_each_rtgroup(mp, rgno, rtg) {
 		rtg->rtg_extents = xfs_rtgroup_extents(mp, rtg->rtg_rgno);
 
+		for (i = 0; i < XFS_RTGI_MAX; i++) {
+			error = xfs_rtginode_load(rtg, i, tp);
+			if (error) {
+				xfs_rtgroup_rele(rtg);
+				goto out_rele_inodes;
+			}
+		}
+	}
+
 	error = xfs_alloc_rsum_cache(mp, sbp->sb_rbmblocks);
 	if (error)
 		goto out_rele_summary;
 	xfs_trans_cancel(tp);
 	return 0;
 
+out_rele_inodes:
+	xfs_rtgroup_unmount_inodes(mp);
+out_rele_rtdir:
+	xfs_rtginode_irele(&mp->m_rtdirip);
 out_rele_summary:
 	xfs_irele(mp->m_rsumip);
 out_rele_bitmap:
@@ -1192,6 +1235,9 @@  xfs_rtunmount_inodes(
 	struct xfs_mount	*mp)
 {
 	kvfree(mp->m_rsum_cache);
+
+	xfs_rtgroup_unmount_inodes(mp);
+	xfs_rtginode_irele(&mp->m_rtdirip);
 	if (mp->m_rbmip)
 		xfs_irele(mp->m_rbmip);
 	if (mp->m_rsumip)