diff mbox series

[08/26] xfs: hide metadata inodes from everyone because they are special

Message ID 172437085312.57482.9340127129544109933.stgit@frogsfrogsfrogs (mailing list archive)
State Accepted, archived
Headers show
Series [01/26] xfs: define the on-disk format for the metadir feature | expand

Commit Message

Darrick J. Wong Aug. 23, 2024, 12:04 a.m. UTC
From: Darrick J. Wong <djwong@kernel.org>

Metadata inodes are private files and therefore cannot be exposed to
userspace.  This means no bulkstat, no open-by-handle, no linking them
into the directory tree, and no feeding them to LSMs.  As such, we mark
them S_PRIVATE, which stops all that.

While we're at it, put them in a separate lockdep class so that it won't
get confused by "recursive" i_rwsem locking such as what happens when we
write to a rt file and need to allocate from the rt bitmap file.  The
static function that we use to do this will be exported in the rtgroups
patchset.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
---
 fs/xfs/scrub/tempfile.c |    8 ++++++++
 fs/xfs/xfs_iops.c       |   15 ++++++++++++++-
 2 files changed, 22 insertions(+), 1 deletion(-)

Comments

Christoph Hellwig Aug. 23, 2024, 4:40 a.m. UTC | #1
Looks good:

Reviewed-by: Christoph Hellwig <hch@lst.de>
Dave Chinner Aug. 26, 2024, 12:41 a.m. UTC | #2
On Thu, Aug 22, 2024 at 05:04:14PM -0700, Darrick J. Wong wrote:
> From: Darrick J. Wong <djwong@kernel.org>
> 
> Metadata inodes are private files and therefore cannot be exposed to
> userspace.  This means no bulkstat, no open-by-handle, no linking them
> into the directory tree, and no feeding them to LSMs.  As such, we mark
> them S_PRIVATE, which stops all that.

Can you merge this back up into the initial iget support code?

> 
> While we're at it, put them in a separate lockdep class so that it won't
> get confused by "recursive" i_rwsem locking such as what happens when we
> write to a rt file and need to allocate from the rt bitmap file.  The
> static function that we use to do this will be exported in the rtgroups
> patchset.

Stale commit message? There's nothing of the sort in this patch....

> 
> Signed-off-by: Darrick J. Wong <djwong@kernel.org>
> ---
>  fs/xfs/scrub/tempfile.c |    8 ++++++++
>  fs/xfs/xfs_iops.c       |   15 ++++++++++++++-
>  2 files changed, 22 insertions(+), 1 deletion(-)
> 
> 
> diff --git a/fs/xfs/scrub/tempfile.c b/fs/xfs/scrub/tempfile.c
> index 177f922acfaf1..3c5a1d77fefae 100644
> --- a/fs/xfs/scrub/tempfile.c
> +++ b/fs/xfs/scrub/tempfile.c
> @@ -844,6 +844,14 @@ xrep_is_tempfile(
>  	const struct xfs_inode	*ip)
>  {
>  	const struct inode	*inode = &ip->i_vnode;
> +	struct xfs_mount	*mp = ip->i_mount;
> +
> +	/*
> +	 * Files in the metadata directory tree also have S_PRIVATE set and
> +	 * IOP_XATTR unset, so we must distinguish them separately.
> +	 */
> +	if (xfs_has_metadir(mp) && (ip->i_diflags2 & XFS_DIFLAG2_METADATA))
> +		return false;

Why do you need to check both xfs_has_metadir() and the inode flag
here? The latter should only be set if the former is set, yes?
If it's the other way around, then we have an on-disk corruption...

>  	if (IS_PRIVATE(inode) && !(inode->i_opflags & IOP_XATTR))
>  		return true;

> diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
> index 1cdc8034f54d9..c1686163299a0 100644
> --- a/fs/xfs/xfs_iops.c
> +++ b/fs/xfs/xfs_iops.c
> @@ -42,7 +42,9 @@
>   * held. For regular files, the lock order is the other way around - the
>   * mmap_lock is taken during the page fault, and then we lock the ilock to do
>   * block mapping. Hence we need a different class for the directory ilock so
> - * that lockdep can tell them apart.
> + * that lockdep can tell them apart.  Directories in the metadata directory
> + * tree get a separate class so that lockdep reports will warn us if someone
> + * ever tries to lock regular directories after locking metadata directories.
>   */
>  static struct lock_class_key xfs_nondir_ilock_class;
>  static struct lock_class_key xfs_dir_ilock_class;
> @@ -1299,6 +1301,7 @@ xfs_setup_inode(
>  {
>  	struct inode		*inode = &ip->i_vnode;
>  	gfp_t			gfp_mask;
> +	bool			is_meta = xfs_is_metadata_inode(ip);
>  
>  	inode->i_ino = ip->i_ino;
>  	inode->i_state |= I_NEW;
> @@ -1310,6 +1313,16 @@ xfs_setup_inode(
>  	i_size_write(inode, ip->i_disk_size);
>  	xfs_diflags_to_iflags(ip, true);
>  
> +	/*
> +	 * Mark our metadata files as private so that LSMs and the ACL code
> +	 * don't try to add their own metadata or reason about these files,
> +	 * and users cannot ever obtain file handles to them.
> +	 */
> +	if (is_meta) {
> +		inode->i_flags |= S_PRIVATE;
> +		inode->i_opflags &= ~IOP_XATTR;
> +	}

No need for a temporary variable here.

-Dave.
Darrick J. Wong Aug. 26, 2024, 5:33 p.m. UTC | #3
On Mon, Aug 26, 2024 at 10:41:18AM +1000, Dave Chinner wrote:
> On Thu, Aug 22, 2024 at 05:04:14PM -0700, Darrick J. Wong wrote:
> > From: Darrick J. Wong <djwong@kernel.org>
> > 
> > Metadata inodes are private files and therefore cannot be exposed to
> > userspace.  This means no bulkstat, no open-by-handle, no linking them
> > into the directory tree, and no feeding them to LSMs.  As such, we mark
> > them S_PRIVATE, which stops all that.
> 
> Can you merge this back up into the initial iget support code?
> 
> > 
> > While we're at it, put them in a separate lockdep class so that it won't
> > get confused by "recursive" i_rwsem locking such as what happens when we
> > write to a rt file and need to allocate from the rt bitmap file.  The
> > static function that we use to do this will be exported in the rtgroups
> > patchset.
> 
> Stale commit message? There's nothing of the sort in this patch....

Yeah, sorry.  Previously there were separate lockdep classes for metadir
directories and files each, but hch and I decided that each consumer of
a metadata file should set its own class accordingly, and that the
directories could continue using xfs_nondir_ilock_class as the only
code that uses them is either mount time setup code or repair.

> > 
> > Signed-off-by: Darrick J. Wong <djwong@kernel.org>
> > ---
> >  fs/xfs/scrub/tempfile.c |    8 ++++++++
> >  fs/xfs/xfs_iops.c       |   15 ++++++++++++++-
> >  2 files changed, 22 insertions(+), 1 deletion(-)
> > 
> > 
> > diff --git a/fs/xfs/scrub/tempfile.c b/fs/xfs/scrub/tempfile.c
> > index 177f922acfaf1..3c5a1d77fefae 100644
> > --- a/fs/xfs/scrub/tempfile.c
> > +++ b/fs/xfs/scrub/tempfile.c
> > @@ -844,6 +844,14 @@ xrep_is_tempfile(
> >  	const struct xfs_inode	*ip)
> >  {
> >  	const struct inode	*inode = &ip->i_vnode;
> > +	struct xfs_mount	*mp = ip->i_mount;
> > +
> > +	/*
> > +	 * Files in the metadata directory tree also have S_PRIVATE set and
> > +	 * IOP_XATTR unset, so we must distinguish them separately.
> > +	 */
> > +	if (xfs_has_metadir(mp) && (ip->i_diflags2 & XFS_DIFLAG2_METADATA))
> > +		return false;
> 
> Why do you need to check both xfs_has_metadir() and the inode flag
> here? The latter should only be set if the former is set, yes?
> If it's the other way around, then we have an on-disk corruption...

Probably just stale code that's been sitting around for a while.
But yes, this could all be:

	if (xfs_is_metadir_inode(ip))
		return false;

since the inode verifier should have already caught this.

> >  	if (IS_PRIVATE(inode) && !(inode->i_opflags & IOP_XATTR))
> >  		return true;
> 
> > diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
> > index 1cdc8034f54d9..c1686163299a0 100644
> > --- a/fs/xfs/xfs_iops.c
> > +++ b/fs/xfs/xfs_iops.c
> > @@ -42,7 +42,9 @@
> >   * held. For regular files, the lock order is the other way around - the
> >   * mmap_lock is taken during the page fault, and then we lock the ilock to do
> >   * block mapping. Hence we need a different class for the directory ilock so
> > - * that lockdep can tell them apart.
> > + * that lockdep can tell them apart.  Directories in the metadata directory
> > + * tree get a separate class so that lockdep reports will warn us if someone
> > + * ever tries to lock regular directories after locking metadata directories.
> >   */
> >  static struct lock_class_key xfs_nondir_ilock_class;
> >  static struct lock_class_key xfs_dir_ilock_class;
> > @@ -1299,6 +1301,7 @@ xfs_setup_inode(
> >  {
> >  	struct inode		*inode = &ip->i_vnode;
> >  	gfp_t			gfp_mask;
> > +	bool			is_meta = xfs_is_metadata_inode(ip);
> >  
> >  	inode->i_ino = ip->i_ino;
> >  	inode->i_state |= I_NEW;
> > @@ -1310,6 +1313,16 @@ xfs_setup_inode(
> >  	i_size_write(inode, ip->i_disk_size);
> >  	xfs_diflags_to_iflags(ip, true);
> >  
> > +	/*
> > +	 * Mark our metadata files as private so that LSMs and the ACL code
> > +	 * don't try to add their own metadata or reason about these files,
> > +	 * and users cannot ever obtain file handles to them.
> > +	 */
> > +	if (is_meta) {
> > +		inode->i_flags |= S_PRIVATE;
> > +		inode->i_opflags &= ~IOP_XATTR;
> > +	}
> 
> No need for a temporary variable here.

<nod>

--D

> -Dave.
> -- 
> Dave Chinner
> david@fromorbit.com
>
diff mbox series

Patch

diff --git a/fs/xfs/scrub/tempfile.c b/fs/xfs/scrub/tempfile.c
index 177f922acfaf1..3c5a1d77fefae 100644
--- a/fs/xfs/scrub/tempfile.c
+++ b/fs/xfs/scrub/tempfile.c
@@ -844,6 +844,14 @@  xrep_is_tempfile(
 	const struct xfs_inode	*ip)
 {
 	const struct inode	*inode = &ip->i_vnode;
+	struct xfs_mount	*mp = ip->i_mount;
+
+	/*
+	 * Files in the metadata directory tree also have S_PRIVATE set and
+	 * IOP_XATTR unset, so we must distinguish them separately.
+	 */
+	if (xfs_has_metadir(mp) && (ip->i_diflags2 & XFS_DIFLAG2_METADATA))
+		return false;
 
 	if (IS_PRIVATE(inode) && !(inode->i_opflags & IOP_XATTR))
 		return true;
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 1cdc8034f54d9..c1686163299a0 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -42,7 +42,9 @@ 
  * held. For regular files, the lock order is the other way around - the
  * mmap_lock is taken during the page fault, and then we lock the ilock to do
  * block mapping. Hence we need a different class for the directory ilock so
- * that lockdep can tell them apart.
+ * that lockdep can tell them apart.  Directories in the metadata directory
+ * tree get a separate class so that lockdep reports will warn us if someone
+ * ever tries to lock regular directories after locking metadata directories.
  */
 static struct lock_class_key xfs_nondir_ilock_class;
 static struct lock_class_key xfs_dir_ilock_class;
@@ -1299,6 +1301,7 @@  xfs_setup_inode(
 {
 	struct inode		*inode = &ip->i_vnode;
 	gfp_t			gfp_mask;
+	bool			is_meta = xfs_is_metadata_inode(ip);
 
 	inode->i_ino = ip->i_ino;
 	inode->i_state |= I_NEW;
@@ -1310,6 +1313,16 @@  xfs_setup_inode(
 	i_size_write(inode, ip->i_disk_size);
 	xfs_diflags_to_iflags(ip, true);
 
+	/*
+	 * Mark our metadata files as private so that LSMs and the ACL code
+	 * don't try to add their own metadata or reason about these files,
+	 * and users cannot ever obtain file handles to them.
+	 */
+	if (is_meta) {
+		inode->i_flags |= S_PRIVATE;
+		inode->i_opflags &= ~IOP_XATTR;
+	}
+
 	if (S_ISDIR(inode->i_mode)) {
 		/*
 		 * We set the i_rwsem class here to avoid potential races with