diff mbox series

[1/9] anon_inode: use a proper mode internally

Message ID 20250407-work-anon_inode-v1-1-53a44c20d44e@kernel.org (mailing list archive)
State New
Headers show
Series fs: harden anon inodes | expand

Commit Message

Christian Brauner April 7, 2025, 9:54 a.m. UTC
This allows the VFS to not trip over anonymous inodes and we can add
asserts based on the mode into the vfs. When we report it to userspace
we can simply hide the mode to avoid regressions. I've audited all
direct callers of alloc_anon_inode() and only secretmen overrides i_mode
and i_op inode operations but it already uses a regular file.

Fixes: af153bb63a336 ("vfs: catch invalid modes in may_open()")
Cc: <stable@vger.kernel.org> # all LTS kernels
Reported-by: syzbot+5d8e79d323a13aa0b248@syzkaller.appspotmail.com
Closes: https://lore.kernel.org/all/67ed3fb3.050a0220.14623d.0009.GAE@google.com
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/anon_inodes.c | 36 ++++++++++++++++++++++++++++++++++++
 fs/internal.h    |  3 +++
 fs/libfs.c       |  2 +-
 3 files changed, 40 insertions(+), 1 deletion(-)

Comments

Jeff Layton April 7, 2025, 12:19 p.m. UTC | #1
On Mon, 2025-04-07 at 11:54 +0200, Christian Brauner wrote:
> This allows the VFS to not trip over anonymous inodes and we can add
> asserts based on the mode into the vfs. When we report it to userspace
> we can simply hide the mode to avoid regressions. I've audited all
> direct callers of alloc_anon_inode() and only secretmen overrides i_mode
> and i_op inode operations but it already uses a regular file.
> 
> Fixes: af153bb63a336 ("vfs: catch invalid modes in may_open()")
> Cc: <stable@vger.kernel.org> # all LTS kernels
> Reported-by: syzbot+5d8e79d323a13aa0b248@syzkaller.appspotmail.com
> Closes: https://lore.kernel.org/all/67ed3fb3.050a0220.14623d.0009.GAE@google.com
> Signed-off-by: Christian Brauner <brauner@kernel.org>
> ---
>  fs/anon_inodes.c | 36 ++++++++++++++++++++++++++++++++++++
>  fs/internal.h    |  3 +++
>  fs/libfs.c       |  2 +-
>  3 files changed, 40 insertions(+), 1 deletion(-)
> 
> diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
> index 583ac81669c2..42e4b9c34f89 100644
> --- a/fs/anon_inodes.c
> +++ b/fs/anon_inodes.c
> @@ -24,9 +24,43 @@
>  
>  #include <linux/uaccess.h>
>  
> +#include "internal.h"
> +
>  static struct vfsmount *anon_inode_mnt __ro_after_init;
>  static struct inode *anon_inode_inode __ro_after_init;
>  
> +/*
> + * User space expects anonymous inodes to have no file type in st_mode.

Weird. Does anything actually depend on this?

ISTM that they should report a file type.

> + *
> + * In particular, 'lsof' has this legacy logic:
> + *
> + *	type = s->st_mode & S_IFMT;
> + *	switch (type) {
> + *	  ...
> + *	case 0:
> + *		if (!strcmp(p, "anon_inode"))
> + *			Lf->ntype = Ntype = N_ANON_INODE;
> + *
> + * to detect our old anon_inode logic.
> + *
> + * Rather than mess with our internal sane inode data, just fix it
> + * up here in getattr() by masking off the format bits.
> + */
> +int anon_inode_getattr(struct mnt_idmap *idmap, const struct path *path,
> +		       struct kstat *stat, u32 request_mask,
> +		       unsigned int query_flags)
> +{
> +	struct inode *inode = d_inode(path->dentry);
> +
> +	generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
> +	stat->mode &= ~S_IFMT;
> +	return 0;
> +}
> +
> +static const struct inode_operations anon_inode_operations = {
> +	.getattr = anon_inode_getattr,
> +};
> +
>  /*
>   * anon_inodefs_dname() is called from d_path().
>   */
> @@ -66,6 +100,7 @@ static struct inode *anon_inode_make_secure_inode(
>  	if (IS_ERR(inode))
>  		return inode;
>  	inode->i_flags &= ~S_PRIVATE;
> +	inode->i_op = &anon_inode_operations;
>  	error =	security_inode_init_security_anon(inode, &QSTR(name),
>  						  context_inode);
>  	if (error) {
> @@ -313,6 +348,7 @@ static int __init anon_inode_init(void)
>  	anon_inode_inode = alloc_anon_inode(anon_inode_mnt->mnt_sb);
>  	if (IS_ERR(anon_inode_inode))
>  		panic("anon_inode_init() inode allocation failed (%ld)\n", PTR_ERR(anon_inode_inode));
> +	anon_inode_inode->i_op = &anon_inode_operations;
>  
>  	return 0;
>  }
> diff --git a/fs/internal.h b/fs/internal.h
> index b9b3e29a73fd..717dc9eb6185 100644
> --- a/fs/internal.h
> +++ b/fs/internal.h
> @@ -343,3 +343,6 @@ static inline bool path_mounted(const struct path *path)
>  void file_f_owner_release(struct file *file);
>  bool file_seek_cur_needs_f_lock(struct file *file);
>  int statmount_mnt_idmap(struct mnt_idmap *idmap, struct seq_file *seq, bool uid_map);
> +int anon_inode_getattr(struct mnt_idmap *idmap, const struct path *path,
> +		       struct kstat *stat, u32 request_mask,
> +		       unsigned int query_flags);
> diff --git a/fs/libfs.c b/fs/libfs.c
> index 6393d7c49ee6..0ad3336f5b49 100644
> --- a/fs/libfs.c
> +++ b/fs/libfs.c
> @@ -1647,7 +1647,7 @@ struct inode *alloc_anon_inode(struct super_block *s)
>  	 * that it already _is_ on the dirty list.
>  	 */
>  	inode->i_state = I_DIRTY;
> -	inode->i_mode = S_IRUSR | S_IWUSR;
> +	inode->i_mode = S_IFREG | S_IRUSR | S_IWUSR;
>  	inode->i_uid = current_fsuid();
>  	inode->i_gid = current_fsgid();
>  	inode->i_flags |= S_PRIVATE;
>
Christian Brauner April 7, 2025, 1:43 p.m. UTC | #2
On Mon, Apr 07, 2025 at 08:19:25AM -0400, Jeff Layton wrote:
> On Mon, 2025-04-07 at 11:54 +0200, Christian Brauner wrote:
> > This allows the VFS to not trip over anonymous inodes and we can add
> > asserts based on the mode into the vfs. When we report it to userspace
> > we can simply hide the mode to avoid regressions. I've audited all
> > direct callers of alloc_anon_inode() and only secretmen overrides i_mode
> > and i_op inode operations but it already uses a regular file.
> > 
> > Fixes: af153bb63a336 ("vfs: catch invalid modes in may_open()")
> > Cc: <stable@vger.kernel.org> # all LTS kernels
> > Reported-by: syzbot+5d8e79d323a13aa0b248@syzkaller.appspotmail.com
> > Closes: https://lore.kernel.org/all/67ed3fb3.050a0220.14623d.0009.GAE@google.com
> > Signed-off-by: Christian Brauner <brauner@kernel.org>
> > ---
> >  fs/anon_inodes.c | 36 ++++++++++++++++++++++++++++++++++++
> >  fs/internal.h    |  3 +++
> >  fs/libfs.c       |  2 +-
> >  3 files changed, 40 insertions(+), 1 deletion(-)
> > 
> > diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
> > index 583ac81669c2..42e4b9c34f89 100644
> > --- a/fs/anon_inodes.c
> > +++ b/fs/anon_inodes.c
> > @@ -24,9 +24,43 @@
> >  
> >  #include <linux/uaccess.h>
> >  
> > +#include "internal.h"
> > +
> >  static struct vfsmount *anon_inode_mnt __ro_after_init;
> >  static struct inode *anon_inode_inode __ro_after_init;
> >  
> > +/*
> > + * User space expects anonymous inodes to have no file type in st_mode.
> 
> Weird. Does anything actually depend on this?

Yeah, lsof failed and started complaining about this. They're checking
the mode to recognize anonymous inodes. And tbf, it works to generically
recognizer proper anonymous inodes because they came come from very
different superblocks (dmabuf, drm, vfio, anon_inode_mnt etc.).

> 
> ISTM that they should report a file type.

I agree but that ship has probably sailed.
Jan Kara April 7, 2025, 2:04 p.m. UTC | #3
On Mon 07-04-25 11:54:15, Christian Brauner wrote:
> This allows the VFS to not trip over anonymous inodes and we can add
> asserts based on the mode into the vfs. When we report it to userspace
> we can simply hide the mode to avoid regressions. I've audited all
> direct callers of alloc_anon_inode() and only secretmen overrides i_mode
> and i_op inode operations but it already uses a regular file.
> 
> Fixes: af153bb63a336 ("vfs: catch invalid modes in may_open()")
> Cc: <stable@vger.kernel.org> # all LTS kernels
> Reported-by: syzbot+5d8e79d323a13aa0b248@syzkaller.appspotmail.com
> Closes: https://lore.kernel.org/all/67ed3fb3.050a0220.14623d.0009.GAE@google.com
> Signed-off-by: Christian Brauner <brauner@kernel.org>

Looks good. Feel free to add:

Reviewed-by: Jan Kara <jack@suse.cz>

								Honza

> ---
>  fs/anon_inodes.c | 36 ++++++++++++++++++++++++++++++++++++
>  fs/internal.h    |  3 +++
>  fs/libfs.c       |  2 +-
>  3 files changed, 40 insertions(+), 1 deletion(-)
> 
> diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
> index 583ac81669c2..42e4b9c34f89 100644
> --- a/fs/anon_inodes.c
> +++ b/fs/anon_inodes.c
> @@ -24,9 +24,43 @@
>  
>  #include <linux/uaccess.h>
>  
> +#include "internal.h"
> +
>  static struct vfsmount *anon_inode_mnt __ro_after_init;
>  static struct inode *anon_inode_inode __ro_after_init;
>  
> +/*
> + * User space expects anonymous inodes to have no file type in st_mode.
> + *
> + * In particular, 'lsof' has this legacy logic:
> + *
> + *	type = s->st_mode & S_IFMT;
> + *	switch (type) {
> + *	  ...
> + *	case 0:
> + *		if (!strcmp(p, "anon_inode"))
> + *			Lf->ntype = Ntype = N_ANON_INODE;
> + *
> + * to detect our old anon_inode logic.
> + *
> + * Rather than mess with our internal sane inode data, just fix it
> + * up here in getattr() by masking off the format bits.
> + */
> +int anon_inode_getattr(struct mnt_idmap *idmap, const struct path *path,
> +		       struct kstat *stat, u32 request_mask,
> +		       unsigned int query_flags)
> +{
> +	struct inode *inode = d_inode(path->dentry);
> +
> +	generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
> +	stat->mode &= ~S_IFMT;
> +	return 0;
> +}
> +
> +static const struct inode_operations anon_inode_operations = {
> +	.getattr = anon_inode_getattr,
> +};
> +
>  /*
>   * anon_inodefs_dname() is called from d_path().
>   */
> @@ -66,6 +100,7 @@ static struct inode *anon_inode_make_secure_inode(
>  	if (IS_ERR(inode))
>  		return inode;
>  	inode->i_flags &= ~S_PRIVATE;
> +	inode->i_op = &anon_inode_operations;
>  	error =	security_inode_init_security_anon(inode, &QSTR(name),
>  						  context_inode);
>  	if (error) {
> @@ -313,6 +348,7 @@ static int __init anon_inode_init(void)
>  	anon_inode_inode = alloc_anon_inode(anon_inode_mnt->mnt_sb);
>  	if (IS_ERR(anon_inode_inode))
>  		panic("anon_inode_init() inode allocation failed (%ld)\n", PTR_ERR(anon_inode_inode));
> +	anon_inode_inode->i_op = &anon_inode_operations;
>  
>  	return 0;
>  }
> diff --git a/fs/internal.h b/fs/internal.h
> index b9b3e29a73fd..717dc9eb6185 100644
> --- a/fs/internal.h
> +++ b/fs/internal.h
> @@ -343,3 +343,6 @@ static inline bool path_mounted(const struct path *path)
>  void file_f_owner_release(struct file *file);
>  bool file_seek_cur_needs_f_lock(struct file *file);
>  int statmount_mnt_idmap(struct mnt_idmap *idmap, struct seq_file *seq, bool uid_map);
> +int anon_inode_getattr(struct mnt_idmap *idmap, const struct path *path,
> +		       struct kstat *stat, u32 request_mask,
> +		       unsigned int query_flags);
> diff --git a/fs/libfs.c b/fs/libfs.c
> index 6393d7c49ee6..0ad3336f5b49 100644
> --- a/fs/libfs.c
> +++ b/fs/libfs.c
> @@ -1647,7 +1647,7 @@ struct inode *alloc_anon_inode(struct super_block *s)
>  	 * that it already _is_ on the dirty list.
>  	 */
>  	inode->i_state = I_DIRTY;
> -	inode->i_mode = S_IRUSR | S_IWUSR;
> +	inode->i_mode = S_IFREG | S_IRUSR | S_IWUSR;
>  	inode->i_uid = current_fsuid();
>  	inode->i_gid = current_fsgid();
>  	inode->i_flags |= S_PRIVATE;
> 
> -- 
> 2.47.2
>
Mark Brown April 11, 2025, 10:31 a.m. UTC | #4
On Mon, Apr 07, 2025 at 11:54:15AM +0200, Christian Brauner wrote:
> This allows the VFS to not trip over anonymous inodes and we can add
> asserts based on the mode into the vfs. When we report it to userspace
> we can simply hide the mode to avoid regressions. I've audited all
> direct callers of alloc_anon_inode() and only secretmen overrides i_mode
> and i_op inode operations but it already uses a regular file.

We've been seeing failures in LTP's readadead01 in -next on arm64
platforms:

 4601 07:43:36.192033  tst_test.c:1900: TINFO: LTP version: 20250130-1-g60fe84aaf
 4602 07:43:36.201811  tst_test.c:1904: TINFO: Tested kernel: 6.15.0-rc1-next-20250410 #1 SMP PREEMPT Thu Apr 10 06:18:38 UTC 2025 aarch64
 4603 07:43:36.208400  tst_kconfig.c:88: TINFO: Parsing kernel config '/proc/config.gz'
 4604 07:43:36.218393  tst_test.c:1722: TINFO: Overall timeout per run is 0h 01m 30s
 4605 07:43:36.223886  readahead01.c:36: TPASS: readahead() with fd = -1 : EBADF (9)
 4606 07:43:36.229370  readahead01.c:43: TPASS: readahead() with invalid fd : EBADF (9)
 4607 07:43:36.234998  readahead01.c:64: TPASS: readahead() on O_PATH file : EBADF (9)
 4608 07:43:36.240527  readahead01.c:64: TPASS: readahead() on directory : EINVAL (22)
 4609 07:43:36.246118  readahead01.c:64: TPASS: readahead() on /dev/zero : EINVAL (22)
 4610 07:43:36.251530  readahead01.c:64: TPASS: readahead() on pipe read end : EINVAL (22)
 4611 07:43:36.260007  readahead01.c:64: TPASS: readahead() on pipe write end : EBADF (9)
 4612 07:43:36.265581  readahead01.c:64: TPASS: readahead() on unix socket : EINVAL (22)
 4613 07:43:36.270928  readahead01.c:64: TPASS: readahead() on inet socket : EINVAL (22)
 4614 07:43:36.276754  readahead01.c:64: TFAIL: readahead() on epoll succeeded
 4615 07:43:36.279460  readahead01.c:64: TFAIL: readahead() on eventfd succeeded
 4616 07:43:36.285053  readahead01.c:64: TFAIL: readahead() on signalfd succeeded
 4617 07:43:36.290504  readahead01.c:64: TFAIL: readahead() on timerfd succeeded
 4618 07:43:36.296220  readahead01.c:64: TFAIL: readahead() on fanotify succeeded
 4619 07:43:36.301605  readahead01.c:64: TFAIL: readahead() on inotify succeeded
 4620 07:43:36.307327  tst_fd.c:170: TCONF: Skipping userfaultfd: ENOSYS (38)
 4621 07:43:36.312806  readahead01.c:64: TFAIL: readahead() on perf event succeeded
 4622 07:43:36.318534  readahead01.c:64: TFAIL: readahead() on io uring succeeded
 4623 07:43:36.321511  readahead01.c:64: TFAIL: readahead() on bpf map succeeded
 4624 07:43:36.325711  readahead01.c:64: TFAIL: readahead() on fsopen succeeded
 4625 07:43:36.331073  readahead01.c:64: TFAIL: readahead() on fspick succeeded
 4626 07:43:36.336608  readahead01.c:64: TPASS: readahead() on open_tree : EBADF (9)
 4627 07:43:36.336903  
 4628 07:43:36.339354  Summary:
 4629 07:43:36.339641  passed   10
 4630 07:43:36.342132  failed   11
 4631 07:43:36.342420  broken   0
 4632 07:43:36.342648  skipped  1
 4633 07:43:36.344768  warnings 0

which bisected down to this patch, which is cfd86ef7e8e7b9e01 in -next:

git bisect start
# status: waiting for both good and bad commits
# bad: [29e7bf01ed8033c9a14ed0dc990dfe2736dbcd18] Add linux-next specific files for 20250410
git bisect bad 29e7bf01ed8033c9a14ed0dc990dfe2736dbcd18
# status: waiting for good commit(s), bad commit known
# good: [1785a3a7b96a52fae13880a5ba880a5f473eacb1] Merge branch 'for-linux-next-fixes' of https://gitlab.freedesktop.org/drm/misc/kernel.git
git bisect good 1785a3a7b96a52fae13880a5ba880a5f473eacb1
# bad: [793874436825ebf3dfeeac34b75682c234cf61ef] Merge branch 'for-linux-next' of https://gitlab.freedesktop.org/drm/misc/kernel.git
git bisect bad 793874436825ebf3dfeeac34b75682c234cf61ef
# bad: [f8b5c1664191e453611f77d36ba21b09bc468a2d] Merge branch 'next' of git://git.kernel.org/pub/scm/linux/kernel/git/pci/pci.git
git bisect bad f8b5c1664191e453611f77d36ba21b09bc468a2d
# good: [100ac6e209fce471f3ff4d4e92f9d192fcfa7637] Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/qcom/linux.git
git bisect good 100ac6e209fce471f3ff4d4e92f9d192fcfa7637
# bad: [143ced925e31fe24e820866403276492f05efaa5] Merge branch 'vfs.all' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs.git
git bisect bad 143ced925e31fe24e820866403276492f05efaa5
# good: [b087fb728fdda75e1d3e83aa542d3aa025ac6c4a] Merge branch 'nfsd-next' of git://git.kernel.org/pub/scm/linux/kernel/git/cel/linux
git bisect good b087fb728fdda75e1d3e83aa542d3aa025ac6c4a
# good: [7ee85aeee98e85f72a663672267180218d1510db] Merge branch 'vfs-6.16.super' into vfs.all
git bisect good 7ee85aeee98e85f72a663672267180218d1510db
# bad: [d57e6ea6671b1ef0fcb09ccc52952c8a6bfb83c8] Merge branch 'vfs-6.16.misc' into vfs.all
git bisect bad d57e6ea6671b1ef0fcb09ccc52952c8a6bfb83c8
# bad: [25a6cc9a630b4b1b783903b23a3a97c5bf16bf41] selftests/filesystems: add open() test for anonymous inodes
git bisect bad 25a6cc9a630b4b1b783903b23a3a97c5bf16bf41
# bad: [c83b9024966090fe0df92aab16975b8d00089e1f] pidfs: use anon_inode_setattr()
git bisect bad c83b9024966090fe0df92aab16975b8d00089e1f
# bad: [cfd86ef7e8e7b9e015707e46479a6b1de141eed0] anon_inode: use a proper mode internally
git bisect bad cfd86ef7e8e7b9e015707e46479a6b1de141eed0
# good: [418556fa576ebbd644c7258a97b33203956ea232] docs: initramfs: update compression and mtime descriptions
git bisect good 418556fa576ebbd644c7258a97b33203956ea232
# first bad commit: [cfd86ef7e8e7b9e015707e46479a6b1de141eed0] anon_inode: use a proper mode internally
Christian Brauner April 11, 2025, 3:03 p.m. UTC | #5
On Fri, Apr 11, 2025 at 11:31:24AM +0100, Mark Brown wrote:
> On Mon, Apr 07, 2025 at 11:54:15AM +0200, Christian Brauner wrote:
> > This allows the VFS to not trip over anonymous inodes and we can add
> > asserts based on the mode into the vfs. When we report it to userspace
> > we can simply hide the mode to avoid regressions. I've audited all
> > direct callers of alloc_anon_inode() and only secretmen overrides i_mode
> > and i_op inode operations but it already uses a regular file.
> 
> We've been seeing failures in LTP's readadead01 in -next on arm64
> platforms:

This fscking readhead garbage is driving me insane.
Ok, readahead skipped anonymous inodes because it's checking whether it
is a regular file or not. We now make them regular files internally.
Should be fixed in -next tomorrow.

> 
>  4601 07:43:36.192033  tst_test.c:1900: TINFO: LTP version: 20250130-1-g60fe84aaf
>  4602 07:43:36.201811  tst_test.c:1904: TINFO: Tested kernel: 6.15.0-rc1-next-20250410 #1 SMP PREEMPT Thu Apr 10 06:18:38 UTC 2025 aarch64
>  4603 07:43:36.208400  tst_kconfig.c:88: TINFO: Parsing kernel config '/proc/config.gz'
>  4604 07:43:36.218393  tst_test.c:1722: TINFO: Overall timeout per run is 0h 01m 30s
>  4605 07:43:36.223886  readahead01.c:36: TPASS: readahead() with fd = -1 : EBADF (9)
>  4606 07:43:36.229370  readahead01.c:43: TPASS: readahead() with invalid fd : EBADF (9)
>  4607 07:43:36.234998  readahead01.c:64: TPASS: readahead() on O_PATH file : EBADF (9)
>  4608 07:43:36.240527  readahead01.c:64: TPASS: readahead() on directory : EINVAL (22)
>  4609 07:43:36.246118  readahead01.c:64: TPASS: readahead() on /dev/zero : EINVAL (22)
>  4610 07:43:36.251530  readahead01.c:64: TPASS: readahead() on pipe read end : EINVAL (22)
>  4611 07:43:36.260007  readahead01.c:64: TPASS: readahead() on pipe write end : EBADF (9)
>  4612 07:43:36.265581  readahead01.c:64: TPASS: readahead() on unix socket : EINVAL (22)
>  4613 07:43:36.270928  readahead01.c:64: TPASS: readahead() on inet socket : EINVAL (22)
>  4614 07:43:36.276754  readahead01.c:64: TFAIL: readahead() on epoll succeeded
>  4615 07:43:36.279460  readahead01.c:64: TFAIL: readahead() on eventfd succeeded
>  4616 07:43:36.285053  readahead01.c:64: TFAIL: readahead() on signalfd succeeded
>  4617 07:43:36.290504  readahead01.c:64: TFAIL: readahead() on timerfd succeeded
>  4618 07:43:36.296220  readahead01.c:64: TFAIL: readahead() on fanotify succeeded
>  4619 07:43:36.301605  readahead01.c:64: TFAIL: readahead() on inotify succeeded
>  4620 07:43:36.307327  tst_fd.c:170: TCONF: Skipping userfaultfd: ENOSYS (38)
>  4621 07:43:36.312806  readahead01.c:64: TFAIL: readahead() on perf event succeeded
>  4622 07:43:36.318534  readahead01.c:64: TFAIL: readahead() on io uring succeeded
>  4623 07:43:36.321511  readahead01.c:64: TFAIL: readahead() on bpf map succeeded
>  4624 07:43:36.325711  readahead01.c:64: TFAIL: readahead() on fsopen succeeded
>  4625 07:43:36.331073  readahead01.c:64: TFAIL: readahead() on fspick succeeded
>  4626 07:43:36.336608  readahead01.c:64: TPASS: readahead() on open_tree : EBADF (9)
>  4627 07:43:36.336903  
>  4628 07:43:36.339354  Summary:
>  4629 07:43:36.339641  passed   10
>  4630 07:43:36.342132  failed   11
>  4631 07:43:36.342420  broken   0
>  4632 07:43:36.342648  skipped  1
>  4633 07:43:36.344768  warnings 0
> 
> which bisected down to this patch, which is cfd86ef7e8e7b9e01 in -next:
> 
> git bisect start
> # status: waiting for both good and bad commits
> # bad: [29e7bf01ed8033c9a14ed0dc990dfe2736dbcd18] Add linux-next specific files for 20250410
> git bisect bad 29e7bf01ed8033c9a14ed0dc990dfe2736dbcd18
> # status: waiting for good commit(s), bad commit known
> # good: [1785a3a7b96a52fae13880a5ba880a5f473eacb1] Merge branch 'for-linux-next-fixes' of https://gitlab.freedesktop.org/drm/misc/kernel.git
> git bisect good 1785a3a7b96a52fae13880a5ba880a5f473eacb1
> # bad: [793874436825ebf3dfeeac34b75682c234cf61ef] Merge branch 'for-linux-next' of https://gitlab.freedesktop.org/drm/misc/kernel.git
> git bisect bad 793874436825ebf3dfeeac34b75682c234cf61ef
> # bad: [f8b5c1664191e453611f77d36ba21b09bc468a2d] Merge branch 'next' of git://git.kernel.org/pub/scm/linux/kernel/git/pci/pci.git
> git bisect bad f8b5c1664191e453611f77d36ba21b09bc468a2d
> # good: [100ac6e209fce471f3ff4d4e92f9d192fcfa7637] Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/qcom/linux.git
> git bisect good 100ac6e209fce471f3ff4d4e92f9d192fcfa7637
> # bad: [143ced925e31fe24e820866403276492f05efaa5] Merge branch 'vfs.all' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs.git
> git bisect bad 143ced925e31fe24e820866403276492f05efaa5
> # good: [b087fb728fdda75e1d3e83aa542d3aa025ac6c4a] Merge branch 'nfsd-next' of git://git.kernel.org/pub/scm/linux/kernel/git/cel/linux
> git bisect good b087fb728fdda75e1d3e83aa542d3aa025ac6c4a
> # good: [7ee85aeee98e85f72a663672267180218d1510db] Merge branch 'vfs-6.16.super' into vfs.all
> git bisect good 7ee85aeee98e85f72a663672267180218d1510db
> # bad: [d57e6ea6671b1ef0fcb09ccc52952c8a6bfb83c8] Merge branch 'vfs-6.16.misc' into vfs.all
> git bisect bad d57e6ea6671b1ef0fcb09ccc52952c8a6bfb83c8
> # bad: [25a6cc9a630b4b1b783903b23a3a97c5bf16bf41] selftests/filesystems: add open() test for anonymous inodes
> git bisect bad 25a6cc9a630b4b1b783903b23a3a97c5bf16bf41
> # bad: [c83b9024966090fe0df92aab16975b8d00089e1f] pidfs: use anon_inode_setattr()
> git bisect bad c83b9024966090fe0df92aab16975b8d00089e1f
> # bad: [cfd86ef7e8e7b9e015707e46479a6b1de141eed0] anon_inode: use a proper mode internally
> git bisect bad cfd86ef7e8e7b9e015707e46479a6b1de141eed0
> # good: [418556fa576ebbd644c7258a97b33203956ea232] docs: initramfs: update compression and mtime descriptions
> git bisect good 418556fa576ebbd644c7258a97b33203956ea232
> # first bad commit: [cfd86ef7e8e7b9e015707e46479a6b1de141eed0] anon_inode: use a proper mode internally
Christoph Hellwig April 14, 2025, 5:50 a.m. UTC | #6
On Fri, Apr 11, 2025 at 05:03:55PM +0200, Christian Brauner wrote:
> On Fri, Apr 11, 2025 at 11:31:24AM +0100, Mark Brown wrote:
> > On Mon, Apr 07, 2025 at 11:54:15AM +0200, Christian Brauner wrote:
> > > This allows the VFS to not trip over anonymous inodes and we can add
> > > asserts based on the mode into the vfs. When we report it to userspace
> > > we can simply hide the mode to avoid regressions. I've audited all
> > > direct callers of alloc_anon_inode() and only secretmen overrides i_mode
> > > and i_op inode operations but it already uses a regular file.
> > 
> > We've been seeing failures in LTP's readadead01 in -next on arm64
> > platforms:
> 
> This fscking readhead garbage is driving me insane.
> Ok, readahead skipped anonymous inodes because it's checking whether it
> is a regular file or not. We now make them regular files internally.
> Should be fixed in -next tomorrow.

Is this the readahead syscall?  Yeah that random check in the high level
code looks odd if that's what is being triggered here.
diff mbox series

Patch

diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index 583ac81669c2..42e4b9c34f89 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -24,9 +24,43 @@ 
 
 #include <linux/uaccess.h>
 
+#include "internal.h"
+
 static struct vfsmount *anon_inode_mnt __ro_after_init;
 static struct inode *anon_inode_inode __ro_after_init;
 
+/*
+ * User space expects anonymous inodes to have no file type in st_mode.
+ *
+ * In particular, 'lsof' has this legacy logic:
+ *
+ *	type = s->st_mode & S_IFMT;
+ *	switch (type) {
+ *	  ...
+ *	case 0:
+ *		if (!strcmp(p, "anon_inode"))
+ *			Lf->ntype = Ntype = N_ANON_INODE;
+ *
+ * to detect our old anon_inode logic.
+ *
+ * Rather than mess with our internal sane inode data, just fix it
+ * up here in getattr() by masking off the format bits.
+ */
+int anon_inode_getattr(struct mnt_idmap *idmap, const struct path *path,
+		       struct kstat *stat, u32 request_mask,
+		       unsigned int query_flags)
+{
+	struct inode *inode = d_inode(path->dentry);
+
+	generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
+	stat->mode &= ~S_IFMT;
+	return 0;
+}
+
+static const struct inode_operations anon_inode_operations = {
+	.getattr = anon_inode_getattr,
+};
+
 /*
  * anon_inodefs_dname() is called from d_path().
  */
@@ -66,6 +100,7 @@  static struct inode *anon_inode_make_secure_inode(
 	if (IS_ERR(inode))
 		return inode;
 	inode->i_flags &= ~S_PRIVATE;
+	inode->i_op = &anon_inode_operations;
 	error =	security_inode_init_security_anon(inode, &QSTR(name),
 						  context_inode);
 	if (error) {
@@ -313,6 +348,7 @@  static int __init anon_inode_init(void)
 	anon_inode_inode = alloc_anon_inode(anon_inode_mnt->mnt_sb);
 	if (IS_ERR(anon_inode_inode))
 		panic("anon_inode_init() inode allocation failed (%ld)\n", PTR_ERR(anon_inode_inode));
+	anon_inode_inode->i_op = &anon_inode_operations;
 
 	return 0;
 }
diff --git a/fs/internal.h b/fs/internal.h
index b9b3e29a73fd..717dc9eb6185 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -343,3 +343,6 @@  static inline bool path_mounted(const struct path *path)
 void file_f_owner_release(struct file *file);
 bool file_seek_cur_needs_f_lock(struct file *file);
 int statmount_mnt_idmap(struct mnt_idmap *idmap, struct seq_file *seq, bool uid_map);
+int anon_inode_getattr(struct mnt_idmap *idmap, const struct path *path,
+		       struct kstat *stat, u32 request_mask,
+		       unsigned int query_flags);
diff --git a/fs/libfs.c b/fs/libfs.c
index 6393d7c49ee6..0ad3336f5b49 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -1647,7 +1647,7 @@  struct inode *alloc_anon_inode(struct super_block *s)
 	 * that it already _is_ on the dirty list.
 	 */
 	inode->i_state = I_DIRTY;
-	inode->i_mode = S_IRUSR | S_IWUSR;
+	inode->i_mode = S_IFREG | S_IRUSR | S_IWUSR;
 	inode->i_uid = current_fsuid();
 	inode->i_gid = current_fsgid();
 	inode->i_flags |= S_PRIVATE;