diff mbox series

[v9,1/4] generic/631: add test for detached mount propagation

Message ID 20210316103627.2954121-2-christian.brauner@ubuntu.com (mailing list archive)
State New, archived
Headers show
Series fstests: add idmapped mounts tests | expand

Commit Message

Christian Brauner March 16, 2021, 10:36 a.m. UTC
Regression test to verify that creating a series of detached mounts,
attaching them to the filesystem, and unmounting them does not trigger an
integer overflow in ns->mounts causing the kernel to block any new mounts in
count_mounts() and returning ENOSPC because it falsely assumes that the
maximum number of mounts in the mount namespace has been reached, i.e. it
thinks it can't fit the new mounts into the mount namespace anymore.

The test is written in a way that it will leave the host's mount
namespace intact so we are sure to never make the host's mount namespace
unuseable!

Link: https://git.kernel.org/torvalds/c/ee2e3f50629f17b0752b55b2566c15ce8dafb557
Cc: Christoph Hellwig <hch@lst.de>
Cc: David Howells <dhowells@redhat.com>
Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
---
/* v1 */
patch not present

/* v2 */
patch not present

/* v3 */
patch not present

/* v4 */
patch not present

/* v5 */
patch not present

/* v6 */
patch not present

/* v7 */
patch not present

/* v8 */
patch introduced

/* v9 */
- Christian Brauner <christian.brauner@ubuntu.com>:
  - Rebased on current master.
---
 .gitignore                        |   1 +
 src/Makefile                      |   3 +-
 src/detached_mounts_propagation.c | 252 ++++++++++++++++++++++++++++++
 tests/generic/631                 |  41 +++++
 tests/generic/631.out             |   2 +
 tests/generic/group               |   1 +
 6 files changed, 299 insertions(+), 1 deletion(-)
 create mode 100644 src/detached_mounts_propagation.c
 create mode 100644 tests/generic/631
 create mode 100644 tests/generic/631.out

Comments

Christoph Hellwig March 18, 2021, 6:23 a.m. UTC | #1
> +/* open_tree() */
> +#ifndef OPEN_TREE_CLONE
> +#define OPEN_TREE_CLONE 1
> +#endif
> +
> +#ifndef OPEN_TREE_CLOEXEC
> +#define OPEN_TREE_CLOEXEC O_CLOEXEC
> +#endif
> +
> +#ifndef __NR_open_tree
> +	#if defined __alpha__
> +		#define __NR_open_tree 538
> +	#elif defined _MIPS_SIM
> +		#if _MIPS_SIM == _MIPS_SIM_ABI32	/* o32 */
> +			#define __NR_open_tree 4428
> +		#endif
> +		#if _MIPS_SIM == _MIPS_SIM_NABI32	/* n32 */
> +			#define __NR_open_tree 6428
> +		#endif
> +		#if _MIPS_SIM == _MIPS_SIM_ABI64	/* n64 */
> +			#define __NR_open_tree 5428
> +		#endif
> +	#elif defined __ia64__
> +		#define __NR_open_tree (428 + 1024)
> +	#else
> +		#define __NR_open_tree 428
> +	#endif
> +#endif
> +
> +/* move_mount() */
> +#ifndef MOVE_MOUNT_F_EMPTY_PATH
> +#define MOVE_MOUNT_F_EMPTY_PATH 0x00000004 /* Empty from path permitted */
> +#endif
> +
> +#ifndef __NR_move_mount
> +	#if defined __alpha__
> +		#define __NR_move_mount 539
> +	#elif defined _MIPS_SIM
> +		#if _MIPS_SIM == _MIPS_SIM_ABI32	/* o32 */
> +			#define __NR_move_mount 4429
> +		#endif
> +		#if _MIPS_SIM == _MIPS_SIM_NABI32	/* n32 */
> +			#define __NR_move_mount 6429
> +		#endif
> +		#if _MIPS_SIM == _MIPS_SIM_ABI64	/* n64 */
> +			#define __NR_move_mount 5429
> +		#endif
> +	#elif defined __ia64__
> +		#define __NR_move_mount (428 + 1024)
> +	#else
> +		#define __NR_move_mount 429
> +	#endif
> +#endif
> +
> +static inline int sys_open_tree(int dfd, const char *filename, unsigned int flags)
> +{
> +	return syscall(__NR_open_tree, dfd, filename, flags);
> +}
> +
> +static inline int sys_move_mount(int from_dfd, const char *from_pathname, int to_dfd,
> +				 const char *to_pathname, unsigned int flags)
> +{
> +	return syscall(__NR_move_mount, from_dfd, from_pathname, to_dfd, to_pathname, flags);
> +}

Not directly new in your patch, but I wish we'd have a central header
for missing UAPI bits in xfstests instead of spreading it over
various files.

Otherwise looks good:

Reviewed-by: Christoph Hellwig <hch@lst.de>
Darrick J. Wong March 18, 2021, 3:02 p.m. UTC | #2
On Thu, Mar 18, 2021 at 07:23:10AM +0100, Christoph Hellwig wrote:
> > +/* open_tree() */
> > +#ifndef OPEN_TREE_CLONE
> > +#define OPEN_TREE_CLONE 1
> > +#endif
> > +
> > +#ifndef OPEN_TREE_CLOEXEC
> > +#define OPEN_TREE_CLOEXEC O_CLOEXEC
> > +#endif
> > +
> > +#ifndef __NR_open_tree
> > +	#if defined __alpha__
> > +		#define __NR_open_tree 538
> > +	#elif defined _MIPS_SIM
> > +		#if _MIPS_SIM == _MIPS_SIM_ABI32	/* o32 */
> > +			#define __NR_open_tree 4428
> > +		#endif
> > +		#if _MIPS_SIM == _MIPS_SIM_NABI32	/* n32 */
> > +			#define __NR_open_tree 6428
> > +		#endif
> > +		#if _MIPS_SIM == _MIPS_SIM_ABI64	/* n64 */
> > +			#define __NR_open_tree 5428
> > +		#endif
> > +	#elif defined __ia64__
> > +		#define __NR_open_tree (428 + 1024)
> > +	#else
> > +		#define __NR_open_tree 428
> > +	#endif
> > +#endif
> > +
> > +/* move_mount() */
> > +#ifndef MOVE_MOUNT_F_EMPTY_PATH
> > +#define MOVE_MOUNT_F_EMPTY_PATH 0x00000004 /* Empty from path permitted */
> > +#endif
> > +
> > +#ifndef __NR_move_mount
> > +	#if defined __alpha__
> > +		#define __NR_move_mount 539
> > +	#elif defined _MIPS_SIM
> > +		#if _MIPS_SIM == _MIPS_SIM_ABI32	/* o32 */
> > +			#define __NR_move_mount 4429
> > +		#endif
> > +		#if _MIPS_SIM == _MIPS_SIM_NABI32	/* n32 */
> > +			#define __NR_move_mount 6429
> > +		#endif
> > +		#if _MIPS_SIM == _MIPS_SIM_ABI64	/* n64 */
> > +			#define __NR_move_mount 5429
> > +		#endif
> > +	#elif defined __ia64__
> > +		#define __NR_move_mount (428 + 1024)
> > +	#else
> > +		#define __NR_move_mount 429
> > +	#endif
> > +#endif
> > +
> > +static inline int sys_open_tree(int dfd, const char *filename, unsigned int flags)
> > +{
> > +	return syscall(__NR_open_tree, dfd, filename, flags);
> > +}
> > +
> > +static inline int sys_move_mount(int from_dfd, const char *from_pathname, int to_dfd,
> > +				 const char *to_pathname, unsigned int flags)
> > +{
> > +	return syscall(__NR_move_mount, from_dfd, from_pathname, to_dfd, to_pathname, flags);
> > +}
> 
> Not directly new in your patch, but I wish we'd have a central header
> for missing UAPI bits in xfstests instead of spreading it over
> various files.

FWIW if you (or anyone else) /does/ create a src/linux_calls.h or
similar for things like this, I will totally start adding things to it
instead of scattering them everywhere. :)

--D

> Otherwise looks good:
> 
> Reviewed-by: Christoph Hellwig <hch@lst.de>
Christian Brauner March 18, 2021, 3:14 p.m. UTC | #3
On Thu, Mar 18, 2021 at 08:02:30AM -0700, Darrick J. Wong wrote:
> On Thu, Mar 18, 2021 at 07:23:10AM +0100, Christoph Hellwig wrote:
> > > +/* open_tree() */
> > > +#ifndef OPEN_TREE_CLONE
> > > +#define OPEN_TREE_CLONE 1
> > > +#endif
> > > +
> > > +#ifndef OPEN_TREE_CLOEXEC
> > > +#define OPEN_TREE_CLOEXEC O_CLOEXEC
> > > +#endif
> > > +
> > > +#ifndef __NR_open_tree
> > > +	#if defined __alpha__
> > > +		#define __NR_open_tree 538
> > > +	#elif defined _MIPS_SIM
> > > +		#if _MIPS_SIM == _MIPS_SIM_ABI32	/* o32 */
> > > +			#define __NR_open_tree 4428
> > > +		#endif
> > > +		#if _MIPS_SIM == _MIPS_SIM_NABI32	/* n32 */
> > > +			#define __NR_open_tree 6428
> > > +		#endif
> > > +		#if _MIPS_SIM == _MIPS_SIM_ABI64	/* n64 */
> > > +			#define __NR_open_tree 5428
> > > +		#endif
> > > +	#elif defined __ia64__
> > > +		#define __NR_open_tree (428 + 1024)
> > > +	#else
> > > +		#define __NR_open_tree 428
> > > +	#endif
> > > +#endif
> > > +
> > > +/* move_mount() */
> > > +#ifndef MOVE_MOUNT_F_EMPTY_PATH
> > > +#define MOVE_MOUNT_F_EMPTY_PATH 0x00000004 /* Empty from path permitted */
> > > +#endif
> > > +
> > > +#ifndef __NR_move_mount
> > > +	#if defined __alpha__
> > > +		#define __NR_move_mount 539
> > > +	#elif defined _MIPS_SIM
> > > +		#if _MIPS_SIM == _MIPS_SIM_ABI32	/* o32 */
> > > +			#define __NR_move_mount 4429
> > > +		#endif
> > > +		#if _MIPS_SIM == _MIPS_SIM_NABI32	/* n32 */
> > > +			#define __NR_move_mount 6429
> > > +		#endif
> > > +		#if _MIPS_SIM == _MIPS_SIM_ABI64	/* n64 */
> > > +			#define __NR_move_mount 5429
> > > +		#endif
> > > +	#elif defined __ia64__
> > > +		#define __NR_move_mount (428 + 1024)
> > > +	#else
> > > +		#define __NR_move_mount 429
> > > +	#endif
> > > +#endif
> > > +
> > > +static inline int sys_open_tree(int dfd, const char *filename, unsigned int flags)
> > > +{
> > > +	return syscall(__NR_open_tree, dfd, filename, flags);
> > > +}
> > > +
> > > +static inline int sys_move_mount(int from_dfd, const char *from_pathname, int to_dfd,
> > > +				 const char *to_pathname, unsigned int flags)
> > > +{
> > > +	return syscall(__NR_move_mount, from_dfd, from_pathname, to_dfd, to_pathname, flags);
> > > +}
> > 
> > Not directly new in your patch, but I wish we'd have a central header
> > for missing UAPI bits in xfstests instead of spreading it over
> > various files.
> 
> FWIW if you (or anyone else) /does/ create a src/linux_calls.h or
> similar for things like this, I will totally start adding things to it
> instead of scattering them everywhere. :)

I do usually have in my C projects two .h files (without corresponding
.c files):

1. syscall_numbers.h
   https://github.com/lxc/lxc/blob/master/src/lxc/syscall_numbers.h

   Where syscall_numbers defines the missing syscall numbers. For new
   syscalls in the style exhibited in this patch and for legacy syscalls
   it's a bit more nasty (i.e. syscalls before we unified numbering [apart
   from alpha]):
   
#ifndef __NR_memfd_create
	#if defined __i386__
		#define __NR_memfd_create 356
	#elif defined __x86_64__
		#define __NR_memfd_create 319
	#elif defined __arm__
		#define __NR_memfd_create 385
	#elif defined __aarch64__
		#define __NR_memfd_create 279
	#elif defined __s390__
		#define __NR_memfd_create 350
	#elif defined __powerpc__
		#define __NR_memfd_create 360
	#elif defined __riscv
		#define __NR_memfd_create 279
	#elif defined __sparc__
		#define __NR_memfd_create 348
	#elif defined __blackfin__
		#define __NR_memfd_create 390
	#elif defined __ia64__
		#define __NR_memfd_create 1340
	#elif defined _MIPS_SIM
		#if _MIPS_SIM == _MIPS_SIM_ABI32
			#define __NR_memfd_create 4354
		#endif
		#if _MIPS_SIM == _MIPS_SIM_NABI32
			#define __NR_memfd_create 6318
		#endif
		#if _MIPS_SIM == _MIPS_SIM_ABI64
			#define __NR_memfd_create 5314
		#endif
	#else
		#define -1
		#warning "__NR_memfd_create not defined for your architecture"
	#endif
#endif

2. syscall_wrappers.h
   https://github.com/lxc/lxc/blob/master/src/lxc/syscall_wrappers.h

   Where syscall_wrappers.h defines minimal wrappers for missing
   syscalls and their corresponding types.

#ifndef HAVE_SYS_SIGNALFD_H
struct signalfd_siginfo {
	uint32_t ssi_signo;
	int32_t ssi_errno;
	int32_t ssi_code;
	uint32_t ssi_pid;
	uint32_t ssi_uid;
	int32_t ssi_fd;
	uint32_t ssi_tid;
	uint32_t ssi_band;
	uint32_t ssi_overrun;
	uint32_t ssi_trapno;
	int32_t ssi_status;
	int32_t ssi_int;
	uint64_t ssi_ptr;
	uint64_t ssi_utime;
	uint64_t ssi_stime;
	uint64_t ssi_addr;
	uint8_t __pad[48];
};

static inline int signalfd(int fd, const sigset_t *mask, int flags)
{
	int retval;

	retval = syscall(__NR_signalfd4, fd, mask, _NSIG / 8, flags);
#ifdef __NR_signalfd
	if (errno == ENOSYS && flags == 0)
		retval = syscall(__NR_signalfd, fd, mask, _NSIG / 8);
#endif

	return retval;
}
#endif

Something like this might also work for xfstests.

Christian
Darrick J. Wong March 18, 2021, 3:57 p.m. UTC | #4
On Thu, Mar 18, 2021 at 04:14:44PM +0100, Christian Brauner wrote:
> On Thu, Mar 18, 2021 at 08:02:30AM -0700, Darrick J. Wong wrote:
> > On Thu, Mar 18, 2021 at 07:23:10AM +0100, Christoph Hellwig wrote:
> > > > +/* open_tree() */
> > > > +#ifndef OPEN_TREE_CLONE
> > > > +#define OPEN_TREE_CLONE 1
> > > > +#endif
> > > > +
> > > > +#ifndef OPEN_TREE_CLOEXEC
> > > > +#define OPEN_TREE_CLOEXEC O_CLOEXEC
> > > > +#endif
> > > > +
> > > > +#ifndef __NR_open_tree
> > > > +	#if defined __alpha__
> > > > +		#define __NR_open_tree 538
> > > > +	#elif defined _MIPS_SIM
> > > > +		#if _MIPS_SIM == _MIPS_SIM_ABI32	/* o32 */
> > > > +			#define __NR_open_tree 4428
> > > > +		#endif
> > > > +		#if _MIPS_SIM == _MIPS_SIM_NABI32	/* n32 */
> > > > +			#define __NR_open_tree 6428
> > > > +		#endif
> > > > +		#if _MIPS_SIM == _MIPS_SIM_ABI64	/* n64 */
> > > > +			#define __NR_open_tree 5428
> > > > +		#endif
> > > > +	#elif defined __ia64__
> > > > +		#define __NR_open_tree (428 + 1024)
> > > > +	#else
> > > > +		#define __NR_open_tree 428
> > > > +	#endif
> > > > +#endif
> > > > +
> > > > +/* move_mount() */
> > > > +#ifndef MOVE_MOUNT_F_EMPTY_PATH
> > > > +#define MOVE_MOUNT_F_EMPTY_PATH 0x00000004 /* Empty from path permitted */
> > > > +#endif
> > > > +
> > > > +#ifndef __NR_move_mount
> > > > +	#if defined __alpha__
> > > > +		#define __NR_move_mount 539
> > > > +	#elif defined _MIPS_SIM
> > > > +		#if _MIPS_SIM == _MIPS_SIM_ABI32	/* o32 */
> > > > +			#define __NR_move_mount 4429
> > > > +		#endif
> > > > +		#if _MIPS_SIM == _MIPS_SIM_NABI32	/* n32 */
> > > > +			#define __NR_move_mount 6429
> > > > +		#endif
> > > > +		#if _MIPS_SIM == _MIPS_SIM_ABI64	/* n64 */
> > > > +			#define __NR_move_mount 5429
> > > > +		#endif
> > > > +	#elif defined __ia64__
> > > > +		#define __NR_move_mount (428 + 1024)
> > > > +	#else
> > > > +		#define __NR_move_mount 429
> > > > +	#endif
> > > > +#endif
> > > > +
> > > > +static inline int sys_open_tree(int dfd, const char *filename, unsigned int flags)
> > > > +{
> > > > +	return syscall(__NR_open_tree, dfd, filename, flags);
> > > > +}
> > > > +
> > > > +static inline int sys_move_mount(int from_dfd, const char *from_pathname, int to_dfd,
> > > > +				 const char *to_pathname, unsigned int flags)
> > > > +{
> > > > +	return syscall(__NR_move_mount, from_dfd, from_pathname, to_dfd, to_pathname, flags);
> > > > +}
> > > 
> > > Not directly new in your patch, but I wish we'd have a central header
> > > for missing UAPI bits in xfstests instead of spreading it over
> > > various files.
> > 
> > FWIW if you (or anyone else) /does/ create a src/linux_calls.h or
> > similar for things like this, I will totally start adding things to it
> > instead of scattering them everywhere. :)
> 
> I do usually have in my C projects two .h files (without corresponding
> .c files):
> 
> 1. syscall_numbers.h
>    https://github.com/lxc/lxc/blob/master/src/lxc/syscall_numbers.h
> 
>    Where syscall_numbers defines the missing syscall numbers. For new
>    syscalls in the style exhibited in this patch and for legacy syscalls
>    it's a bit more nasty (i.e. syscalls before we unified numbering [apart
>    from alpha]):
>    
> #ifndef __NR_memfd_create
> 	#if defined __i386__
> 		#define __NR_memfd_create 356
> 	#elif defined __x86_64__
> 		#define __NR_memfd_create 319
> 	#elif defined __arm__
> 		#define __NR_memfd_create 385
> 	#elif defined __aarch64__
> 		#define __NR_memfd_create 279
> 	#elif defined __s390__
> 		#define __NR_memfd_create 350
> 	#elif defined __powerpc__
> 		#define __NR_memfd_create 360
> 	#elif defined __riscv
> 		#define __NR_memfd_create 279
> 	#elif defined __sparc__
> 		#define __NR_memfd_create 348
> 	#elif defined __blackfin__
> 		#define __NR_memfd_create 390
> 	#elif defined __ia64__
> 		#define __NR_memfd_create 1340
> 	#elif defined _MIPS_SIM
> 		#if _MIPS_SIM == _MIPS_SIM_ABI32
> 			#define __NR_memfd_create 4354
> 		#endif
> 		#if _MIPS_SIM == _MIPS_SIM_NABI32
> 			#define __NR_memfd_create 6318
> 		#endif
> 		#if _MIPS_SIM == _MIPS_SIM_ABI64
> 			#define __NR_memfd_create 5314
> 		#endif
> 	#else
> 		#define -1
> 		#warning "__NR_memfd_create not defined for your architecture"
> 	#endif
> #endif
> 
> 2. syscall_wrappers.h
>    https://github.com/lxc/lxc/blob/master/src/lxc/syscall_wrappers.h
> 
>    Where syscall_wrappers.h defines minimal wrappers for missing
>    syscalls and their corresponding types.
> 
> #ifndef HAVE_SYS_SIGNALFD_H
> struct signalfd_siginfo {
> 	uint32_t ssi_signo;
> 	int32_t ssi_errno;
> 	int32_t ssi_code;
> 	uint32_t ssi_pid;
> 	uint32_t ssi_uid;
> 	int32_t ssi_fd;
> 	uint32_t ssi_tid;
> 	uint32_t ssi_band;
> 	uint32_t ssi_overrun;
> 	uint32_t ssi_trapno;
> 	int32_t ssi_status;
> 	int32_t ssi_int;
> 	uint64_t ssi_ptr;
> 	uint64_t ssi_utime;
> 	uint64_t ssi_stime;
> 	uint64_t ssi_addr;
> 	uint8_t __pad[48];
> };
> 
> static inline int signalfd(int fd, const sigset_t *mask, int flags)
> {
> 	int retval;
> 
> 	retval = syscall(__NR_signalfd4, fd, mask, _NSIG / 8, flags);
> #ifdef __NR_signalfd
> 	if (errno == ENOSYS && flags == 0)
> 		retval = syscall(__NR_signalfd, fd, mask, _NSIG / 8);
> #endif
> 
> 	return retval;
> }
> #endif
> 
> Something like this might also work for xfstests.

Yeah, not 5 minutes later someone emailed me a bug report that fstests
doesn't build on Ubuntu 16.04 due to its kernel headers not knowing
about fsxattr, so I guess I'll have a patch out adding this exact header
file later today.

--D

> 
> Christian
Darrick J. Wong March 18, 2021, 4:31 p.m. UTC | #5
On Thu, Mar 18, 2021 at 08:57:19AM -0700, Darrick J. Wong wrote:
> On Thu, Mar 18, 2021 at 04:14:44PM +0100, Christian Brauner wrote:
> > On Thu, Mar 18, 2021 at 08:02:30AM -0700, Darrick J. Wong wrote:
> > > On Thu, Mar 18, 2021 at 07:23:10AM +0100, Christoph Hellwig wrote:
> > > > > +/* open_tree() */
> > > > > +#ifndef OPEN_TREE_CLONE
> > > > > +#define OPEN_TREE_CLONE 1
> > > > > +#endif
> > > > > +
> > > > > +#ifndef OPEN_TREE_CLOEXEC
> > > > > +#define OPEN_TREE_CLOEXEC O_CLOEXEC
> > > > > +#endif
> > > > > +
> > > > > +#ifndef __NR_open_tree
> > > > > +	#if defined __alpha__
> > > > > +		#define __NR_open_tree 538
> > > > > +	#elif defined _MIPS_SIM
> > > > > +		#if _MIPS_SIM == _MIPS_SIM_ABI32	/* o32 */
> > > > > +			#define __NR_open_tree 4428
> > > > > +		#endif
> > > > > +		#if _MIPS_SIM == _MIPS_SIM_NABI32	/* n32 */
> > > > > +			#define __NR_open_tree 6428
> > > > > +		#endif
> > > > > +		#if _MIPS_SIM == _MIPS_SIM_ABI64	/* n64 */
> > > > > +			#define __NR_open_tree 5428
> > > > > +		#endif
> > > > > +	#elif defined __ia64__
> > > > > +		#define __NR_open_tree (428 + 1024)
> > > > > +	#else
> > > > > +		#define __NR_open_tree 428
> > > > > +	#endif
> > > > > +#endif
> > > > > +
> > > > > +/* move_mount() */
> > > > > +#ifndef MOVE_MOUNT_F_EMPTY_PATH
> > > > > +#define MOVE_MOUNT_F_EMPTY_PATH 0x00000004 /* Empty from path permitted */
> > > > > +#endif
> > > > > +
> > > > > +#ifndef __NR_move_mount
> > > > > +	#if defined __alpha__
> > > > > +		#define __NR_move_mount 539
> > > > > +	#elif defined _MIPS_SIM
> > > > > +		#if _MIPS_SIM == _MIPS_SIM_ABI32	/* o32 */
> > > > > +			#define __NR_move_mount 4429
> > > > > +		#endif
> > > > > +		#if _MIPS_SIM == _MIPS_SIM_NABI32	/* n32 */
> > > > > +			#define __NR_move_mount 6429
> > > > > +		#endif
> > > > > +		#if _MIPS_SIM == _MIPS_SIM_ABI64	/* n64 */
> > > > > +			#define __NR_move_mount 5429
> > > > > +		#endif
> > > > > +	#elif defined __ia64__
> > > > > +		#define __NR_move_mount (428 + 1024)
> > > > > +	#else
> > > > > +		#define __NR_move_mount 429
> > > > > +	#endif
> > > > > +#endif
> > > > > +
> > > > > +static inline int sys_open_tree(int dfd, const char *filename, unsigned int flags)
> > > > > +{
> > > > > +	return syscall(__NR_open_tree, dfd, filename, flags);
> > > > > +}
> > > > > +
> > > > > +static inline int sys_move_mount(int from_dfd, const char *from_pathname, int to_dfd,
> > > > > +				 const char *to_pathname, unsigned int flags)
> > > > > +{
> > > > > +	return syscall(__NR_move_mount, from_dfd, from_pathname, to_dfd, to_pathname, flags);
> > > > > +}
> > > > 
> > > > Not directly new in your patch, but I wish we'd have a central header
> > > > for missing UAPI bits in xfstests instead of spreading it over
> > > > various files.
> > > 
> > > FWIW if you (or anyone else) /does/ create a src/linux_calls.h or
> > > similar for things like this, I will totally start adding things to it
> > > instead of scattering them everywhere. :)
> > 
> > I do usually have in my C projects two .h files (without corresponding
> > .c files):
> > 
> > 1. syscall_numbers.h
> >    https://github.com/lxc/lxc/blob/master/src/lxc/syscall_numbers.h
> > 
> >    Where syscall_numbers defines the missing syscall numbers. For new
> >    syscalls in the style exhibited in this patch and for legacy syscalls
> >    it's a bit more nasty (i.e. syscalls before we unified numbering [apart
> >    from alpha]):
> >    
> > #ifndef __NR_memfd_create
> > 	#if defined __i386__
> > 		#define __NR_memfd_create 356
> > 	#elif defined __x86_64__
> > 		#define __NR_memfd_create 319
> > 	#elif defined __arm__
> > 		#define __NR_memfd_create 385
> > 	#elif defined __aarch64__
> > 		#define __NR_memfd_create 279
> > 	#elif defined __s390__
> > 		#define __NR_memfd_create 350
> > 	#elif defined __powerpc__
> > 		#define __NR_memfd_create 360
> > 	#elif defined __riscv
> > 		#define __NR_memfd_create 279
> > 	#elif defined __sparc__
> > 		#define __NR_memfd_create 348
> > 	#elif defined __blackfin__
> > 		#define __NR_memfd_create 390
> > 	#elif defined __ia64__
> > 		#define __NR_memfd_create 1340
> > 	#elif defined _MIPS_SIM
> > 		#if _MIPS_SIM == _MIPS_SIM_ABI32
> > 			#define __NR_memfd_create 4354
> > 		#endif
> > 		#if _MIPS_SIM == _MIPS_SIM_NABI32
> > 			#define __NR_memfd_create 6318
> > 		#endif
> > 		#if _MIPS_SIM == _MIPS_SIM_ABI64
> > 			#define __NR_memfd_create 5314
> > 		#endif
> > 	#else
> > 		#define -1
> > 		#warning "__NR_memfd_create not defined for your architecture"
> > 	#endif
> > #endif
> > 
> > 2. syscall_wrappers.h
> >    https://github.com/lxc/lxc/blob/master/src/lxc/syscall_wrappers.h
> > 
> >    Where syscall_wrappers.h defines minimal wrappers for missing
> >    syscalls and their corresponding types.
> > 
> > #ifndef HAVE_SYS_SIGNALFD_H
> > struct signalfd_siginfo {
> > 	uint32_t ssi_signo;
> > 	int32_t ssi_errno;
> > 	int32_t ssi_code;
> > 	uint32_t ssi_pid;
> > 	uint32_t ssi_uid;
> > 	int32_t ssi_fd;
> > 	uint32_t ssi_tid;
> > 	uint32_t ssi_band;
> > 	uint32_t ssi_overrun;
> > 	uint32_t ssi_trapno;
> > 	int32_t ssi_status;
> > 	int32_t ssi_int;
> > 	uint64_t ssi_ptr;
> > 	uint64_t ssi_utime;
> > 	uint64_t ssi_stime;
> > 	uint64_t ssi_addr;
> > 	uint8_t __pad[48];
> > };
> > 
> > static inline int signalfd(int fd, const sigset_t *mask, int flags)
> > {
> > 	int retval;
> > 
> > 	retval = syscall(__NR_signalfd4, fd, mask, _NSIG / 8, flags);
> > #ifdef __NR_signalfd
> > 	if (errno == ENOSYS && flags == 0)
> > 		retval = syscall(__NR_signalfd, fd, mask, _NSIG / 8);
> > #endif
> > 
> > 	return retval;
> > }
> > #endif
> > 
> > Something like this might also work for xfstests.
> 
> Yeah, not 5 minutes later someone emailed me a bug report that fstests
> doesn't build on Ubuntu 16.04 due to its kernel headers not knowing
> about fsxattr, so I guess I'll have a patch out adding this exact header
> file later today.

Or, as my own notes reminded me, put all that stuff in a header file,
and include it from src/global.h if the system libraries didn't pull it
in.

--D

> --D
> 
> > 
> > Christian
Eryu Guan March 21, 2021, 2:28 p.m. UTC | #6
On Tue, Mar 16, 2021 at 11:36:24AM +0100, Christian Brauner wrote:
> Regression test to verify that creating a series of detached mounts,
> attaching them to the filesystem, and unmounting them does not trigger an
> integer overflow in ns->mounts causing the kernel to block any new mounts in
> count_mounts() and returning ENOSPC because it falsely assumes that the
> maximum number of mounts in the mount namespace has been reached, i.e. it
> thinks it can't fit the new mounts into the mount namespace anymore.
> 
> The test is written in a way that it will leave the host's mount
> namespace intact so we are sure to never make the host's mount namespace
> unuseable!
> 
> Link: https://git.kernel.org/torvalds/c/ee2e3f50629f17b0752b55b2566c15ce8dafb557
> Cc: Christoph Hellwig <hch@lst.de>
> Cc: David Howells <dhowells@redhat.com>
> Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
> ---
> /* v1 */
> patch not present
> 
> /* v2 */
> patch not present
> 
> /* v3 */
> patch not present
> 
> /* v4 */
> patch not present
> 
> /* v5 */
> patch not present
> 
> /* v6 */
> patch not present
> 
> /* v7 */
> patch not present
> 
> /* v8 */
> patch introduced
> 
> /* v9 */
> - Christian Brauner <christian.brauner@ubuntu.com>:
>   - Rebased on current master.
> ---
>  .gitignore                        |   1 +
>  src/Makefile                      |   3 +-
>  src/detached_mounts_propagation.c | 252 ++++++++++++++++++++++++++++++
>  tests/generic/631                 |  41 +++++
>  tests/generic/631.out             |   2 +
>  tests/generic/group               |   1 +
>  6 files changed, 299 insertions(+), 1 deletion(-)
>  create mode 100644 src/detached_mounts_propagation.c
>  create mode 100644 tests/generic/631
>  create mode 100644 tests/generic/631.out
> 
> diff --git a/.gitignore b/.gitignore
> index f3bc0a4f..72bd40a0 100644
> --- a/.gitignore
> +++ b/.gitignore
> @@ -62,6 +62,7 @@
>  /src/cloner
>  /src/dbtest
>  /src/deduperace
> +/src/detached_mounts_propagation
>  /src/devzero
>  /src/dio-interleaved
>  /src/dio-invalidate-cache
> diff --git a/src/Makefile b/src/Makefile
> index 3d729a34..99019e6e 100644
> --- a/src/Makefile
> +++ b/src/Makefile
> @@ -29,7 +29,8 @@ LINUX_TARGETS = xfsctl bstat t_mtab getdevicesize preallo_rw_pattern_reader \
>  	attr-list-by-handle-cursor-test listxattr dio-interleaved t_dir_type \
>  	dio-invalidate-cache stat_test t_encrypted_d_revalidate \
>  	attr_replace_test swapon mkswap t_attr_corruption t_open_tmpfiles \
> -	fscrypt-crypt-util bulkstat_null_ocount splice-test chprojid_fail
> +	fscrypt-crypt-util bulkstat_null_ocount splice-test chprojid_fail \
> +	detached_mounts_propagation
>  
>  SUBDIRS = log-writes perf
>  
> diff --git a/src/detached_mounts_propagation.c b/src/detached_mounts_propagation.c
> new file mode 100644
> index 00000000..4a588e46
> --- /dev/null
> +++ b/src/detached_mounts_propagation.c
> @@ -0,0 +1,252 @@
> +/* SPDX-License-Identifier: LGPL-2.1+ */
> +/*
> + * Copyright (c) 2021 Christian Brauner <christian.brauner@ubuntu.com>
> + * All Rights Reserved.
> + *
> + * Regression test to verify that creating a series of detached mounts,
> + * attaching them to the filesystem, and unmounting them does not trigger an
> + * integer overflow in ns->mounts causing the kernel to block any new mounts in
> + * count_mounts() and returning ENOSPC because it falsely assumes that the
> + * maximum number of mounts in the mount namespace has been reached, i.e. it
> + * thinks it can't fit the new mounts into the mount namespace anymore.
> + */
> +
> +#include <errno.h>
> +#include <fcntl.h>
> +#include <getopt.h>
> +#include <limits.h>
> +#include <sched.h>
> +#include <stdbool.h>
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <string.h>
> +#include <sys/mount.h>
> +#include <sys/stat.h>
> +#include <sys/syscall.h>
> +#include <sys/types.h>
> +#include <unistd.h>
> +
> +/* open_tree() */
> +#ifndef OPEN_TREE_CLONE
> +#define OPEN_TREE_CLONE 1
> +#endif
> +
> +#ifndef OPEN_TREE_CLOEXEC
> +#define OPEN_TREE_CLOEXEC O_CLOEXEC
> +#endif
> +
> +#ifndef __NR_open_tree
> +	#if defined __alpha__
> +		#define __NR_open_tree 538
> +	#elif defined _MIPS_SIM
> +		#if _MIPS_SIM == _MIPS_SIM_ABI32	/* o32 */
> +			#define __NR_open_tree 4428
> +		#endif
> +		#if _MIPS_SIM == _MIPS_SIM_NABI32	/* n32 */
> +			#define __NR_open_tree 6428
> +		#endif
> +		#if _MIPS_SIM == _MIPS_SIM_ABI64	/* n64 */
> +			#define __NR_open_tree 5428
> +		#endif
> +	#elif defined __ia64__
> +		#define __NR_open_tree (428 + 1024)
> +	#else
> +		#define __NR_open_tree 428
> +	#endif
> +#endif
> +
> +/* move_mount() */
> +#ifndef MOVE_MOUNT_F_EMPTY_PATH
> +#define MOVE_MOUNT_F_EMPTY_PATH 0x00000004 /* Empty from path permitted */
> +#endif
> +
> +#ifndef __NR_move_mount
> +	#if defined __alpha__
> +		#define __NR_move_mount 539
> +	#elif defined _MIPS_SIM
> +		#if _MIPS_SIM == _MIPS_SIM_ABI32	/* o32 */
> +			#define __NR_move_mount 4429
> +		#endif
> +		#if _MIPS_SIM == _MIPS_SIM_NABI32	/* n32 */
> +			#define __NR_move_mount 6429
> +		#endif
> +		#if _MIPS_SIM == _MIPS_SIM_ABI64	/* n64 */
> +			#define __NR_move_mount 5429
> +		#endif
> +	#elif defined __ia64__
> +		#define __NR_move_mount (428 + 1024)
> +	#else
> +		#define __NR_move_mount 429
> +	#endif
> +#endif
> +
> +static inline int sys_open_tree(int dfd, const char *filename, unsigned int flags)
> +{
> +	return syscall(__NR_open_tree, dfd, filename, flags);
> +}
> +
> +static inline int sys_move_mount(int from_dfd, const char *from_pathname, int to_dfd,
> +				 const char *to_pathname, unsigned int flags)
> +{
> +	return syscall(__NR_move_mount, from_dfd, from_pathname, to_dfd, to_pathname, flags);
> +}
> +
> +static bool is_shared_mountpoint(const char *path)
> +{
> +	bool shared = false;
> +	FILE *f = NULL;
> +	char *line = NULL;
> +	int i;
> +	size_t len = 0;
> +
> +	f = fopen("/proc/self/mountinfo", "re");
> +	if (!f)
> +		return 0;
> +
> +	while (getline(&line, &len, f) > 0) {
> +		char *slider1, *slider2;
> +
> +		for (slider1 = line, i = 0; slider1 && i < 4; i++)
> +			slider1 = strchr(slider1 + 1, ' ');
> +
> +		if (!slider1)
> +			continue;
> +
> +		slider2 = strchr(slider1 + 1, ' ');
> +		if (!slider2)
> +			continue;
> +
> +		*slider2 = '\0';
> +		if (strcmp(slider1 + 1, path) == 0) {
> +			/* This is the path. Is it shared? */
> +			slider1 = strchr(slider2 + 1, ' ');
> +			if (slider1 && strstr(slider1, "shared:")) {
> +				shared = true;
> +				break;
> +			}
> +		}
> +	}
> +	fclose(f);
> +	free(line);
> +
> +	return shared;
> +}
> +
> +static void usage(void)
> +{
> +	const char *text = "mount-new [--recursive] <base-dir>\n";
> +	fprintf(stderr, "%s", text);
> +	_exit(EXIT_SUCCESS);
> +}
> +
> +#define exit_usage(format, ...)                              \
> +	({                                                   \
> +		fprintf(stderr, format "\n", ##__VA_ARGS__); \
> +		usage();                                     \
> +	})
> +
> +#define exit_log(format, ...)                                \
> +	({                                                   \
> +		fprintf(stderr, format "\n", ##__VA_ARGS__); \
> +		exit(EXIT_FAILURE);                          \
> +	})
> +
> +static const struct option longopts[] = {
> +	{"help",	no_argument,		0,	'a'},
> +	{ NULL,		no_argument,		0,	 0 },
> +};
> +
> +int main(int argc, char *argv[])
> +{
> +	int exit_code = EXIT_SUCCESS, index = 0;
> +	int dfd, fd_tree, new_argc, ret;
> +	char *base_dir;
> +	char *const *new_argv;
> +	char target[PATH_MAX];
> +
> +	while ((ret = getopt_long_only(argc, argv, "", longopts, &index)) != -1) {
> +		switch (ret) {
> +		case 'a':
> +			/* fallthrough */
> +		default:
> +			usage();
> +		}
> +	}
> +
> +	new_argv = &argv[optind];
> +	new_argc = argc - optind;
> +	if (new_argc < 1)
> +		exit_usage("Missing base directory\n");
> +	base_dir = new_argv[0];
> +
> +	if (*base_dir != '/')
> +		exit_log("Please specify an absolute path");
> +
> +	/* Ensure that target is a shared mountpoint. */
> +	if (!is_shared_mountpoint(base_dir))
> +		exit_log("Please ensure that \"%s\" is a shared mountpoint", base_dir);
> +
> +	ret = unshare(CLONE_NEWNS);
> +	if (ret < 0)
> +		exit_log("%m - Failed to create new mount namespace");
> +
> +	ret = mount(NULL, base_dir, NULL, MS_REC | MS_SHARED, NULL);
> +	if (ret < 0)
> +		exit_log("%m - Failed to make base_dir shared mountpoint");
> +
> +	dfd = open(base_dir, O_RDONLY | O_DIRECTORY | O_CLOEXEC);
> +	if (dfd < 0)
> +		exit_log("%m - Failed to open base directory \"%s\"", base_dir);
> +
> +	ret = mkdirat(dfd, "detached-move-mount", 0755);
> +	if (ret < 0)
> +		exit_log("%m - Failed to create required temporary directories");
> +
> +	ret = snprintf(target, sizeof(target), "%s/detached-move-mount", base_dir);
> +	if (ret < 0 || (size_t)ret >= sizeof(target))
> +		exit_log("%m - Failed to assemble target path");
> +
> +	/*
> +	 * Having a mount table with 10000 mounts is already quite excessive
> +	 * and shoult account even for weird test systems.
> +	 */
> +	for (size_t i = 0; i < 10000; i++) {
> +		fd_tree = sys_open_tree(dfd, "detached-move-mount",
> +					OPEN_TREE_CLONE |
> +					OPEN_TREE_CLOEXEC |
> +					AT_EMPTY_PATH);
> +		if (fd_tree < 0) {
> +			if (errno == ENOSYS) /* New mount API not (fully) supported. */
> +				break;
> +
> +			fprintf(stderr, "%m - Failed to open %d(detached-move-mount)", dfd);
> +			exit_code = EXIT_FAILURE;
> +			break;
> +		}
> +
> +		ret = sys_move_mount(fd_tree, "", dfd, "detached-move-mount", MOVE_MOUNT_F_EMPTY_PATH);
> +		if (ret < 0) {
> +			if (errno == ENOSPC)
> +				fprintf(stderr, "%m - Buggy mount counting");
> +			else if (errno == ENOSYS) /* New mount API not (fully) supported. */
> +				break;
> +			else
> +				fprintf(stderr, "%m - Failed to attach mount to %d(detached-move-mount)", dfd);
> +			exit_code = EXIT_FAILURE;
> +			break;
> +		}
> +		close(fd_tree);
> +
> +		ret = umount2(target, MNT_DETACH);
> +		if (ret < 0) {
> +			fprintf(stderr, "%m - Failed to unmount %s", target);
> +			exit_code = EXIT_FAILURE;
> +			break;
> +		}
> +	}
> +
> +	(void)unlinkat(dfd, "detached-move-mount", AT_REMOVEDIR);
> +	close(dfd);
> +
> +	exit(exit_code);
> +}
> diff --git a/tests/generic/631 b/tests/generic/631
> new file mode 100644
> index 00000000..e227c448
> --- /dev/null
> +++ b/tests/generic/631
> @@ -0,0 +1,41 @@
> +#! /bin/bash
> +# SPDX-License-Identifier: GPL-2.0+
> +#
> +# Copyright (c) 2021 Christian Brauner <christian.brauner@ubuntu.com>
> +# All Rights Reserved.
> +#
> +# FS QA Test No. 631
> +#
> +# Regression test to verify that creating a series of detached mounts,
> +# attaching them to the filesystem, and unmounting them does not trigger an
> +# integer overflow in ns->mounts causing the kernel to block any new mounts in
> +# count_mounts() and returning ENOSPC because it falsely assumes that the
> +# maximum number of mounts in the mount namespace has been reached, i.e. it
> +# thinks it can't fit the new mounts into the mount namespace anymore.
> +#
> +seq=`basename $0`
> +seqres=$RESULT_DIR/$seq
> +echo "QA output created by $seq"
> +
> +here=`pwd`
> +tmp=/tmp/$$
> +status=1	# failure is the default!
> +trap "_cleanup; exit \$status" 0 1 2 3 15
> +
> +_cleanup()
> +{
> +	cd /
> +	rm -f $tmp.*
> +}
> +
> +. ./common/rc
> +
> +rm -f $seqres.full
> +
> +_supported_fs generic

_require_test

As test runs directly on $TEST_DIR.

Also needs

_require_test_program "detached_mounts_propagation"

> +
> +$here/src/detached_mounts_propagation $TEST_DIR >> $seqres.full
> +
> +echo silence is golden
> +status=$?

This should go after detached_mounts_propagation command?

Thanks,
Eryu

> +exit
> diff --git a/tests/generic/631.out b/tests/generic/631.out
> new file mode 100644
> index 00000000..ce88f447
> --- /dev/null
> +++ b/tests/generic/631.out
> @@ -0,0 +1,2 @@
> +QA output created by 631
> +silence is golden
> diff --git a/tests/generic/group b/tests/generic/group
> index 7c7531d1..151f8af2 100644
> --- a/tests/generic/group
> +++ b/tests/generic/group
> @@ -633,3 +633,4 @@
>  628 auto quick rw clone
>  629 auto quick rw copy_range
>  630 auto quick rw dedupe clone
> +631 auto quick mount
> -- 
> 2.27.0
Christian Brauner March 22, 2021, 10 a.m. UTC | #7
On Sun, Mar 21, 2021 at 10:28:52PM +0800, Eryu Guan wrote:
> On Tue, Mar 16, 2021 at 11:36:24AM +0100, Christian Brauner wrote:
> > Regression test to verify that creating a series of detached mounts,
> > attaching them to the filesystem, and unmounting them does not trigger an
> > integer overflow in ns->mounts causing the kernel to block any new mounts in
> > count_mounts() and returning ENOSPC because it falsely assumes that the
> > maximum number of mounts in the mount namespace has been reached, i.e. it
> > thinks it can't fit the new mounts into the mount namespace anymore.
> > 
> > The test is written in a way that it will leave the host's mount
> > namespace intact so we are sure to never make the host's mount namespace
> > unuseable!
> > 
> > Link: https://git.kernel.org/torvalds/c/ee2e3f50629f17b0752b55b2566c15ce8dafb557
> > Cc: Christoph Hellwig <hch@lst.de>
> > Cc: David Howells <dhowells@redhat.com>
> > Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
> > ---
> > /* v1 */
> > patch not present
> > 
> > /* v2 */
> > patch not present
> > 
> > /* v3 */
> > patch not present
> > 
> > /* v4 */
> > patch not present
> > 
> > /* v5 */
> > patch not present
> > 
> > /* v6 */
> > patch not present
> > 
> > /* v7 */
> > patch not present
> > 
> > /* v8 */
> > patch introduced
> > 
> > /* v9 */
> > - Christian Brauner <christian.brauner@ubuntu.com>:
> >   - Rebased on current master.
> > ---
> >  .gitignore                        |   1 +
> >  src/Makefile                      |   3 +-
> >  src/detached_mounts_propagation.c | 252 ++++++++++++++++++++++++++++++
> >  tests/generic/631                 |  41 +++++
> >  tests/generic/631.out             |   2 +
> >  tests/generic/group               |   1 +
> >  6 files changed, 299 insertions(+), 1 deletion(-)
> >  create mode 100644 src/detached_mounts_propagation.c
> >  create mode 100644 tests/generic/631
> >  create mode 100644 tests/generic/631.out
> > 
> > diff --git a/.gitignore b/.gitignore
> > index f3bc0a4f..72bd40a0 100644
> > --- a/.gitignore
> > +++ b/.gitignore
> > @@ -62,6 +62,7 @@
> >  /src/cloner
> >  /src/dbtest
> >  /src/deduperace
> > +/src/detached_mounts_propagation
> >  /src/devzero
> >  /src/dio-interleaved
> >  /src/dio-invalidate-cache
> > diff --git a/src/Makefile b/src/Makefile
> > index 3d729a34..99019e6e 100644
> > --- a/src/Makefile
> > +++ b/src/Makefile
> > @@ -29,7 +29,8 @@ LINUX_TARGETS = xfsctl bstat t_mtab getdevicesize preallo_rw_pattern_reader \
> >  	attr-list-by-handle-cursor-test listxattr dio-interleaved t_dir_type \
> >  	dio-invalidate-cache stat_test t_encrypted_d_revalidate \
> >  	attr_replace_test swapon mkswap t_attr_corruption t_open_tmpfiles \
> > -	fscrypt-crypt-util bulkstat_null_ocount splice-test chprojid_fail
> > +	fscrypt-crypt-util bulkstat_null_ocount splice-test chprojid_fail \
> > +	detached_mounts_propagation
> >  
> >  SUBDIRS = log-writes perf
> >  
> > diff --git a/src/detached_mounts_propagation.c b/src/detached_mounts_propagation.c
> > new file mode 100644
> > index 00000000..4a588e46
> > --- /dev/null
> > +++ b/src/detached_mounts_propagation.c
> > @@ -0,0 +1,252 @@
> > +/* SPDX-License-Identifier: LGPL-2.1+ */
> > +/*
> > + * Copyright (c) 2021 Christian Brauner <christian.brauner@ubuntu.com>
> > + * All Rights Reserved.
> > + *
> > + * Regression test to verify that creating a series of detached mounts,
> > + * attaching them to the filesystem, and unmounting them does not trigger an
> > + * integer overflow in ns->mounts causing the kernel to block any new mounts in
> > + * count_mounts() and returning ENOSPC because it falsely assumes that the
> > + * maximum number of mounts in the mount namespace has been reached, i.e. it
> > + * thinks it can't fit the new mounts into the mount namespace anymore.
> > + */
> > +
> > +#include <errno.h>
> > +#include <fcntl.h>
> > +#include <getopt.h>
> > +#include <limits.h>
> > +#include <sched.h>
> > +#include <stdbool.h>
> > +#include <stdio.h>
> > +#include <stdlib.h>
> > +#include <string.h>
> > +#include <sys/mount.h>
> > +#include <sys/stat.h>
> > +#include <sys/syscall.h>
> > +#include <sys/types.h>
> > +#include <unistd.h>
> > +
> > +/* open_tree() */
> > +#ifndef OPEN_TREE_CLONE
> > +#define OPEN_TREE_CLONE 1
> > +#endif
> > +
> > +#ifndef OPEN_TREE_CLOEXEC
> > +#define OPEN_TREE_CLOEXEC O_CLOEXEC
> > +#endif
> > +
> > +#ifndef __NR_open_tree
> > +	#if defined __alpha__
> > +		#define __NR_open_tree 538
> > +	#elif defined _MIPS_SIM
> > +		#if _MIPS_SIM == _MIPS_SIM_ABI32	/* o32 */
> > +			#define __NR_open_tree 4428
> > +		#endif
> > +		#if _MIPS_SIM == _MIPS_SIM_NABI32	/* n32 */
> > +			#define __NR_open_tree 6428
> > +		#endif
> > +		#if _MIPS_SIM == _MIPS_SIM_ABI64	/* n64 */
> > +			#define __NR_open_tree 5428
> > +		#endif
> > +	#elif defined __ia64__
> > +		#define __NR_open_tree (428 + 1024)
> > +	#else
> > +		#define __NR_open_tree 428
> > +	#endif
> > +#endif
> > +
> > +/* move_mount() */
> > +#ifndef MOVE_MOUNT_F_EMPTY_PATH
> > +#define MOVE_MOUNT_F_EMPTY_PATH 0x00000004 /* Empty from path permitted */
> > +#endif
> > +
> > +#ifndef __NR_move_mount
> > +	#if defined __alpha__
> > +		#define __NR_move_mount 539
> > +	#elif defined _MIPS_SIM
> > +		#if _MIPS_SIM == _MIPS_SIM_ABI32	/* o32 */
> > +			#define __NR_move_mount 4429
> > +		#endif
> > +		#if _MIPS_SIM == _MIPS_SIM_NABI32	/* n32 */
> > +			#define __NR_move_mount 6429
> > +		#endif
> > +		#if _MIPS_SIM == _MIPS_SIM_ABI64	/* n64 */
> > +			#define __NR_move_mount 5429
> > +		#endif
> > +	#elif defined __ia64__
> > +		#define __NR_move_mount (428 + 1024)
> > +	#else
> > +		#define __NR_move_mount 429
> > +	#endif
> > +#endif
> > +
> > +static inline int sys_open_tree(int dfd, const char *filename, unsigned int flags)
> > +{
> > +	return syscall(__NR_open_tree, dfd, filename, flags);
> > +}
> > +
> > +static inline int sys_move_mount(int from_dfd, const char *from_pathname, int to_dfd,
> > +				 const char *to_pathname, unsigned int flags)
> > +{
> > +	return syscall(__NR_move_mount, from_dfd, from_pathname, to_dfd, to_pathname, flags);
> > +}
> > +
> > +static bool is_shared_mountpoint(const char *path)
> > +{
> > +	bool shared = false;
> > +	FILE *f = NULL;
> > +	char *line = NULL;
> > +	int i;
> > +	size_t len = 0;
> > +
> > +	f = fopen("/proc/self/mountinfo", "re");
> > +	if (!f)
> > +		return 0;
> > +
> > +	while (getline(&line, &len, f) > 0) {
> > +		char *slider1, *slider2;
> > +
> > +		for (slider1 = line, i = 0; slider1 && i < 4; i++)
> > +			slider1 = strchr(slider1 + 1, ' ');
> > +
> > +		if (!slider1)
> > +			continue;
> > +
> > +		slider2 = strchr(slider1 + 1, ' ');
> > +		if (!slider2)
> > +			continue;
> > +
> > +		*slider2 = '\0';
> > +		if (strcmp(slider1 + 1, path) == 0) {
> > +			/* This is the path. Is it shared? */
> > +			slider1 = strchr(slider2 + 1, ' ');
> > +			if (slider1 && strstr(slider1, "shared:")) {
> > +				shared = true;
> > +				break;
> > +			}
> > +		}
> > +	}
> > +	fclose(f);
> > +	free(line);
> > +
> > +	return shared;
> > +}
> > +
> > +static void usage(void)
> > +{
> > +	const char *text = "mount-new [--recursive] <base-dir>\n";
> > +	fprintf(stderr, "%s", text);
> > +	_exit(EXIT_SUCCESS);
> > +}
> > +
> > +#define exit_usage(format, ...)                              \
> > +	({                                                   \
> > +		fprintf(stderr, format "\n", ##__VA_ARGS__); \
> > +		usage();                                     \
> > +	})
> > +
> > +#define exit_log(format, ...)                                \
> > +	({                                                   \
> > +		fprintf(stderr, format "\n", ##__VA_ARGS__); \
> > +		exit(EXIT_FAILURE);                          \
> > +	})
> > +
> > +static const struct option longopts[] = {
> > +	{"help",	no_argument,		0,	'a'},
> > +	{ NULL,		no_argument,		0,	 0 },
> > +};
> > +
> > +int main(int argc, char *argv[])
> > +{
> > +	int exit_code = EXIT_SUCCESS, index = 0;
> > +	int dfd, fd_tree, new_argc, ret;
> > +	char *base_dir;
> > +	char *const *new_argv;
> > +	char target[PATH_MAX];
> > +
> > +	while ((ret = getopt_long_only(argc, argv, "", longopts, &index)) != -1) {
> > +		switch (ret) {
> > +		case 'a':
> > +			/* fallthrough */
> > +		default:
> > +			usage();
> > +		}
> > +	}
> > +
> > +	new_argv = &argv[optind];
> > +	new_argc = argc - optind;
> > +	if (new_argc < 1)
> > +		exit_usage("Missing base directory\n");
> > +	base_dir = new_argv[0];
> > +
> > +	if (*base_dir != '/')
> > +		exit_log("Please specify an absolute path");
> > +
> > +	/* Ensure that target is a shared mountpoint. */
> > +	if (!is_shared_mountpoint(base_dir))
> > +		exit_log("Please ensure that \"%s\" is a shared mountpoint", base_dir);
> > +
> > +	ret = unshare(CLONE_NEWNS);
> > +	if (ret < 0)
> > +		exit_log("%m - Failed to create new mount namespace");
> > +
> > +	ret = mount(NULL, base_dir, NULL, MS_REC | MS_SHARED, NULL);
> > +	if (ret < 0)
> > +		exit_log("%m - Failed to make base_dir shared mountpoint");
> > +
> > +	dfd = open(base_dir, O_RDONLY | O_DIRECTORY | O_CLOEXEC);
> > +	if (dfd < 0)
> > +		exit_log("%m - Failed to open base directory \"%s\"", base_dir);
> > +
> > +	ret = mkdirat(dfd, "detached-move-mount", 0755);
> > +	if (ret < 0)
> > +		exit_log("%m - Failed to create required temporary directories");
> > +
> > +	ret = snprintf(target, sizeof(target), "%s/detached-move-mount", base_dir);
> > +	if (ret < 0 || (size_t)ret >= sizeof(target))
> > +		exit_log("%m - Failed to assemble target path");
> > +
> > +	/*
> > +	 * Having a mount table with 10000 mounts is already quite excessive
> > +	 * and shoult account even for weird test systems.
> > +	 */
> > +	for (size_t i = 0; i < 10000; i++) {
> > +		fd_tree = sys_open_tree(dfd, "detached-move-mount",
> > +					OPEN_TREE_CLONE |
> > +					OPEN_TREE_CLOEXEC |
> > +					AT_EMPTY_PATH);
> > +		if (fd_tree < 0) {
> > +			if (errno == ENOSYS) /* New mount API not (fully) supported. */
> > +				break;
> > +
> > +			fprintf(stderr, "%m - Failed to open %d(detached-move-mount)", dfd);
> > +			exit_code = EXIT_FAILURE;
> > +			break;
> > +		}
> > +
> > +		ret = sys_move_mount(fd_tree, "", dfd, "detached-move-mount", MOVE_MOUNT_F_EMPTY_PATH);
> > +		if (ret < 0) {
> > +			if (errno == ENOSPC)
> > +				fprintf(stderr, "%m - Buggy mount counting");
> > +			else if (errno == ENOSYS) /* New mount API not (fully) supported. */
> > +				break;
> > +			else
> > +				fprintf(stderr, "%m - Failed to attach mount to %d(detached-move-mount)", dfd);
> > +			exit_code = EXIT_FAILURE;
> > +			break;
> > +		}
> > +		close(fd_tree);
> > +
> > +		ret = umount2(target, MNT_DETACH);
> > +		if (ret < 0) {
> > +			fprintf(stderr, "%m - Failed to unmount %s", target);
> > +			exit_code = EXIT_FAILURE;
> > +			break;
> > +		}
> > +	}
> > +
> > +	(void)unlinkat(dfd, "detached-move-mount", AT_REMOVEDIR);
> > +	close(dfd);
> > +
> > +	exit(exit_code);
> > +}
> > diff --git a/tests/generic/631 b/tests/generic/631
> > new file mode 100644
> > index 00000000..e227c448
> > --- /dev/null
> > +++ b/tests/generic/631
> > @@ -0,0 +1,41 @@
> > +#! /bin/bash
> > +# SPDX-License-Identifier: GPL-2.0+
> > +#
> > +# Copyright (c) 2021 Christian Brauner <christian.brauner@ubuntu.com>
> > +# All Rights Reserved.
> > +#
> > +# FS QA Test No. 631
> > +#
> > +# Regression test to verify that creating a series of detached mounts,
> > +# attaching them to the filesystem, and unmounting them does not trigger an
> > +# integer overflow in ns->mounts causing the kernel to block any new mounts in
> > +# count_mounts() and returning ENOSPC because it falsely assumes that the
> > +# maximum number of mounts in the mount namespace has been reached, i.e. it
> > +# thinks it can't fit the new mounts into the mount namespace anymore.
> > +#
> > +seq=`basename $0`
> > +seqres=$RESULT_DIR/$seq
> > +echo "QA output created by $seq"
> > +
> > +here=`pwd`
> > +tmp=/tmp/$$
> > +status=1	# failure is the default!
> > +trap "_cleanup; exit \$status" 0 1 2 3 15
> > +
> > +_cleanup()
> > +{
> > +	cd /
> > +	rm -f $tmp.*
> > +}
> > +
> > +. ./common/rc
> > +
> > +rm -f $seqres.full
> > +
> > +_supported_fs generic
> 
> _require_test

added

> 
> As test runs directly on $TEST_DIR.
> 
> Also needs
> 
> _require_test_program "detached_mounts_propagation"

added

> 
> > +
> > +$here/src/detached_mounts_propagation $TEST_DIR >> $seqres.full
> > +
> > +echo silence is golden
> > +status=$?
> 
> This should go after detached_mounts_propagation command?

Fixed.
(Yeah, though it doesn't really matter as the error would lead to the
mount namespace being unuseable and you'd see this pretty glaringly not
just in terms of behavior but also in terms of the output in 631.out.)

Christian
diff mbox series

Patch

diff --git a/.gitignore b/.gitignore
index f3bc0a4f..72bd40a0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -62,6 +62,7 @@ 
 /src/cloner
 /src/dbtest
 /src/deduperace
+/src/detached_mounts_propagation
 /src/devzero
 /src/dio-interleaved
 /src/dio-invalidate-cache
diff --git a/src/Makefile b/src/Makefile
index 3d729a34..99019e6e 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -29,7 +29,8 @@  LINUX_TARGETS = xfsctl bstat t_mtab getdevicesize preallo_rw_pattern_reader \
 	attr-list-by-handle-cursor-test listxattr dio-interleaved t_dir_type \
 	dio-invalidate-cache stat_test t_encrypted_d_revalidate \
 	attr_replace_test swapon mkswap t_attr_corruption t_open_tmpfiles \
-	fscrypt-crypt-util bulkstat_null_ocount splice-test chprojid_fail
+	fscrypt-crypt-util bulkstat_null_ocount splice-test chprojid_fail \
+	detached_mounts_propagation
 
 SUBDIRS = log-writes perf
 
diff --git a/src/detached_mounts_propagation.c b/src/detached_mounts_propagation.c
new file mode 100644
index 00000000..4a588e46
--- /dev/null
+++ b/src/detached_mounts_propagation.c
@@ -0,0 +1,252 @@ 
+/* SPDX-License-Identifier: LGPL-2.1+ */
+/*
+ * Copyright (c) 2021 Christian Brauner <christian.brauner@ubuntu.com>
+ * All Rights Reserved.
+ *
+ * Regression test to verify that creating a series of detached mounts,
+ * attaching them to the filesystem, and unmounting them does not trigger an
+ * integer overflow in ns->mounts causing the kernel to block any new mounts in
+ * count_mounts() and returning ENOSPC because it falsely assumes that the
+ * maximum number of mounts in the mount namespace has been reached, i.e. it
+ * thinks it can't fit the new mounts into the mount namespace anymore.
+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <limits.h>
+#include <sched.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mount.h>
+#include <sys/stat.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+/* open_tree() */
+#ifndef OPEN_TREE_CLONE
+#define OPEN_TREE_CLONE 1
+#endif
+
+#ifndef OPEN_TREE_CLOEXEC
+#define OPEN_TREE_CLOEXEC O_CLOEXEC
+#endif
+
+#ifndef __NR_open_tree
+	#if defined __alpha__
+		#define __NR_open_tree 538
+	#elif defined _MIPS_SIM
+		#if _MIPS_SIM == _MIPS_SIM_ABI32	/* o32 */
+			#define __NR_open_tree 4428
+		#endif
+		#if _MIPS_SIM == _MIPS_SIM_NABI32	/* n32 */
+			#define __NR_open_tree 6428
+		#endif
+		#if _MIPS_SIM == _MIPS_SIM_ABI64	/* n64 */
+			#define __NR_open_tree 5428
+		#endif
+	#elif defined __ia64__
+		#define __NR_open_tree (428 + 1024)
+	#else
+		#define __NR_open_tree 428
+	#endif
+#endif
+
+/* move_mount() */
+#ifndef MOVE_MOUNT_F_EMPTY_PATH
+#define MOVE_MOUNT_F_EMPTY_PATH 0x00000004 /* Empty from path permitted */
+#endif
+
+#ifndef __NR_move_mount
+	#if defined __alpha__
+		#define __NR_move_mount 539
+	#elif defined _MIPS_SIM
+		#if _MIPS_SIM == _MIPS_SIM_ABI32	/* o32 */
+			#define __NR_move_mount 4429
+		#endif
+		#if _MIPS_SIM == _MIPS_SIM_NABI32	/* n32 */
+			#define __NR_move_mount 6429
+		#endif
+		#if _MIPS_SIM == _MIPS_SIM_ABI64	/* n64 */
+			#define __NR_move_mount 5429
+		#endif
+	#elif defined __ia64__
+		#define __NR_move_mount (428 + 1024)
+	#else
+		#define __NR_move_mount 429
+	#endif
+#endif
+
+static inline int sys_open_tree(int dfd, const char *filename, unsigned int flags)
+{
+	return syscall(__NR_open_tree, dfd, filename, flags);
+}
+
+static inline int sys_move_mount(int from_dfd, const char *from_pathname, int to_dfd,
+				 const char *to_pathname, unsigned int flags)
+{
+	return syscall(__NR_move_mount, from_dfd, from_pathname, to_dfd, to_pathname, flags);
+}
+
+static bool is_shared_mountpoint(const char *path)
+{
+	bool shared = false;
+	FILE *f = NULL;
+	char *line = NULL;
+	int i;
+	size_t len = 0;
+
+	f = fopen("/proc/self/mountinfo", "re");
+	if (!f)
+		return 0;
+
+	while (getline(&line, &len, f) > 0) {
+		char *slider1, *slider2;
+
+		for (slider1 = line, i = 0; slider1 && i < 4; i++)
+			slider1 = strchr(slider1 + 1, ' ');
+
+		if (!slider1)
+			continue;
+
+		slider2 = strchr(slider1 + 1, ' ');
+		if (!slider2)
+			continue;
+
+		*slider2 = '\0';
+		if (strcmp(slider1 + 1, path) == 0) {
+			/* This is the path. Is it shared? */
+			slider1 = strchr(slider2 + 1, ' ');
+			if (slider1 && strstr(slider1, "shared:")) {
+				shared = true;
+				break;
+			}
+		}
+	}
+	fclose(f);
+	free(line);
+
+	return shared;
+}
+
+static void usage(void)
+{
+	const char *text = "mount-new [--recursive] <base-dir>\n";
+	fprintf(stderr, "%s", text);
+	_exit(EXIT_SUCCESS);
+}
+
+#define exit_usage(format, ...)                              \
+	({                                                   \
+		fprintf(stderr, format "\n", ##__VA_ARGS__); \
+		usage();                                     \
+	})
+
+#define exit_log(format, ...)                                \
+	({                                                   \
+		fprintf(stderr, format "\n", ##__VA_ARGS__); \
+		exit(EXIT_FAILURE);                          \
+	})
+
+static const struct option longopts[] = {
+	{"help",	no_argument,		0,	'a'},
+	{ NULL,		no_argument,		0,	 0 },
+};
+
+int main(int argc, char *argv[])
+{
+	int exit_code = EXIT_SUCCESS, index = 0;
+	int dfd, fd_tree, new_argc, ret;
+	char *base_dir;
+	char *const *new_argv;
+	char target[PATH_MAX];
+
+	while ((ret = getopt_long_only(argc, argv, "", longopts, &index)) != -1) {
+		switch (ret) {
+		case 'a':
+			/* fallthrough */
+		default:
+			usage();
+		}
+	}
+
+	new_argv = &argv[optind];
+	new_argc = argc - optind;
+	if (new_argc < 1)
+		exit_usage("Missing base directory\n");
+	base_dir = new_argv[0];
+
+	if (*base_dir != '/')
+		exit_log("Please specify an absolute path");
+
+	/* Ensure that target is a shared mountpoint. */
+	if (!is_shared_mountpoint(base_dir))
+		exit_log("Please ensure that \"%s\" is a shared mountpoint", base_dir);
+
+	ret = unshare(CLONE_NEWNS);
+	if (ret < 0)
+		exit_log("%m - Failed to create new mount namespace");
+
+	ret = mount(NULL, base_dir, NULL, MS_REC | MS_SHARED, NULL);
+	if (ret < 0)
+		exit_log("%m - Failed to make base_dir shared mountpoint");
+
+	dfd = open(base_dir, O_RDONLY | O_DIRECTORY | O_CLOEXEC);
+	if (dfd < 0)
+		exit_log("%m - Failed to open base directory \"%s\"", base_dir);
+
+	ret = mkdirat(dfd, "detached-move-mount", 0755);
+	if (ret < 0)
+		exit_log("%m - Failed to create required temporary directories");
+
+	ret = snprintf(target, sizeof(target), "%s/detached-move-mount", base_dir);
+	if (ret < 0 || (size_t)ret >= sizeof(target))
+		exit_log("%m - Failed to assemble target path");
+
+	/*
+	 * Having a mount table with 10000 mounts is already quite excessive
+	 * and shoult account even for weird test systems.
+	 */
+	for (size_t i = 0; i < 10000; i++) {
+		fd_tree = sys_open_tree(dfd, "detached-move-mount",
+					OPEN_TREE_CLONE |
+					OPEN_TREE_CLOEXEC |
+					AT_EMPTY_PATH);
+		if (fd_tree < 0) {
+			if (errno == ENOSYS) /* New mount API not (fully) supported. */
+				break;
+
+			fprintf(stderr, "%m - Failed to open %d(detached-move-mount)", dfd);
+			exit_code = EXIT_FAILURE;
+			break;
+		}
+
+		ret = sys_move_mount(fd_tree, "", dfd, "detached-move-mount", MOVE_MOUNT_F_EMPTY_PATH);
+		if (ret < 0) {
+			if (errno == ENOSPC)
+				fprintf(stderr, "%m - Buggy mount counting");
+			else if (errno == ENOSYS) /* New mount API not (fully) supported. */
+				break;
+			else
+				fprintf(stderr, "%m - Failed to attach mount to %d(detached-move-mount)", dfd);
+			exit_code = EXIT_FAILURE;
+			break;
+		}
+		close(fd_tree);
+
+		ret = umount2(target, MNT_DETACH);
+		if (ret < 0) {
+			fprintf(stderr, "%m - Failed to unmount %s", target);
+			exit_code = EXIT_FAILURE;
+			break;
+		}
+	}
+
+	(void)unlinkat(dfd, "detached-move-mount", AT_REMOVEDIR);
+	close(dfd);
+
+	exit(exit_code);
+}
diff --git a/tests/generic/631 b/tests/generic/631
new file mode 100644
index 00000000..e227c448
--- /dev/null
+++ b/tests/generic/631
@@ -0,0 +1,41 @@ 
+#! /bin/bash
+# SPDX-License-Identifier: GPL-2.0+
+#
+# Copyright (c) 2021 Christian Brauner <christian.brauner@ubuntu.com>
+# All Rights Reserved.
+#
+# FS QA Test No. 631
+#
+# Regression test to verify that creating a series of detached mounts,
+# attaching them to the filesystem, and unmounting them does not trigger an
+# integer overflow in ns->mounts causing the kernel to block any new mounts in
+# count_mounts() and returning ENOSPC because it falsely assumes that the
+# maximum number of mounts in the mount namespace has been reached, i.e. it
+# thinks it can't fit the new mounts into the mount namespace anymore.
+#
+seq=`basename $0`
+seqres=$RESULT_DIR/$seq
+echo "QA output created by $seq"
+
+here=`pwd`
+tmp=/tmp/$$
+status=1	# failure is the default!
+trap "_cleanup; exit \$status" 0 1 2 3 15
+
+_cleanup()
+{
+	cd /
+	rm -f $tmp.*
+}
+
+. ./common/rc
+
+rm -f $seqres.full
+
+_supported_fs generic
+
+$here/src/detached_mounts_propagation $TEST_DIR >> $seqres.full
+
+echo silence is golden
+status=$?
+exit
diff --git a/tests/generic/631.out b/tests/generic/631.out
new file mode 100644
index 00000000..ce88f447
--- /dev/null
+++ b/tests/generic/631.out
@@ -0,0 +1,2 @@ 
+QA output created by 631
+silence is golden
diff --git a/tests/generic/group b/tests/generic/group
index 7c7531d1..151f8af2 100644
--- a/tests/generic/group
+++ b/tests/generic/group
@@ -633,3 +633,4 @@ 
 628 auto quick rw clone
 629 auto quick rw copy_range
 630 auto quick rw dedupe clone
+631 auto quick mount