diff mbox

WARNING in notify_change

Message ID 96c750b3-fcb0-3d7f-45eb-45459078ef83@I-love.SAKURA.ne.jp (mailing list archive)
State New, archived
Headers show

Commit Message

Tetsuo Handa June 11, 2018, 10:33 a.m. UTC
I haven't succeeded reproducing this bug using original C reproducer.
Instead, I'm observing XFS warning using modified reproducer shown below.

----------------------------------------
// autogenerated by syzkaller (http://github.com/google/syzkaller)

#include <fcntl.h>
#include <linux/futex.h>
#include <pthread.h>
#include <sys/stat.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <unistd.h>
#include <sys/xattr.h>

struct thread_t {
	int created, running, call;
	pthread_t th;
};

static struct thread_t threads[16];
static void execute_call(int call);
static int running;
static int collide;

static void* thr(void* arg)
{
	struct thread_t* th = (struct thread_t*)arg;
	for (;;) {
		while (!__atomic_load_n(&th->running, __ATOMIC_ACQUIRE))
			syscall(SYS_futex, &th->running, FUTEX_WAIT, 0, 0);
		execute_call(th->call);
		__atomic_fetch_sub(&running, 1, __ATOMIC_RELAXED);
		__atomic_store_n(&th->running, 0, __ATOMIC_RELEASE);
		syscall(SYS_futex, &th->running, FUTEX_WAKE);
	}
	return 0;
}

static void execute(int num_calls)
{
	int call, thread;
	running = 0;
	for (call = 0; call < num_calls; call++) {
		for (thread = 0; thread < sizeof(threads) / sizeof(threads[0]); thread++) {
			struct thread_t* th = &threads[thread];
			if (!th->created) {
				th->created = 1;
				pthread_attr_t attr;
				pthread_attr_init(&attr);
				pthread_attr_setstacksize(&attr, 128 << 10);
				pthread_create(&th->th, &attr, thr, th);
			}
			if (!__atomic_load_n(&th->running, __ATOMIC_ACQUIRE)) {
				th->call = call;
				__atomic_fetch_add(&running, 1, __ATOMIC_RELAXED);
				__atomic_store_n(&th->running, 1, __ATOMIC_RELEASE);
				syscall(SYS_futex, &th->running, FUTEX_WAKE);
				if (collide && call % 2)
					break;
				struct timespec ts;
				ts.tv_sec = 0;
				ts.tv_nsec = 20 * 1000 * 1000;
				syscall(SYS_futex, &th->running, FUTEX_WAIT, 1, &ts);
				if (running)
					usleep((call == num_calls - 1) ? 10000 : 1000);
				break;
			}
		}
	}
}

static void execute_call(int call)
{
	switch (call) {
	case 0:
		creat("file0", 0x20000080);
		break;
	case 1:
		lsetxattr("file0", "security.capability",
			  "\x00\x00\x00\x02\x01\x00\x00\x00\x00\x00\x00\x01"
			  "\x04\x00\x00\x00\x00\x00\x00\x00", 20, 0);
		break;
	}
}

int main(int argc, char *argv[])
{
	execute_call(0);
	execute_call(1);
	execute(2);
	return 0;
}
----------------------------------------

When lsetxattr() is called in parallel (with CONFIG_XFS_WARN=y),
XFS emits below warning due to ATTR_KILL_PRIV not cleared yet.

----------------------------------------
[   33.347151] XFS: Assertion failed: (iattr->ia_valid & (ATTR_UID|ATTR_GID|ATTR_ATIME|ATTR_ATIME_SET| ATTR_MTIME_SET|ATTR_KILL_PRIV|ATTR_TIMES_SET)) == 0, file: fs/xfs/xfs_iops.c, line: 792
[   33.353353] ------------[ cut here ]------------
[   33.355376] WARNING: CPU: 0 PID: 5355 at fs/xfs/xfs_message.c:105 asswarn+0x2c/0x30 [xfs]
[   33.358501] Modules linked in: ip6t_rpfilter ipt_REJECT nf_reject_ipv4 ip6t_REJECT nf_reject_ipv6 xt_conntrack ip_set nfnetlink ebtable_nat ebtable_broute bridge stp llc ip6table_nat nf_conntrack_ipv6 nf_defrag_ipv6 nf_nat_ipv6 ip6table_mangle ip6table_raw iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 nf_nat nf_conntrack iptable_mangle iptable_raw ebtable_filter ebtables ip6table_filter ip6_tables iptable_filter vmw_balloon intel_powerclamp pcspkr vmw_vmci i2c_piix4 sg ppdev parport_pc parport ip_tables xfs libcrc32c sd_mod sr_mod cdrom ata_generic pata_acpi serio_raw vmwgfx drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops mptspi e1000 ttm scsi_transport_spi ahci mptscsih libahci drm ata_piix mptbase i2c_core libata
[   33.385636] CPU: 0 PID: 5355 Comm: a.out Not tainted 4.9.0-rc1+ #55
[   33.389051] Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 05/19/2017
[   33.393952]  ffffae56d0dafb78 ffffffff8f30c3ed 0000000000000000 0000000000000000
[   33.398032]  ffffae56d0dafbb8 ffffffff8f075431 00000069d0dafbd8 ffff9e5ca0ea1040
[   33.401454]  ffffae56d0dafcd8 ffff9e5ca0e9f778 0000000000000000 ffff9e5ca0ea1318
[   33.404603] Call Trace:
[   33.406183]  [<ffffffff8f30c3ed>] dump_stack+0x8e/0xd1
[   33.408541]  [<ffffffff8f075431>] __warn+0xc1/0xe0
[   33.410781]  [<ffffffff8f075508>] warn_slowpath_null+0x18/0x20
[   33.413358]  [<ffffffffc06ac33c>] asswarn+0x2c/0x30 [xfs]
[   33.415856]  [<ffffffffc06a49b4>] xfs_setattr_size+0xc4/0x3a0 [xfs]
[   33.418549]  [<ffffffffc06a4cb7>] xfs_vn_setattr_size+0x27/0x30 [xfs]
[   33.421409]  [<ffffffffc06a4d18>] xfs_vn_setattr+0x58/0x80 [xfs]
[   33.421413]  [<ffffffff8f203aa3>] notify_change+0x313/0x430
[   33.421415]  [<ffffffff8f1dfc48>] do_truncate+0x58/0x90
[   33.421417]  [<ffffffff8f1f2e33>] path_openat+0xbc3/0xd20
[   33.421419]  [<ffffffff8f1f42eb>] do_filp_open+0x7b/0xd0
[   33.421422]  [<ffffffff8f6aa162>] ? _raw_spin_unlock+0x22/0x30
[   33.421424]  [<ffffffff8f204e13>] ? __alloc_fd+0xf3/0x200
[   33.421425]  [<ffffffff8f1e11ad>] do_sys_open+0x19d/0x230
[   33.421426]  [<ffffffff8f1e1299>] SyS_creat+0x19/0x20
[   33.421429]  [<ffffffff8f0036a7>] do_syscall_64+0x67/0x1f0
[   33.421430]  [<ffffffff8f6aa949>] entry_SYSCALL64_slow_path+0x25/0x25
[   33.421481] ---[ end trace be3bb63a2a7fe0de ]---
----------------------------------------

Bisection reached commit 030b533c4fd4d2ec "fs: Avoid premature clearing of capabilities".
Since that commit changed whether ATTR_KILL_PRIV is removed from attr->ia_valid,
it is possible that ATTR_KILL_PRIV is delivered to xfs_setattr_size().
And making below change solves the XFS warning.

----------------------------------------
----------------------------------------

But I have no idea why executing in parallel makes such difference.
Maybe parallel execution is something related to why syzbot is hitting
WARN_ON_ONCE(!inode_is_locked(inode)) in notify_change()...

Comments

Khazhy Kumykov April 15, 2019, 11:20 p.m. UTC | #1
I was able to reproduce this by setting security.capability xattr on a
blockdev file, then writing to it - when writing to the blockdev we
never lock the inode, so when we clear the capability we hit this
lockdep warning.

Is the issue here that we can set this xattr in the first place so we
have to clear it at all? Or should we really be locking the inode for
blockdevs after all? I'm not too familiar, but my gut says former

this reproducer is able to immediately crash machine running
linux-next-20190415:

#include <errno.h>
#include <fcntl.h>
#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <unistd.h>
#include <attr/xattr.h>

char *disk = "/dev/loop0";

int main(void) {
        int fd = open(disk, 0);
        if (fd < 0)
                printf("open: %d\n", errno);

        system("dd if=/dev/zero of=a_file count=51200");
        system("losetup /dev/loop0 a_file");

        uint32_t value[5] = {
                0x2000000,
                7,
                0x20d0,
                6,
                4
        };

        int res = fsetxattr(fd, "security.capability", &value,
sizeof(value), XATTR_CREATE);
        if (res < 0)
                printf ("xattr: %d\n", errno);

        int fd2 = open(disk, O_RDWR);
        write(fd2, "hello", 5);
        return 0;
}
Al Viro April 15, 2019, 11:54 p.m. UTC | #2
On Mon, Apr 15, 2019 at 04:20:17PM -0700, Khazhismel Kumykov wrote:
> I was able to reproduce this by setting security.capability xattr on a
> blockdev file, then writing to it - when writing to the blockdev we
> never lock the inode, so when we clear the capability we hit this
> lockdep warning.
> 
> Is the issue here that we can set this xattr in the first place so we
> have to clear it at all? Or should we really be locking the inode for
> blockdevs after all? I'm not too familiar, but my gut says former

More interesting question is, WTF do we even touch that thing for
bdev?  The thing is, mknod will cheerfully create any number of
different filesystem objects, all giving access to the same block
device.  Which of them should have that xattr removed?  It makes
no sense whatsoever; moreover, who *cares* about caps for block
device in the first place?

And if we did, what of another way to modify the block device?
You know, mount it read-write...
Jan Kara April 18, 2019, 11:03 a.m. UTC | #3
On Tue 16-04-19 00:54:28, Al Viro wrote:
> On Mon, Apr 15, 2019 at 04:20:17PM -0700, Khazhismel Kumykov wrote:
> > I was able to reproduce this by setting security.capability xattr on a
> > blockdev file, then writing to it - when writing to the blockdev we
> > never lock the inode, so when we clear the capability we hit this
> > lockdep warning.
> > 
> > Is the issue here that we can set this xattr in the first place so we
> > have to clear it at all? Or should we really be locking the inode for
> > blockdevs after all? I'm not too familiar, but my gut says former
> 
> More interesting question is, WTF do we even touch that thing for
> bdev?  The thing is, mknod will cheerfully create any number of
> different filesystem objects, all giving access to the same block
> device.  Which of them should have that xattr removed?  It makes
> no sense whatsoever; moreover, who *cares* about caps for block
> device in the first place?
> 
> And if we did, what of another way to modify the block device?
> You know, mount it read-write...

Yes, Alexander Lochman has sent a patch to silence this warning back in
February [1] by just bailing out from file_remove_privs() for non-regular
files.  But so far you've ignored that patch... Will you pick it up please?

								Honza

[1] https://lore.kernel.org/lkml/cbdc8071-de76-bb0a-6890-15ef21023a70@tu-dortmund.de
diff mbox

Patch

diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 3b4be06..a67af5b 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -832,7 +832,7 @@ 
 	ASSERT(xfs_isilocked(ip, XFS_MMAPLOCK_EXCL));
 	ASSERT(S_ISREG(inode->i_mode));
 	ASSERT((iattr->ia_valid & (ATTR_UID|ATTR_GID|ATTR_ATIME|ATTR_ATIME_SET|
-		ATTR_MTIME_SET|ATTR_KILL_PRIV|ATTR_TIMES_SET)) == 0);
+		ATTR_MTIME_SET|ATTR_TIMES_SET)) == 0);
 
 	oldsize = inode->i_size;
 	newsize = iattr->ia_size;