Message ID | 1577071109-68332-1-git-send-email-sunke32@huawei.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [blktests,v4] nbd/003:add mount and clear_sock test for nbd | expand |
Hi Omar, The nbd/003 you simplified does the same thing I want to do, and I made some small changes. I ran the simplified nbd/003 with the Linux kernel at commit 7e0165b2f1a, and it passed. Then I rolled the Linux kernel back to commit 090bb803708, and it did trigger the BUG_ON. However, there is one difference. NBD has ioctl and netlink interfaces. I use the netlink interface, and the simplified nbd/003 uses the ioctl interface. The nbd/003 with the netlink interface does seem to trigger some other issue. So, can it be nbd/004? Thanks, Sun Ke.
ping 在 2019/12/23 11:18, Sun Ke 写道: > Add the test case to check nbd device. This test case catches regressions > fixed by commit 92b5c8f0063e4 "nbd: replace kill_bdev() with > __invalidate_device() again". > > Establish the nbd connection. Run two processes. The first one do mount > and umount, and the other one do clear_sock ioctl. > > Signed-off-by: Sun Ke <sunke32@huawei.com> > [Omar: simplify] > Signed-off-by: Omar Sandoval <osandov@fb.com> > --- > simplified nbd/003 -> v4 > 1. mkfs.ext4 /dev/nbd0 >> "$FULL" 2>&1. > 2. Allow mount and umount to fail. if clear sock do the first, mount and > umount can not be successful. > 3. Add the loops to 5000. So it is very likely to trigger the BUGON. > --- > src/Makefile | 5 +-- > src/mount_clear_sock.c | 91 ++++++++++++++++++++++++++++++++++++++++++++++++++ > tests/nbd/003 | 30 +++++++++++++++++ > tests/nbd/003.out | 1 + > 4 files changed, 125 insertions(+), 2 deletions(-) > create mode 100644 src/mount_clear_sock.c > create mode 100644 tests/nbd/003 > create mode 100644 tests/nbd/003.out > > diff --git a/src/Makefile b/src/Makefile > index 917d6f4..3b587f6 100644 > --- a/src/Makefile > +++ b/src/Makefile > @@ -4,12 +4,13 @@ HAVE_C_HEADER = $(shell if echo "\#include <$(1)>" | \ > > C_TARGETS := \ > loblksize \ > + loop_change_fd \ > loop_get_status_null \ > + mount_clear_sock \ > + nbdsetsize \ > openclose \ > sg/dxfer-from-dev \ > sg/syzkaller1 \ > - nbdsetsize \ > - loop_change_fd \ > zbdioctl > > CXX_TARGETS := \ > diff --git a/src/mount_clear_sock.c b/src/mount_clear_sock.c > new file mode 100644 > index 0000000..ba9ed71 > --- /dev/null > +++ b/src/mount_clear_sock.c > @@ -0,0 +1,91 @@ > +// SPDX-License-Identifier: GPL-3.0+ > +// Copyright (C) 2019 Sun Ke > + > +#include <assert.h> > +#include <fcntl.h> > +#include <stdio.h> > +#include <stdlib.h> > +#include <string.h> > +#include <unistd.h> > +#include <sys/ioctl.h> > +#include <sys/mount.h> > +#include <sys/stat.h> > +#include <sys/types.h> > +#include 
<sys/wait.h> > +#include <linux/fs.h> > +#include <linux/nbd.h> > + > +int main(int argc, char **argv) > +{ > + const char *mountpoint, *dev, *fstype; > + int loops, fd; > + > + if (argc != 5) { > + fprintf(stderr, "usage: %s DEV MOUNTPOINT FSTYPE LOOPS", argv[0]); > + return EXIT_FAILURE; > + } > + > + dev = argv[1]; > + mountpoint = argv[2]; > + fstype = argv[3]; > + loops = atoi(argv[4]); > + > + fd = open(dev, O_RDWR); > + if (fd == -1) { > + perror("open"); > + return EXIT_FAILURE; > + } > + > + for (int i = 0; i < loops; i++) { > + pid_t mount_pid, clear_sock_pid; > + int wstatus; > + > + mount_pid = fork(); > + if (mount_pid == -1) { > + perror("fork"); > + return EXIT_FAILURE; > + } > + if (mount_pid == 0) { > + mount(dev, mountpoint, fstype, > + MS_NOSUID | MS_SYNCHRONOUS, 0); > + umount(mountpoint); > + exit(EXIT_SUCCESS); > + } > + > + clear_sock_pid = fork(); > + if (clear_sock_pid == -1) { > + perror("fork"); > + return EXIT_FAILURE; > + } > + if (clear_sock_pid == 0) { > + if (ioctl(fd, NBD_CLEAR_SOCK, 0) == -1) { > + perror("ioctl"); > + exit(EXIT_FAILURE); > + } > + exit(EXIT_SUCCESS); > + } > + > + if (waitpid(mount_pid, &wstatus, 0) == -1) { > + perror("waitpid"); > + return EXIT_FAILURE; > + } > + if (!WIFEXITED(wstatus) || > + WEXITSTATUS(wstatus) != EXIT_SUCCESS) { > + fprintf(stderr, "mount process failed"); > + return EXIT_FAILURE; > + } > + > + if (waitpid(clear_sock_pid, &wstatus, 0) == -1) { > + perror("waitpid"); > + return EXIT_FAILURE; > + } > + if (!WIFEXITED(wstatus) || > + WEXITSTATUS(wstatus) != EXIT_SUCCESS) { > + fprintf(stderr, "NBD_CLEAR_SOCK process failed"); > + return EXIT_FAILURE; > + } > + } > + > + close(fd); > + return EXIT_SUCCESS; > +} > diff --git a/tests/nbd/003 b/tests/nbd/003 > new file mode 100644 > index 0000000..57fb63a > --- /dev/null > +++ b/tests/nbd/003 > @@ -0,0 +1,30 @@ > +#!/bin/bash > +# SPDX-License-Identifier: GPL-3.0+ > +# Copyright (C) 2019 Sun Ke > +# > +# Regression test for commit 2b5c8f0063e4 
("nbd: replace kill_bdev() with > +# __invalidate_device() again"). > + > +. tests/nbd/rc > + > +DESCRIPTION="mount/unmount concurrently with NBD_CLEAR_SOCK" > +QUICK=1 > + > +requires() { > + _have_nbd && _have_src_program mount_clear_sock > +} > + > +test() { > + echo "Running ${TEST_NAME}" > + > + _start_nbd_server > + nbd-client -L -N export localhost /dev/nbd0 >> "$FULL" 2>&1 > + mkfs.ext4 /dev/nbd0 >> "$FULL" 2>&1 > + > + mkdir -p "${TMPDIR}/mnt" > + src/mount_clear_sock /dev/nbd0 "${TMPDIR}/mnt" ext4 5000 > + umount "${TMPDIR}/mnt" > /dev/null 2>&1 > + > + nbd-client -d /dev/nbd0 >> "$FULL" 2>&1 > + _stop_nbd_server > +} > diff --git a/tests/nbd/003.out b/tests/nbd/003.out > new file mode 100644 > index 0000000..aa340db > --- /dev/null > +++ b/tests/nbd/003.out > @@ -0,0 +1 @@ > +Running nbd/003 >
On Mon, Dec 23, 2019 at 11:18:29AM +0800, Sun Ke wrote: > Add a test case to check the nbd device. This test case catches regressions > fixed by commit 2b5c8f0063e4 "nbd: replace kill_bdev() with > __invalidate_device() again". > > Establish the nbd connection. Run two processes. The first one does mount > and umount, and the other one does the clear_sock ioctl. > > Signed-off-by: Sun Ke <sunke32@huawei.com> > [Omar: simplify] > Signed-off-by: Omar Sandoval <osandov@fb.com> > --- > simplified nbd/003 -> v4 > 1. mkfs.ext4 /dev/nbd0 >> "$FULL" 2>&1. > 2. Allow mount and umount to fail. If clear_sock runs first, mount and > umount cannot succeed. > 3. Increase the loops to 5000, so it is very likely to trigger the BUG_ON. > --- Thanks, this looks good now. Even on v5.6-rc1, it seems to trigger a bug (below). I'll go ahead and merge it anyway. [ 303.434579] EXT4-fs (nbd0): unable to read superblock [ 303.436134] nbd0: detected capacity change from 0 to 10737418240 [ 303.437237] ldm_validate_partition_table(): Disk read failed. [ 303.438468] nbd0: unable to read partition table [ 303.439485] ldm_validate_partition_table(): Disk read failed. 
[ 303.441976] nbd0: unable to read partition table [ 303.452610] block nbd0: NBD_DISCONNECT [ 303.470762] BUG: kernel NULL pointer dereference, address: 0000000000000020 [ 303.472461] #PF: supervisor write access in kernel mode [ 303.473573] #PF: error_code(0x0002) - not-present page [ 303.474632] PGD 0 P4D 0 [ 303.475197] Oops: 0002 [#1] PREEMPT SMP PTI [ 303.476084] CPU: 1 PID: 10589 Comm: nbd-client Not tainted 5.6.0-rc1 #1 [ 303.477446] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS ?-20191223_100556-anatol 04/01/2014 [ 303.479481] RIP: 0010:mutex_lock+0x10/0x20 [ 303.480348] Code: 1f 84 00 00 00 00 00 0f 1f 00 0f 1f 44 00 00 be 02 00 00 00 e9 b1 fa ff ff 90 0f 1f 44 00 00 31 c0 65 48 8b 14 25 00 7d 01 00 <f0> 48 0f b1 17 74 02 eb d7 c3 66 0f 1f 44 00 00 0f 1f 44 00 00 41 [ 303.484418] RSP: 0018:ffffa2cb03a739d8 EFLAGS: 00010246 [ 303.485814] RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000 [ 303.487388] RDX: ffff9b6e7695d7c0 RSI: ffffffffad0fa1aa RDI: 0000000000000020 [ 303.488895] RBP: ffffa2cb03a73a70 R08: 0000000000000000 R09: ffff9b6e7cccd870 [ 303.490560] R10: 00000000000001dd R11: ffff9b6e7dcabb38 R12: ffff9b6e77e7f0b8 [ 303.492080] R13: ffffa2cb03a73a00 R14: ffffffffc03f5e28 R15: ffffffffad50c2c0 [ 303.493536] FS: 00007f128b70cf00(0000) GS:ffff9b6e7dc80000(0000) knlGS:0000000000000000 [ 303.495147] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 303.497022] CR2: 0000000000000020 CR3: 0000000077a14000 CR4: 00000000000006e0 [ 303.500036] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 303.502180] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 303.504659] Call Trace: [ 303.505198] flush_workqueue+0xa7/0x470 [ 303.505999] ? nbd_size_update+0x120/0x120 [nbd] [ 303.506960] nbd_disconnect_and_put+0x51/0x70 [nbd] [ 303.507980] nbd_genl_disconnect+0xc6/0x160 [nbd] [ 303.508971] genl_rcv_msg+0x1d2/0x480 [ 303.509755] ? __netlink_sendskb+0x3b/0x50 [ 303.510651] ? 
netlink_unicast+0x200/0x240 [ 303.511512] ? genl_family_rcv_msg_attrs_parse+0x100/0x100 [ 303.512647] netlink_rcv_skb+0x75/0x140 [ 303.513485] genl_rcv+0x24/0x40 [ 303.514252] netlink_unicast+0x199/0x240 [ 303.515082] netlink_sendmsg+0x243/0x480 [ 303.515959] sock_sendmsg+0x5e/0x60 [ 303.516699] ____sys_sendmsg+0x21b/0x290 [ 303.517563] ? copy_msghdr_from_user+0xe1/0x160 [ 303.518561] ___sys_sendmsg+0x9e/0xe0 [ 303.519299] __sys_sendmsg+0x81/0xd0 [ 303.520133] do_syscall_64+0x55/0x1d0 [ 303.521446] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 303.522776] RIP: 0033:0x7f128bc257b7 [ 303.523640] Code: 64 89 02 48 c7 c0 ff ff ff ff eb bb 0f 1f 80 00 00 00 00 f3 0f 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 b8 2e 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 51 c3 48 83 ec 28 89 54 24 1c 48 89 74 24 10 [ 303.527427] RSP: 002b:00007ffda5862268 EFLAGS: 00000246 ORIG_RAX: 000000000000002e [ 303.529015] RAX: ffffffffffffffda RBX: 00005601c7140d00 RCX: 00007f128bc257b7 [ 303.532267] RDX: 0000000000000000 RSI: 00007ffda58622a0 RDI: 0000000000000003 [ 303.534946] RBP: 00005601c7140c10 R08: 0000000000000004 R09: 0000000000000000 [ 303.536810] R10: fffffffffffff08a R11: 0000000000000246 R12: 00005601c7140e20 [ 303.538417] R13: 00007ffda58622a0 R14: 0000000000000003 R15: 00000000ffffffff [ 303.539894] Modules linked in: nbd btrfs pata_acpi ata_piix libata blake2b_generic xor scsi_mod nvme raid6_pq nvme_core t10_pi virtio_net libcrc32c crc_t10dif net_failover crct10dif_generic virtio_rng crct10dif_common rng_core failover [ 303.544347] CR2: 0000000000000020 [ 303.545180] ---[ end trace 12a9191fdb6b31e0 ]--- [ 303.546216] RIP: 0010:mutex_lock+0x10/0x20 [ 303.547546] Code: 1f 84 00 00 00 00 00 0f 1f 00 0f 1f 44 00 00 be 02 00 00 00 e9 b1 fa ff ff 90 0f 1f 44 00 00 31 c0 65 48 8b 14 25 00 7d 01 00 <f0> 48 0f b1 17 74 02 eb d7 c3 66 0f 1f 44 00 00 0f 1f 44 00 00 41 [ 303.551350] RSP: 0018:ffffa2cb03a739d8 EFLAGS: 00010246 [ 303.552399] RAX: 0000000000000000 RBX: 0000000000000000 RCX: 
0000000000000000 [ 303.553828] RDX: ffff9b6e7695d7c0 RSI: ffffffffad0fa1aa RDI: 0000000000000020 [ 303.555385] RBP: ffffa2cb03a73a70 R08: 0000000000000000 R09: ffff9b6e7cccd870 [ 303.556859] R10: 00000000000001dd R11: ffff9b6e7dcabb38 R12: ffff9b6e77e7f0b8 [ 303.558350] R13: ffffa2cb03a73a00 R14: ffffffffc03f5e28 R15: ffffffffad50c2c0 [ 303.559619] FS: 00007f128b70cf00(0000) GS:ffff9b6e7dc80000(0000) knlGS:0000000000000000 [ 303.561982] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 303.563519] CR2: 0000000000000020 CR3: 0000000077a14000 CR4: 00000000000006e0 [ 303.565038] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 303.566782] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
On Mon, Dec 23, 2019 at 11:15:35AM +0800, sunke (E) wrote: > Hi Omar, > > The nbd/003 you simplified does the same thing I want to do, and I made some small > changes. I ran the simplified nbd/003 with the Linux kernel at commit > 7e0165b2f1a, and it passed. Then I rolled the Linux kernel back to commit > 090bb803708, and it did trigger the BUG_ON. > > However, there is one difference. NBD has ioctl and netlink interfaces. I > use the netlink interface, and the simplified nbd/003 uses the ioctl > interface. The nbd/003 with the netlink interface does seem to trigger > some other issue. So, can it be nbd/004? Sure, how about we add a flag to mount_clear_sock that tells it to use the netlink interface instead of the ioctl interface, and add nbd/004, which is the same as nbd/003 except that it runs mount_clear_sock with the netlink flag?
在 2020/2/12 6:23, Omar Sandoval 写道: > On Mon, Dec 23, 2019 at 11:15:35AM +0800, sunke (E) wrote: >> Hi Omar, >> >> The nbd/003 you simplified does the same thing I want to do, and I made some small >> changes. I ran the simplified nbd/003 with the Linux kernel at commit >> 7e0165b2f1a, and it passed. Then I rolled the Linux kernel back to commit >> 090bb803708, and it did trigger the BUG_ON. >> >> However, there is one difference. NBD has ioctl and netlink interfaces. I >> use the netlink interface, and the simplified nbd/003 uses the ioctl >> interface. The nbd/003 with the netlink interface does seem to trigger >> some other issue. So, can it be nbd/004? > > Sure, how about we add a flag to mount_clear_sock that tells it to use > the netlink interface instead of the ioctl interface, and add nbd/004, > which is the same as nbd/003 except that it runs mount_clear_sock with the netlink flag? > Hi Omar, I do not understand the suggestion to add a flag to mount_clear_sock. How about adding _start_nbd_server_netlink and _stop_nbd_server_netlink to tests/nbd/rc, so that others can also reuse the code? Thanks, Sun Ke
On Wed, Feb 12, 2020 at 04:13:02PM +0800, sunke (E) wrote: > > > 在 2020/2/12 6:23, Omar Sandoval 写道: > > On Mon, Dec 23, 2019 at 11:15:35AM +0800, sunke (E) wrote: > > > Hi Omar, > > > > > > The nbd/003 you simplified does the same thing I want to do, and I made some small > > > changes. I ran the simplified nbd/003 with the Linux kernel at commit > > > 7e0165b2f1a, and it passed. Then I rolled the Linux kernel back to commit > > > 090bb803708, and it did trigger the BUG_ON. > > > > > > However, there is one difference. NBD has ioctl and netlink interfaces. I > > > use the netlink interface, and the simplified nbd/003 uses the ioctl > > > interface. The nbd/003 with the netlink interface does seem to trigger > > > some other issue. So, can it be nbd/004? > > > > Sure, how about we add a flag to mount_clear_sock that tells it to use > > the netlink interface instead of the ioctl interface, and add nbd/004, > > which is the same as nbd/003 except that it runs mount_clear_sock with the netlink flag? > > > Hi Omar, > > I do not understand the suggestion to add a flag to mount_clear_sock. Sorry, I thought you were saying that there is a netlink interface equivalent to ioctl(NBD_CLEAR_SOCK). > How about adding > _start_nbd_server_netlink and _stop_nbd_server_netlink to tests/nbd/rc, > so that others can also reuse the code? Sure, that works.
diff --git a/src/Makefile b/src/Makefile index 917d6f4..3b587f6 100644 --- a/src/Makefile +++ b/src/Makefile @@ -4,12 +4,13 @@ HAVE_C_HEADER = $(shell if echo "\#include <$(1)>" | \ C_TARGETS := \ loblksize \ + loop_change_fd \ loop_get_status_null \ + mount_clear_sock \ + nbdsetsize \ openclose \ sg/dxfer-from-dev \ sg/syzkaller1 \ - nbdsetsize \ - loop_change_fd \ zbdioctl CXX_TARGETS := \ diff --git a/src/mount_clear_sock.c b/src/mount_clear_sock.c new file mode 100644 index 0000000..ba9ed71 --- /dev/null +++ b/src/mount_clear_sock.c @@ -0,0 +1,91 @@ +// SPDX-License-Identifier: GPL-3.0+ +// Copyright (C) 2019 Sun Ke + +#include <assert.h> +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <sys/ioctl.h> +#include <sys/mount.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <linux/fs.h> +#include <linux/nbd.h> + +int main(int argc, char **argv) +{ + const char *mountpoint, *dev, *fstype; + int loops, fd; + + if (argc != 5) { + fprintf(stderr, "usage: %s DEV MOUNTPOINT FSTYPE LOOPS", argv[0]); + return EXIT_FAILURE; + } + + dev = argv[1]; + mountpoint = argv[2]; + fstype = argv[3]; + loops = atoi(argv[4]); + + fd = open(dev, O_RDWR); + if (fd == -1) { + perror("open"); + return EXIT_FAILURE; + } + + for (int i = 0; i < loops; i++) { + pid_t mount_pid, clear_sock_pid; + int wstatus; + + mount_pid = fork(); + if (mount_pid == -1) { + perror("fork"); + return EXIT_FAILURE; + } + if (mount_pid == 0) { + mount(dev, mountpoint, fstype, + MS_NOSUID | MS_SYNCHRONOUS, 0); + umount(mountpoint); + exit(EXIT_SUCCESS); + } + + clear_sock_pid = fork(); + if (clear_sock_pid == -1) { + perror("fork"); + return EXIT_FAILURE; + } + if (clear_sock_pid == 0) { + if (ioctl(fd, NBD_CLEAR_SOCK, 0) == -1) { + perror("ioctl"); + exit(EXIT_FAILURE); + } + exit(EXIT_SUCCESS); + } + + if (waitpid(mount_pid, &wstatus, 0) == -1) { + perror("waitpid"); + return EXIT_FAILURE; + } + if 
(!WIFEXITED(wstatus) || + WEXITSTATUS(wstatus) != EXIT_SUCCESS) { + fprintf(stderr, "mount process failed"); + return EXIT_FAILURE; + } + + if (waitpid(clear_sock_pid, &wstatus, 0) == -1) { + perror("waitpid"); + return EXIT_FAILURE; + } + if (!WIFEXITED(wstatus) || + WEXITSTATUS(wstatus) != EXIT_SUCCESS) { + fprintf(stderr, "NBD_CLEAR_SOCK process failed"); + return EXIT_FAILURE; + } + } + + close(fd); + return EXIT_SUCCESS; +} diff --git a/tests/nbd/003 b/tests/nbd/003 new file mode 100644 index 0000000..57fb63a --- /dev/null +++ b/tests/nbd/003 @@ -0,0 +1,30 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-3.0+ +# Copyright (C) 2019 Sun Ke +# +# Regression test for commit 2b5c8f0063e4 ("nbd: replace kill_bdev() with +# __invalidate_device() again"). + +. tests/nbd/rc + +DESCRIPTION="mount/unmount concurrently with NBD_CLEAR_SOCK" +QUICK=1 + +requires() { + _have_nbd && _have_src_program mount_clear_sock +} + +test() { + echo "Running ${TEST_NAME}" + + _start_nbd_server + nbd-client -L -N export localhost /dev/nbd0 >> "$FULL" 2>&1 + mkfs.ext4 /dev/nbd0 >> "$FULL" 2>&1 + + mkdir -p "${TMPDIR}/mnt" + src/mount_clear_sock /dev/nbd0 "${TMPDIR}/mnt" ext4 5000 + umount "${TMPDIR}/mnt" > /dev/null 2>&1 + + nbd-client -d /dev/nbd0 >> "$FULL" 2>&1 + _stop_nbd_server +} diff --git a/tests/nbd/003.out b/tests/nbd/003.out new file mode 100644 index 0000000..aa340db --- /dev/null +++ b/tests/nbd/003.out @@ -0,0 +1 @@ +Running nbd/003